LLVM 20.0.0git
TargetLowering.cpp
Go to the documentation of this file.
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
25#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/LLVMContext.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCExpr.h"
36#include <cctype>
37using namespace llvm;
38
39/// NOTE: The TargetMachine owns TLOF.
// Constructor body: simply forwards the TargetMachine to TargetLoweringBase,
// which holds all the actual lowering state.
// NOTE(review): the constructor's signature line (orig line 40) was stripped
// by the doxygen extraction — only the init-list/body line survives here.
 41 : TargetLoweringBase(tm) {}
42
// Return a human-readable name for a target-specific SDNode opcode.
// The generic base implementation knows no target opcodes, so it always
// returns nullptr; targets are expected to override this (used for DAG dumps).
43const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
 44 return nullptr;
 45}
46
49}
50
51/// Check whether a given call node is in tail position within its function. If
52/// so, it sets Chain to the input chain of the tail call.
// NOTE(review): the signature line (orig 53) and the line defining `F`
// (orig 55, presumably the caller Function fetched from the DAG) were
// stripped by the extraction.
 54 SDValue &Chain) const {
 56
 57 // First, check if tail calls have been disabled in this function.
 58 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
 59 return false;
 60
 61 // Conservatively require the attributes of the call to match those of
 62 // the return. Ignore following attributes because they don't affect the
 63 // call sequence.
 64 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
 65 for (const auto &Attr :
 66 {Attribute::Alignment, Attribute::Dereferenceable,
 67 Attribute::DereferenceableOrNull, Attribute::NoAlias,
 68 Attribute::NonNull, Attribute::NoUndef, Attribute::Range})
 69 CallerAttrs.removeAttribute(Attr);
 70
 // Any return attribute left after the removals above makes the tail call
 // unsafe to form.
 71 if (CallerAttrs.hasAttributes())
 72 return false;
 73
 74 // It's not safe to eliminate the sign / zero extension of the return value.
 // NOTE(review): this ZExt/SExt check is unreachable as written — any
 // remaining attribute (including ZExt/SExt) already returned false above.
 75 if (CallerAttrs.contains(Attribute::ZExt) ||
 76 CallerAttrs.contains(Attribute::SExt))
 77 return false;
 78
 79 // Check if the only use is a function return node.
 80 return isUsedByReturnOnly(Node, Chain);
 81}
82
// Verify that every register-assigned outgoing argument that lives in a
// callee-saved register is the unmodified value the caller itself received
// in that same register (a CopyFromReg of the corresponding live-in vreg).
// Returns true when all register arguments pass this check.
// NOTE(review): the first signature line (orig 83, taking the
// MachineRegisterInfo &MRI parameter) was stripped by the extraction.
 84 const uint32_t *CallerPreservedMask,
 85 const SmallVectorImpl<CCValAssign> &ArgLocs,
 86 const SmallVectorImpl<SDValue> &OutVals) const {
 87 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
 88 const CCValAssign &ArgLoc = ArgLocs[I];
 89 if (!ArgLoc.isRegLoc())
 90 continue;
 91 MCRegister Reg = ArgLoc.getLocReg();
 92 // Only look at callee saved registers.
 93 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
 94 continue;
 95 // Check that we pass the value used for the caller.
 96 // (We look for a CopyFromReg reading a virtual register that is used
 97 // for the function live-in value of register Reg)
 98 SDValue Value = OutVals[I];
 // Look through a zero-extension assertion wrapper, if present.
 99 if (Value->getOpcode() == ISD::AssertZext)
 100 Value = Value.getOperand(0);
 101 if (Value->getOpcode() != ISD::CopyFromReg)
 102 return false;
 103 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
 104 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
 105 return false;
 106 }
 107 return true;
 108}
109
110/// Set CallLoweringInfo attribute flags based on a call instruction
111/// and called function attributes.
// Copies each relevant parameter attribute of argument ArgIdx from the call
// site into the corresponding boolean flag, then resolves the pointee type
// (IndirectType) for the ABI attributes that carry one.
// NOTE(review): the signature line (orig 112) and the first line of the
// assert (orig 128, presumably checking that at most one of the
// byval/preallocated/inalloca/sret attributes is set) were stripped.
 113 unsigned ArgIdx) {
 114 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
 115 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
 116 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
 117 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
 118 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
 119 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
 120 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
 121 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
 122 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
 123 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
 124 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
 125 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
 126 Alignment = Call->getParamStackAlign(ArgIdx);
 127 IndirectType = nullptr;
 129 "multiple ABI attributes?");
 130 if (IsByVal) {
 131 IndirectType = Call->getParamByValType(ArgIdx);
 // byval may also specify its own alignment; it only wins if the stack
 // alignment above was absent.
 132 if (!Alignment)
 133 Alignment = Call->getParamAlign(ArgIdx);
 134 }
 135 if (IsPreallocated)
 136 IndirectType = Call->getParamPreallocatedType(ArgIdx);
 137 if (IsInAlloca)
 138 IndirectType = Call->getParamInAllocaType(ArgIdx);
 139 if (IsSRet)
 140 IndirectType = Call->getParamStructRetType(ArgIdx);
 141}
142
143/// Generate a libcall taking the given operands as arguments and returning a
144/// result of type RetVT.
// Builds an ArgListTy from Ops (deciding sign/zero-extension per operand),
// resolves the libcall symbol, configures a CallLoweringInfo and lowers the
// call. Returns the {result, chain} pair from LowerCallTo.
// NOTE(review): several lines were stripped by the extraction: the function
// signature (orig 146-147), the ArgListTy/ArgListEntry declarations
// (orig 154, 157), the second half of the two IsSoften conditions
// (orig 167, 184), the Callee/CLI setup (orig 175-176, 179) and two of the
// chained CLI setters (orig 192-193).
 145std::pair<SDValue, SDValue>
 148 MakeLibCallOptions CallOptions,
 149 const SDLoc &dl,
 150 SDValue InChain) const {
 // Default to the DAG entry node when the caller supplies no chain.
 151 if (!InChain)
 152 InChain = DAG.getEntryNode();
 153
 155 Args.reserve(Ops.size());
 156
 158 for (unsigned i = 0; i < Ops.size(); ++i) {
 159 SDValue NewOp = Ops[i];
 160 Entry.Node = NewOp;
 161 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
 162 Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
 163 CallOptions.IsSExt);
 164 Entry.IsZExt = !Entry.IsSExt;
 165
 // For soft-float lowering, extension flags may be dropped entirely.
 166 if (CallOptions.IsSoften &&
 168 Entry.IsSExt = Entry.IsZExt = false;
 169 }
 170 Args.push_back(Entry);
 171 }
 172
 173 if (LC == RTLIB::UNKNOWN_LIBCALL)
 174 report_fatal_error("Unsupported library call operation!");
 177
 178 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
 180 bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
 181 bool zeroExtend = !signExtend;
 182
 183 if (CallOptions.IsSoften &&
 185 signExtend = zeroExtend = false;
 186 }
 187
 188 CLI.setDebugLoc(dl)
 189 .setChain(InChain)
 190 .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
 191 .setNoReturn(CallOptions.DoesNotReturn)
 194 .setSExtResult(signExtend)
 195 .setZExtResult(zeroExtend);
 196 return LowerCallTo(CLI);
 197}
198
// Choose the sequence of value types (MemOps) to use when expanding a
// memcpy/memmove/memset-like operation, honoring alignment constraints and
// the Limit on the number of individual load/store operations.
// Returns false if no legal sequence within Limit exists.
// NOTE(review): stripped lines include the signature's first line (orig 199),
// the VT-decrement inside the alignment loop (orig 216), the i32 legality
// check (orig 243-244), and the allowsMisalignedMemoryAccesses call wrapping
// orig 268/270 in the overlap heuristic.
 200 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
 201 unsigned SrcAS, const AttributeList &FuncAttributes) const {
 // A fixed destination alignment smaller than the source alignment cannot
 // be handled here (unless Limit is "unlimited").
 202 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
 203 Op.getSrcAlign() < Op.getDstAlign())
 204 return false;
 205
 206 EVT VT = getOptimalMemOpType(Op, FuncAttributes);
 207
 208 if (VT == MVT::Other) {
 209 // Use the largest integer type whose alignment constraints are satisfied.
 210 // We only need to check DstAlign here as SrcAlign is always greater or
 211 // equal to DstAlign (or zero).
 212 VT = MVT::LAST_INTEGER_VALUETYPE;
 213 if (Op.isFixedDstAlign())
 214 while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
 215 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
 217 assert(VT.isInteger());
 218
 219 // Find the largest legal integer type.
 220 MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
 221 while (!isTypeLegal(LVT))
 222 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
 223 assert(LVT.isInteger());
 224
 225 // If the type we've chosen is larger than the largest legal integer type
 226 // then use that instead.
 227 if (VT.bitsGT(LVT))
 228 VT = LVT;
 229 }
 230
 231 unsigned NumMemOps = 0;
 232 uint64_t Size = Op.size();
 233 while (Size) {
 234 unsigned VTSize = VT.getSizeInBits() / 8;
 // Shrink VT until a single operation of that type fits in what remains.
 235 while (VTSize > Size) {
 236 // For now, only use non-vector load / store's for the left-over pieces.
 237 EVT NewVT = VT;
 238 unsigned NewVTSize;
 239
 240 bool Found = false;
 241 if (VT.isVector() || VT.isFloatingPoint()) {
 242 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
 245 Found = true;
 246 else if (NewVT == MVT::i64 &&
 248 isSafeMemOpType(MVT::f64)) {
 249 // i64 is usually not legal on 32-bit targets, but f64 may be.
 250 NewVT = MVT::f64;
 251 Found = true;
 252 }
 253 }
 254
 255 if (!Found) {
 // Walk down the integer types until a safe one is found (never
 // below i8).
 256 do {
 257 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
 258 if (NewVT == MVT::i8)
 259 break;
 260 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
 261 }
 262 NewVTSize = NewVT.getSizeInBits() / 8;
 263
 264 // If the new VT cannot cover all of the remaining bits, then consider
 265 // issuing a (or a pair of) unaligned and overlapping load / store.
 266 unsigned Fast;
 267 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
 269 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
 271 Fast)
 272 VTSize = Size;
 273 else {
 274 VT = NewVT;
 275 VTSize = NewVTSize;
 276 }
 277 }
 278
 279 if (++NumMemOps > Limit)
 280 return false;
 281
 282 MemOps.push_back(VT);
 283 Size -= VTSize;
 284 }
 285
 286 return true;
 287}
288
289/// Soften the operands of a comparison. This code is shared among BR_CC,
290/// SELECT_CC, and SETCC handlers.
// Convenience overload: forwards to the full softenSetCCOperands with a
// default-constructed (null) Chain and default signaling behavior.
// NOTE(review): the signature's first line (orig 291) was stripped by the
// extraction.
 292 SDValue &NewLHS, SDValue &NewRHS,
 293 ISD::CondCode &CCCode,
 294 const SDLoc &dl, const SDValue OldLHS,
 295 const SDValue OldRHS) const {
 296 SDValue Chain;
 297 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
 298 OldRHS, Chain);
 299}
300
// Soften a floating-point SETCC into one or two soft-float comparison
// libcalls (e.g. __eqsf2). On return, NewLHS/NewRHS/CCCode describe an
// integer comparison against the libcall result(s); NewRHS is a null SDValue
// when the two-libcall path already produced the final boolean in NewLHS.
// NOTE(review): the signature's first line (orig 301) and the RetVT /
// MakeLibCallOptions setup lines (orig 404, 406) were stripped by the
// extraction.
 302 SDValue &NewLHS, SDValue &NewRHS,
 303 ISD::CondCode &CCCode,
 304 const SDLoc &dl, const SDValue OldLHS,
 305 const SDValue OldRHS,
 306 SDValue &Chain,
 307 bool IsSignaling) const {
 308 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
 309 // not supporting it. We can update this code when libgcc provides such
 310 // functions.
 311
 312 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
 313 && "Unsupported setcc type!");
 314
 315 // Expand into one or more soft-fp libcall(s).
 316 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
 317 bool ShouldInvertCC = false;
 // Map each condition code to the libcall(s) implementing it; each arm
 // selects the per-type variant (f32/f64/f128/ppcf128).
 318 switch (CCCode) {
 319 case ISD::SETEQ:
 320 case ISD::SETOEQ:
 321 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
 322 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
 323 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
 324 break;
 325 case ISD::SETNE:
 326 case ISD::SETUNE:
 327 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
 328 (VT == MVT::f64) ? RTLIB::UNE_F64 :
 329 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
 330 break;
 331 case ISD::SETGE:
 332 case ISD::SETOGE:
 333 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
 334 (VT == MVT::f64) ? RTLIB::OGE_F64 :
 335 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
 336 break;
 337 case ISD::SETLT:
 338 case ISD::SETOLT:
 339 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
 340 (VT == MVT::f64) ? RTLIB::OLT_F64 :
 341 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
 342 break;
 343 case ISD::SETLE:
 344 case ISD::SETOLE:
 345 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
 346 (VT == MVT::f64) ? RTLIB::OLE_F64 :
 347 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
 348 break;
 349 case ISD::SETGT:
 350 case ISD::SETOGT:
 351 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
 352 (VT == MVT::f64) ? RTLIB::OGT_F64 :
 353 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
 354 break;
 // SETO is implemented as NOT(SETUO): same libcall, inverted result.
 355 case ISD::SETO:
 356 ShouldInvertCC = true;
 357 [[fallthrough]];
 358 case ISD::SETUO:
 359 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
 360 (VT == MVT::f64) ? RTLIB::UO_F64 :
 361 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
 362 break;
 363 case ISD::SETONE:
 364 // SETONE = O && UNE
 365 ShouldInvertCC = true;
 366 [[fallthrough]];
 // SETUEQ (and inverted SETONE) require two libcalls: UO and OEQ.
 367 case ISD::SETUEQ:
 368 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
 369 (VT == MVT::f64) ? RTLIB::UO_F64 :
 370 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
 371 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
 372 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
 373 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
 374 break;
 375 default:
 376 // Invert CC for unordered comparisons
 377 ShouldInvertCC = true;
 378 switch (CCCode) {
 379 case ISD::SETULT:
 380 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
 381 (VT == MVT::f64) ? RTLIB::OGE_F64 :
 382 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
 383 break;
 384 case ISD::SETULE:
 385 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
 386 (VT == MVT::f64) ? RTLIB::OGT_F64 :
 387 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
 388 break;
 389 case ISD::SETUGT:
 390 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
 391 (VT == MVT::f64) ? RTLIB::OLE_F64 :
 392 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
 393 break;
 394 case ISD::SETUGE:
 395 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
 396 (VT == MVT::f64) ? RTLIB::OLT_F64 :
 397 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
 398 break;
 399 default: llvm_unreachable("Do not know how to soften this setcc!");
 400 }
 401 }
 402
 403 // Use the target specific return value for comparison lib calls.
 405 SDValue Ops[2] = {NewLHS, NewRHS};
 407 EVT OpsVT[2] = { OldLHS.getValueType(),
 408 OldRHS.getValueType() };
 409 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
 // First libcall: result compared against 0 with the libcall's CC.
 410 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
 411 NewLHS = Call.first;
 412 NewRHS = DAG.getConstant(0, dl, RetVT);
 413
 414 CCCode = getCmpLibcallCC(LC1);
 415 if (ShouldInvertCC) {
 416 assert(RetVT.isInteger());
 417 CCCode = getSetCCInverse(CCCode, RetVT);
 418 }
 419
 420 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
 421 // Update Chain.
 422 Chain = Call.second;
 423 } else {
 // Two-libcall path: combine both comparison results with AND/OR and
 // token-factor the chains together.
 424 EVT SetCCVT =
 425 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
 426 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
 427 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
 428 CCCode = getCmpLibcallCC(LC2);
 429 if (ShouldInvertCC)
 430 CCCode = getSetCCInverse(CCCode, RetVT);
 431 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
 432 if (Chain)
 433 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
 434 Call2.second);
 435 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
 436 Tmp.getValueType(), Tmp, NewLHS);
 437 NewRHS = SDValue();
 438 }
 439}
440
441/// Return the entry encoding for a jump table in the current function. The
442/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
// NOTE(review): the signature line (orig 443) and all three return
// statements (orig 446, 450, 453 — presumably returning EK_BlockAddress,
// EK_GPRel32BlockAddress and EK_LabelDifference32 respectively, per the
// surviving comments) were stripped by the extraction.
 444 // In non-pic modes, just use the address of a block.
 445 if (!isPositionIndependent())
 447
 448 // In PIC mode, if the target supports a GPRel32 directive, use it.
 449 if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
 451
 452 // Otherwise, use a label difference.
 454}
455
// Return the SDValue to use as the relocation base for the given PIC jump
// table.
// NOTE(review): the signature's first line (orig 456) and the lines computing
// `Table` (orig 461-463 — presumably returning the GOT for GP-relative
// encodings per the comment below) were stripped by the extraction.
 457 SelectionDAG &DAG) const {
 458 // If our PIC model is GP relative, use the global offset table as the base.
 459 unsigned JTEncoding = getJumpTableEncoding();
 460
 464
 465 return Table;
 466}
467
468/// This returns the relocation base for the given PIC jumptable, the same as
469/// getPICJumpTableRelocBase, but as an MCExpr.
// NOTE(review): the first parameter line of the signature (orig 471, taking
// the MachineFunction *MF) was stripped by the extraction.
 470const MCExpr *
 472 unsigned JTI,MCContext &Ctx) const{
 473 // The normal PIC reloc base is the label at the start of the jump table.
 474 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
 475}
476
// Lower an indirect jump-table branch: optionally attach jump-table debug
// info to the chain, then emit the ISD::BRIND node.
// NOTE(review): the signature's first line (orig 477) and the condition
// guarding the debug-info emission (orig 482 — the comment says it checks
// for CodeView) were stripped by the extraction.
 478 SDValue Addr, int JTI,
 479 SelectionDAG &DAG) const {
 480 SDValue Chain = Value;
 481 // Jump table debug info is only needed if CodeView is enabled.
 483 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
 484 }
 485 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
 486}
487
// Return true if folding a constant offset into the given global address is
// legal: the global must be DSO-local and the code must not be PIC.
// NOTE(review): the second signature line (orig 489, naming the
// GlobalAddressSDNode *GA parameter) was stripped by the extraction.
488bool
 490 const TargetMachine &TM = getTargetMachine();
 491 const GlobalValue *GV = GA->getGlobal();
 492
 493 // If the address is not even local to this DSO we will have to load it from
 494 // a got and then add the offset.
 495 if (!TM.shouldAssumeDSOLocal(GV))
 496 return false;
 497
 498 // If the code is position independent we will have to add a base register.
 499 if (isPositionIndependent())
 500 return false;
 501
 502 // Otherwise we can do it.
 503 return true;
 504}
505
506//===----------------------------------------------------------------------===//
507// Optimization Methods
508//===----------------------------------------------------------------------===//
509
510/// If the specified instruction has a constant integer operand and there are
511/// bits set in that constant that are not demanded, then clear those bits and
512/// return true.
// NOTE(review): the signature's first line (orig 513, taking SDValue Op) was
// stripped by the extraction.
 514 const APInt &DemandedBits,
 515 const APInt &DemandedElts,
 516 TargetLoweringOpt &TLO) const {
 517 SDLoc DL(Op);
 518 unsigned Opcode = Op.getOpcode();
 519
 520 // Early-out if we've ended up calling an undemanded node, leave this to
 521 // constant folding.
 522 if (DemandedBits.isZero() || DemandedElts.isZero())
 523 return false;
 524
 525 // Do target-specific constant optimization.
 526 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
 527 return TLO.New.getNode();
 528
 529 // FIXME: ISD::SELECT, ISD::SELECT_CC
 530 switch (Opcode) {
 531 default:
 532 break;
 533 case ISD::XOR:
 534 case ISD::AND:
 535 case ISD::OR: {
 // Only handles a plain (non-opaque) constant RHS.
 536 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
 537 if (!Op1C || Op1C->isOpaque())
 538 return false;
 539
 540 // If this is a 'not' op, don't touch it because that's a canonical form.
 541 const APInt &C = Op1C->getAPIntValue();
 542 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
 543 return false;
 544
 // The constant has bits outside the demanded set: mask them off and
 // rebuild the node with the narrowed constant.
 545 if (!C.isSubsetOf(DemandedBits)) {
 546 EVT VT = Op.getValueType();
 547 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
 548 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
 549 Op->getFlags());
 550 return TLO.CombineTo(Op, NewOp);
 551 }
 552
 553 break;
 554 }
 555 }
 556
 557 return false;
 558}
559
// Convenience overload without an explicit DemandedElts mask: demands all
// elements (or a single scalar lane) and forwards to the full version.
// NOTE(review): the signature's first line (orig 560) and the vector branch
// of the DemandedElts conditional (orig 565, presumably
// APInt::getAllOnes(VT.getVectorNumElements())) were stripped.
 561 const APInt &DemandedBits,
 562 TargetLoweringOpt &TLO) const {
 563 EVT VT = Op.getValueType();
 564 APInt DemandedElts = VT.isVector()
 566 : APInt(1, 1);
 567 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
 568}
569
570/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
571/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
572/// but it could be generalized for targets with other types of implicit
573/// widening casts.
// NOTE(review): the signature's first line (orig 574, taking SDValue Op and
// unsigned BitWidth) was stripped by the extraction.
 575 const APInt &DemandedBits,
 576 TargetLoweringOpt &TLO) const {
 577 assert(Op.getNumOperands() == 2 &&
 578 "ShrinkDemandedOp only supports binary operators!");
 579 assert(Op.getNode()->getNumValues() == 1 &&
 580 "ShrinkDemandedOp only supports nodes with one result!");
 581
 582 EVT VT = Op.getValueType();
 583 SelectionDAG &DAG = TLO.DAG;
 584 SDLoc dl(Op);
 585
 586 // Early return, as this function cannot handle vector types.
 587 if (VT.isVector())
 588 return false;
 589
 590 assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
 591 Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
 592 "ShrinkDemandedOp only supports operands that have the same size!");
 593
 594 // Don't do this if the node has another user, which may require the
 595 // full value.
 596 if (!Op.getNode()->hasOneUse())
 597 return false;
 598
 599 // Search for the smallest integer type with free casts to and from
 600 // Op's type. For expedience, just check power-of-2 integer types.
 601 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 602 unsigned DemandedSize = DemandedBits.getActiveBits();
 603 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
 604 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
 605 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
 606 if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
 607 // We found a type with free casts.
 // Truncate both operands, redo the op in the narrow type, then
 // any-extend back to the original width.
 608 SDValue X = DAG.getNode(
 609 Op.getOpcode(), dl, SmallVT,
 610 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
 611 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
 612 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
 613 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
 614 return TLO.CombineTo(Op, Z);
 615 }
 616 }
 617 return false;
 618}
619
// DAGCombiner-facing wrapper around SimplifyDemandedBits: builds a
// TargetLoweringOpt from the combiner state, runs the simplification and, on
// success, requeues the node.
// NOTE(review): the signature's first line (orig 620) and the
// DCI.CommitTargetLoweringOpt(TLO) call (orig 630) were stripped by the
// extraction.
 621 DAGCombinerInfo &DCI) const {
 622 SelectionDAG &DAG = DCI.DAG;
 623 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
 624 !DCI.isBeforeLegalizeOps());
 625 KnownBits Known;
 626
 627 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
 628 if (Simplified) {
 629 DCI.AddToWorklist(Op.getNode());
 631 }
 632 return Simplified;
 633}
634
// Same as the wrapper above but with an explicit per-element demanded mask.
// NOTE(review): the signature's first line (orig 635) and the
// DCI.CommitTargetLoweringOpt(TLO) call (orig 647) were stripped by the
// extraction.
 636 const APInt &DemandedElts,
 637 DAGCombinerInfo &DCI) const {
 638 SelectionDAG &DAG = DCI.DAG;
 639 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
 640 !DCI.isBeforeLegalizeOps());
 641 KnownBits Known;
 642
 643 bool Simplified =
 644 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
 645 if (Simplified) {
 646 DCI.AddToWorklist(Op.getNode());
 648 }
 649 return Simplified;
 650}
651
// Overload that derives the DemandedElts mask from the type: all lanes for a
// fixed-length vector, a single implicit lane otherwise (including scalable
// vectors, per the comment below).
// NOTE(review): the signature's first lines (orig 652, 654) and the
// fixed-length-vector branch of the conditional (orig 663) were stripped.
 653 KnownBits &Known,
 655 unsigned Depth,
 656 bool AssumeSingleUse) const {
 657 EVT VT = Op.getValueType();
 658
 659 // Since the number of lanes in a scalable vector is unknown at compile time,
 660 // we track one bit which is implicitly broadcast to all lanes. This means
 661 // that all lanes in a scalable vector are considered demanded.
 662 APInt DemandedElts = VT.isFixedLengthVector()
 664 : APInt(1, 1)
 665 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
 666 AssumeSingleUse);
 667}
668
669// TODO: Under what circumstances can we create nodes? Constant folding?
// Given demanded bit/element masks, try to return an EXISTING simpler value
// that produces the same demanded bits as Op — without creating new nodes or
// modifying the DAG (so it is safe even when Op has multiple uses). Returns
// a null SDValue when no bypass is found.
// NOTE(review): many lines were stripped by the extraction, including the
// signature's first line (orig 670), the depth-limit condition (orig 676)
// and several case labels / conditions flagged individually below.
 671 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
 672 SelectionDAG &DAG, unsigned Depth) const {
 673 EVT VT = Op.getValueType();
 674
 675 // Limit search depth.
 677 return SDValue();
 678
 679 // Ignore UNDEFs.
 680 if (Op.isUndef())
 681 return SDValue();
 682
 683 // Not demanding any bits/elts from Op.
 684 if (DemandedBits == 0 || DemandedElts == 0)
 685 return DAG.getUNDEF(VT);
 686
 687 bool IsLE = DAG.getDataLayout().isLittleEndian();
 688 unsigned NumElts = DemandedElts.getBitWidth();
 689 unsigned BitWidth = DemandedBits.getBitWidth();
 690 KnownBits LHSKnown, RHSKnown;
 691 switch (Op.getOpcode()) {
 692 case ISD::BITCAST: {
 693 if (VT.isScalableVector())
 694 return SDValue();
 695
 696 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
 697 EVT SrcVT = Src.getValueType();
 698 EVT DstVT = Op.getValueType();
 699 if (SrcVT == DstVT)
 700 return Src;
 701
 702 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
 703 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
 // Same element width: recurse through the bitcast unchanged.
 704 if (NumSrcEltBits == NumDstEltBits)
 705 if (SDValue V = SimplifyMultipleUseDemandedBits(
 706 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
 707 return DAG.getBitcast(DstVT, V);
 708
 // Wide destination elements built from several narrow source elements:
 // translate the demanded bits/elts masks to source granularity.
 709 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
 710 unsigned Scale = NumDstEltBits / NumSrcEltBits;
 711 unsigned NumSrcElts = SrcVT.getVectorNumElements();
 712 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
 713 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
 714 for (unsigned i = 0; i != Scale; ++i) {
 715 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
 716 unsigned BitOffset = EltOffset * NumSrcEltBits;
 717 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
 718 if (!Sub.isZero()) {
 719 DemandedSrcBits |= Sub;
 720 for (unsigned j = 0; j != NumElts; ++j)
 721 if (DemandedElts[j])
 722 DemandedSrcElts.setBit((j * Scale) + i);
 723 }
 724 }
 725
 726 if (SDValue V = SimplifyMultipleUseDemandedBits(
 727 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
 728 return DAG.getBitcast(DstVT, V);
 729 }
 730
 731 // TODO - bigendian once we have test coverage.
 // Narrow destination elements carved out of wide source elements.
 732 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
 733 unsigned Scale = NumSrcEltBits / NumDstEltBits;
 734 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
 735 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
 736 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
 737 for (unsigned i = 0; i != NumElts; ++i)
 738 if (DemandedElts[i]) {
 739 unsigned Offset = (i % Scale) * NumDstEltBits;
 740 DemandedSrcBits.insertBits(DemandedBits, Offset);
 741 DemandedSrcElts.setBit(i / Scale);
 742 }
 743
 744 if (SDValue V = SimplifyMultipleUseDemandedBits(
 745 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
 746 return DAG.getBitcast(DstVT, V);
 747 }
 748
 749 break;
 750 }
 751 case ISD::FREEZE: {
 // A freeze of a value already known not to be undef/poison is a no-op.
 752 SDValue N0 = Op.getOperand(0);
 753 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
 754 /*PoisonOnly=*/false))
 755 return N0;
 756 break;
 757 }
 758 case ISD::AND: {
 759 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
 760 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
 761
 762 // If all of the demanded bits are known 1 on one side, return the other.
 763 // These bits cannot contribute to the result of the 'and' in this
 764 // context.
 765 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
 766 return Op.getOperand(0);
 767 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
 768 return Op.getOperand(1);
 769 break;
 770 }
 771 case ISD::OR: {
 772 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
 773 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
 774
 775 // If all of the demanded bits are known zero on one side, return the
 776 // other. These bits cannot contribute to the result of the 'or' in this
 777 // context.
 778 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
 779 return Op.getOperand(0);
 780 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
 781 return Op.getOperand(1);
 782 break;
 783 }
 784 case ISD::XOR: {
 785 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
 786 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
 787
 788 // If all of the demanded bits are known zero on one side, return the
 789 // other.
 790 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
 791 return Op.getOperand(0);
 792 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
 793 return Op.getOperand(1);
 794 break;
 795 }
 796 case ISD::SHL: {
 797 // If we are only demanding sign bits then we can use the shift source
 798 // directly.
 799 if (std::optional<uint64_t> MaxSA =
 800 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
 801 SDValue Op0 = Op.getOperand(0);
 802 unsigned ShAmt = *MaxSA;
 803 unsigned NumSignBits =
 804 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
 805 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
 806 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
 807 return Op0;
 808 }
 809 break;
 810 }
 811 case ISD::SETCC: {
 812 SDValue Op0 = Op.getOperand(0);
 813 SDValue Op1 = Op.getOperand(1);
 814 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
 815 // If (1) we only need the sign-bit, (2) the setcc operands are the same
 816 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
 817 // -1, we may be able to bypass the setcc.
 // NOTE(review): the remaining conditions of this if (orig 819-821,
 // checking operand width and boolean contents) were stripped.
 818 if (DemandedBits.isSignMask() &&
 822 // If we're testing X < 0, then this compare isn't needed - just use X!
 823 // FIXME: We're limiting to integer types here, but this should also work
 824 // if we don't care about FP signed-zero. The use of SETLT with FP means
 825 // that we don't care about NaNs.
 // NOTE(review): the second half of this condition (orig 827, presumably
 // checking Op1 is the zero constant) was stripped.
 826 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
 828 return Op0;
 829 }
 830 break;
 831 }
 // NOTE(review): this case label (orig 832, ISD::SIGN_EXTEND_INREG per the
 // body below) was stripped.
 833 // If none of the extended bits are demanded, eliminate the sextinreg.
 834 SDValue Op0 = Op.getOperand(0);
 835 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
 836 unsigned ExBits = ExVT.getScalarSizeInBits();
 // NOTE(review): the second half of this condition (orig 838) was stripped.
 837 if (DemandedBits.getActiveBits() <= ExBits &&
 839 return Op0;
 840 // If the input is already sign extended, just drop the extension.
 841 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
 842 if (NumSignBits >= (BitWidth - ExBits + 1))
 843 return Op0;
 844 break;
 845 }
 // NOTE(review): the case labels here (orig 845-848, the
 // ANY/SIGN/ZERO_EXTEND_VECTOR_INREG group per the body below) were
 // stripped.
 849 if (VT.isScalableVector())
 850 return SDValue();
 851
 852 // If we only want the lowest element and none of extended bits, then we can
 853 // return the bitcasted source vector.
 854 SDValue Src = Op.getOperand(0);
 855 EVT SrcVT = Src.getValueType();
 856 EVT DstVT = Op.getValueType();
 857 if (IsLE && DemandedElts == 1 &&
 858 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
 859 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
 860 return DAG.getBitcast(DstVT, Src);
 861 }
 862 break;
 863 }
 // NOTE(review): this case label (orig 864, ISD::INSERT_VECTOR_ELT per the
 // body below) was stripped.
 865 if (VT.isScalableVector())
 866 return SDValue();
 867
 868 // If we don't demand the inserted element, return the base vector.
 869 SDValue Vec = Op.getOperand(0);
 870 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
 871 EVT VecVT = Vec.getValueType();
 872 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
 873 !DemandedElts[CIdx->getZExtValue()])
 874 return Vec;
 875 break;
 876 }
 // NOTE(review): this case label (orig 877, ISD::INSERT_SUBVECTOR per the
 // body below) was stripped.
 878 if (VT.isScalableVector())
 879 return SDValue();
 880
 881 SDValue Vec = Op.getOperand(0);
 882 SDValue Sub = Op.getOperand(1);
 883 uint64_t Idx = Op.getConstantOperandVal(2);
 884 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
 885 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
 886 // If we don't demand the inserted subvector, return the base vector.
 887 if (DemandedSubElts == 0)
 888 return Vec;
 889 break;
 890 }
 891 case ISD::VECTOR_SHUFFLE: {
 // NOTE(review): an assert line (orig 892) was stripped here.
 893 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
 894
 895 // If all the demanded elts are from one operand and are inline,
 896 // then we can use the operand directly.
 897 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
 898 for (unsigned i = 0; i != NumElts; ++i) {
 899 int M = ShuffleMask[i];
 900 if (M < 0 || !DemandedElts[i])
 901 continue;
 902 AllUndef = false;
 903 IdentityLHS &= (M == (int)i);
 904 IdentityRHS &= ((M - NumElts) == i);
 905 }
 906
 907 if (AllUndef)
 908 return DAG.getUNDEF(Op.getValueType());
 909 if (IdentityLHS)
 910 return Op.getOperand(0);
 911 if (IdentityRHS)
 912 return Op.getOperand(1);
 913 break;
 914 }
 915 default:
 916 // TODO: Probably okay to remove after audit; here to reduce change size
 917 // in initial enablement patch for scalable vectors
 918 if (VT.isScalableVector())
 919 return SDValue();
 920
 // Give targets a chance to simplify their own (post-ISD) opcodes.
 921 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
 922 if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
 923 Op, DemandedBits, DemandedElts, DAG, Depth))
 924 return V;
 925 break;
 926 }
 927 return SDValue();
 928}
929
// Overload deriving DemandedElts from the type (all lanes for fixed-length
// vectors, one implicit lane otherwise) before forwarding to the full
// version.
// NOTE(review): the signature's first lines (orig 930-931) and the
// fixed-length-vector branch of the conditional (orig 938) were stripped.
 932 unsigned Depth) const {
 933 EVT VT = Op.getValueType();
 934 // Since the number of lanes in a scalable vector is unknown at compile time,
 935 // we track one bit which is implicitly broadcast to all lanes. This means
 936 // that all lanes in a scalable vector are considered demanded.
 937 APInt DemandedElts = VT.isFixedLengthVector()
 939 : APInt(1, 1);
 940 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
 941 Depth);
 942}
943
// Element-mask-only overload: demands every bit of each element and forwards
// to the full version.
// NOTE(review): the signature's first line (orig 944) was stripped by the
// extraction.
 945 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
 946 unsigned Depth) const {
 947 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
 948 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
 949 Depth);
 950}
951
952// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
953// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
956 const TargetLowering &TLI,
957 const APInt &DemandedBits,
958 const APInt &DemandedElts, unsigned Depth) {
959 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
960 "SRL or SRA node is required here!");
961 // Is the right shift using an immediate value of 1?
962 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
963 if (!N1C || !N1C->isOne())
964 return SDValue();
965
966 // We are looking for an avgfloor
967 // add(ext, ext)
968 // or one of these as a avgceil
969 // add(add(ext, ext), 1)
970 // add(add(ext, 1), ext)
971 // add(ext, add(ext, 1))
972 SDValue Add = Op.getOperand(0);
973 if (Add.getOpcode() != ISD::ADD)
974 return SDValue();
975
976 SDValue ExtOpA = Add.getOperand(0);
977 SDValue ExtOpB = Add.getOperand(1);
978 SDValue Add2;
979 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
980 ConstantSDNode *ConstOp;
981 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
982 ConstOp->isOne()) {
983 ExtOpA = Op1;
984 ExtOpB = Op3;
985 Add2 = A;
986 return true;
987 }
988 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
989 ConstOp->isOne()) {
990 ExtOpA = Op1;
991 ExtOpB = Op2;
992 Add2 = A;
993 return true;
994 }
995 return false;
996 };
997 bool IsCeil =
998 (ExtOpA.getOpcode() == ISD::ADD &&
999 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
1000 (ExtOpB.getOpcode() == ISD::ADD &&
1001 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
1002
1003 // If the shift is signed (sra):
1004 // - Needs >= 2 sign bit for both operands.
1005 // - Needs >= 2 zero bits.
1006 // If the shift is unsigned (srl):
1007 // - Needs >= 1 zero bit for both operands.
1008 // - Needs 1 demanded bit zero and >= 2 sign bits.
1009 SelectionDAG &DAG = TLO.DAG;
1010 unsigned ShiftOpc = Op.getOpcode();
1011 bool IsSigned = false;
1012 unsigned KnownBits;
1013 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1014 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
1015 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1016 unsigned NumZeroA =
1017 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1018 unsigned NumZeroB =
1019 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1020 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1021
1022 switch (ShiftOpc) {
1023 default:
1024 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1025 case ISD::SRA: {
1026 if (NumZero >= 2 && NumSigned < NumZero) {
1027 IsSigned = false;
1028 KnownBits = NumZero;
1029 break;
1030 }
1031 if (NumSigned >= 1) {
1032 IsSigned = true;
1033 KnownBits = NumSigned;
1034 break;
1035 }
1036 return SDValue();
1037 }
1038 case ISD::SRL: {
1039 if (NumZero >= 1 && NumSigned < NumZero) {
1040 IsSigned = false;
1041 KnownBits = NumZero;
1042 break;
1043 }
1044 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1045 IsSigned = true;
1046 KnownBits = NumSigned;
1047 break;
1048 }
1049 return SDValue();
1050 }
1051 }
1052
1053 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1054 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1055
1056 // Find the smallest power-2 type that is legal for this vector size and
1057 // operation, given the original type size and the number of known sign/zero
1058 // bits.
1059 EVT VT = Op.getValueType();
1060 unsigned MinWidth =
1061 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1062 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
1064 return SDValue();
1065 if (VT.isVector())
1066 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1067 if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
1068 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1069 // larger type size to do the transform.
1070 if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
1071 return SDValue();
1072 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1073 Add.getOperand(1)) &&
1074 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1075 Add2.getOperand(1))))
1076 NVT = VT;
1077 else
1078 return SDValue();
1079 }
1080
1081 // Don't create a AVGFLOOR node with a scalar constant unless its legal as
1082 // this is likely to stop other folds (reassociation, value tracking etc.)
1083 if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
1084 (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
1085 return SDValue();
1086
1087 SDLoc DL(Op);
1088 SDValue ResultAVG =
1089 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1090 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1091 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1092}
1093
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node, record the replacement via
/// TLO.CombineTo, and return true. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
1102 SDValue Op, const APInt &OriginalDemandedBits,
1103 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1104 unsigned Depth, bool AssumeSingleUse) const {
1105 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1106 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1107 "Mask size mismatches value type size!");
1108
1109 // Don't know anything.
1110 Known = KnownBits(BitWidth);
1111
1112 EVT VT = Op.getValueType();
1113 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1114 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1115 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1116 "Unexpected vector size");
1117
1118 APInt DemandedBits = OriginalDemandedBits;
1119 APInt DemandedElts = OriginalDemandedElts;
1120 SDLoc dl(Op);
1121
1122 // Undef operand.
1123 if (Op.isUndef())
1124 return false;
1125
1126 // We can't simplify target constants.
1127 if (Op.getOpcode() == ISD::TargetConstant)
1128 return false;
1129
1130 if (Op.getOpcode() == ISD::Constant) {
1131 // We know all of the bits for a constant!
1132 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1133 return false;
1134 }
1135
1136 if (Op.getOpcode() == ISD::ConstantFP) {
1137 // We know all of the bits for a floating point constant!
1139 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1140 return false;
1141 }
1142
1143 // Other users may use these bits.
1144 bool HasMultiUse = false;
1145 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1147 // Limit search depth.
1148 return false;
1149 }
1150 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1152 DemandedElts = APInt::getAllOnes(NumElts);
1153 HasMultiUse = true;
1154 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1155 // Not demanding any bits/elts from Op.
1156 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1157 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1158 // Limit search depth.
1159 return false;
1160 }
1161
1162 KnownBits Known2;
1163 switch (Op.getOpcode()) {
1164 case ISD::SCALAR_TO_VECTOR: {
1165 if (VT.isScalableVector())
1166 return false;
1167 if (!DemandedElts[0])
1168 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1169
1170 KnownBits SrcKnown;
1171 SDValue Src = Op.getOperand(0);
1172 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1173 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1174 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1175 return true;
1176
1177 // Upper elements are undef, so only get the knownbits if we just demand
1178 // the bottom element.
1179 if (DemandedElts == 1)
1180 Known = SrcKnown.anyextOrTrunc(BitWidth);
1181 break;
1182 }
1183 case ISD::BUILD_VECTOR:
1184 // Collect the known bits that are shared by every demanded element.
1185 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1186 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1187 return false; // Don't fall through, will infinitely loop.
1188 case ISD::SPLAT_VECTOR: {
1189 SDValue Scl = Op.getOperand(0);
1190 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1191 KnownBits KnownScl;
1192 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1193 return true;
1194
1195 // Implicitly truncate the bits to match the official semantics of
1196 // SPLAT_VECTOR.
1197 Known = KnownScl.trunc(BitWidth);
1198 break;
1199 }
1200 case ISD::LOAD: {
1201 auto *LD = cast<LoadSDNode>(Op);
1202 if (getTargetConstantFromLoad(LD)) {
1203 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1204 return false; // Don't fall through, will infinitely loop.
1205 }
1206 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1207 // If this is a ZEXTLoad and we are looking at the loaded value.
1208 EVT MemVT = LD->getMemoryVT();
1209 unsigned MemBits = MemVT.getScalarSizeInBits();
1210 Known.Zero.setBitsFrom(MemBits);
1211 return false; // Don't fall through, will infinitely loop.
1212 }
1213 break;
1214 }
1216 if (VT.isScalableVector())
1217 return false;
1218 SDValue Vec = Op.getOperand(0);
1219 SDValue Scl = Op.getOperand(1);
1220 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1221 EVT VecVT = Vec.getValueType();
1222
1223 // If index isn't constant, assume we need all vector elements AND the
1224 // inserted element.
1225 APInt DemandedVecElts(DemandedElts);
1226 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1227 unsigned Idx = CIdx->getZExtValue();
1228 DemandedVecElts.clearBit(Idx);
1229
1230 // Inserted element is not required.
1231 if (!DemandedElts[Idx])
1232 return TLO.CombineTo(Op, Vec);
1233 }
1234
1235 KnownBits KnownScl;
1236 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1237 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1238 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1239 return true;
1240
1241 Known = KnownScl.anyextOrTrunc(BitWidth);
1242
1243 KnownBits KnownVec;
1244 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1245 Depth + 1))
1246 return true;
1247
1248 if (!!DemandedVecElts)
1249 Known = Known.intersectWith(KnownVec);
1250
1251 return false;
1252 }
1253 case ISD::INSERT_SUBVECTOR: {
1254 if (VT.isScalableVector())
1255 return false;
1256 // Demand any elements from the subvector and the remainder from the src its
1257 // inserted into.
1258 SDValue Src = Op.getOperand(0);
1259 SDValue Sub = Op.getOperand(1);
1260 uint64_t Idx = Op.getConstantOperandVal(2);
1261 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1262 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1263 APInt DemandedSrcElts = DemandedElts;
1264 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1265
1266 KnownBits KnownSub, KnownSrc;
1267 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1268 Depth + 1))
1269 return true;
1270 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1271 Depth + 1))
1272 return true;
1273
1274 Known.Zero.setAllBits();
1275 Known.One.setAllBits();
1276 if (!!DemandedSubElts)
1277 Known = Known.intersectWith(KnownSub);
1278 if (!!DemandedSrcElts)
1279 Known = Known.intersectWith(KnownSrc);
1280
1281 // Attempt to avoid multi-use src if we don't need anything from it.
1282 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1283 !DemandedSrcElts.isAllOnes()) {
1284 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1285 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1286 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1287 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1288 if (NewSub || NewSrc) {
1289 NewSub = NewSub ? NewSub : Sub;
1290 NewSrc = NewSrc ? NewSrc : Src;
1291 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1292 Op.getOperand(2));
1293 return TLO.CombineTo(Op, NewOp);
1294 }
1295 }
1296 break;
1297 }
1299 if (VT.isScalableVector())
1300 return false;
1301 // Offset the demanded elts by the subvector index.
1302 SDValue Src = Op.getOperand(0);
1303 if (Src.getValueType().isScalableVector())
1304 break;
1305 uint64_t Idx = Op.getConstantOperandVal(1);
1306 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1307 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1308
1309 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1310 Depth + 1))
1311 return true;
1312
1313 // Attempt to avoid multi-use src if we don't need anything from it.
1314 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1315 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1316 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1317 if (DemandedSrc) {
1318 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1319 Op.getOperand(1));
1320 return TLO.CombineTo(Op, NewOp);
1321 }
1322 }
1323 break;
1324 }
1325 case ISD::CONCAT_VECTORS: {
1326 if (VT.isScalableVector())
1327 return false;
1328 Known.Zero.setAllBits();
1329 Known.One.setAllBits();
1330 EVT SubVT = Op.getOperand(0).getValueType();
1331 unsigned NumSubVecs = Op.getNumOperands();
1332 unsigned NumSubElts = SubVT.getVectorNumElements();
1333 for (unsigned i = 0; i != NumSubVecs; ++i) {
1334 APInt DemandedSubElts =
1335 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1336 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1337 Known2, TLO, Depth + 1))
1338 return true;
1339 // Known bits are shared by every demanded subvector element.
1340 if (!!DemandedSubElts)
1341 Known = Known.intersectWith(Known2);
1342 }
1343 break;
1344 }
1345 case ISD::VECTOR_SHUFFLE: {
1346 assert(!VT.isScalableVector());
1347 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1348
1349 // Collect demanded elements from shuffle operands..
1350 APInt DemandedLHS, DemandedRHS;
1351 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1352 DemandedRHS))
1353 break;
1354
1355 if (!!DemandedLHS || !!DemandedRHS) {
1356 SDValue Op0 = Op.getOperand(0);
1357 SDValue Op1 = Op.getOperand(1);
1358
1359 Known.Zero.setAllBits();
1360 Known.One.setAllBits();
1361 if (!!DemandedLHS) {
1362 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1363 Depth + 1))
1364 return true;
1365 Known = Known.intersectWith(Known2);
1366 }
1367 if (!!DemandedRHS) {
1368 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1369 Depth + 1))
1370 return true;
1371 Known = Known.intersectWith(Known2);
1372 }
1373
1374 // Attempt to avoid multi-use ops if we don't need anything from them.
1375 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1376 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1377 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1378 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1379 if (DemandedOp0 || DemandedOp1) {
1380 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1381 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1382 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1383 return TLO.CombineTo(Op, NewOp);
1384 }
1385 }
1386 break;
1387 }
1388 case ISD::AND: {
1389 SDValue Op0 = Op.getOperand(0);
1390 SDValue Op1 = Op.getOperand(1);
1391
1392 // If the RHS is a constant, check to see if the LHS would be zero without
1393 // using the bits from the RHS. Below, we use knowledge about the RHS to
1394 // simplify the LHS, here we're using information from the LHS to simplify
1395 // the RHS.
1396 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1397 // Do not increment Depth here; that can cause an infinite loop.
1398 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1399 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1400 if ((LHSKnown.Zero & DemandedBits) ==
1401 (~RHSC->getAPIntValue() & DemandedBits))
1402 return TLO.CombineTo(Op, Op0);
1403
1404 // If any of the set bits in the RHS are known zero on the LHS, shrink
1405 // the constant.
1406 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1407 DemandedElts, TLO))
1408 return true;
1409
1410 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1411 // constant, but if this 'and' is only clearing bits that were just set by
1412 // the xor, then this 'and' can be eliminated by shrinking the mask of
1413 // the xor. For example, for a 32-bit X:
1414 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1415 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1416 LHSKnown.One == ~RHSC->getAPIntValue()) {
1417 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1418 return TLO.CombineTo(Op, Xor);
1419 }
1420 }
1421
1422 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1423 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1424 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1425 (Op0.getOperand(0).isUndef() ||
1427 Op0->hasOneUse()) {
1428 unsigned NumSubElts =
1430 unsigned SubIdx = Op0.getConstantOperandVal(2);
1431 APInt DemandedSub =
1432 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1433 KnownBits KnownSubMask =
1434 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1435 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1436 SDValue NewAnd =
1437 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1438 SDValue NewInsert =
1439 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1440 Op0.getOperand(1), Op0.getOperand(2));
1441 return TLO.CombineTo(Op, NewInsert);
1442 }
1443 }
1444
1445 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1446 Depth + 1))
1447 return true;
1448 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1449 Known2, TLO, Depth + 1))
1450 return true;
1451
1452 // If all of the demanded bits are known one on one side, return the other.
1453 // These bits cannot contribute to the result of the 'and'.
1454 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1455 return TLO.CombineTo(Op, Op0);
1456 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1457 return TLO.CombineTo(Op, Op1);
1458 // If all of the demanded bits in the inputs are known zeros, return zero.
1459 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1460 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1461 // If the RHS is a constant, see if we can simplify it.
1462 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1463 TLO))
1464 return true;
1465 // If the operation can be done in a smaller type, do so.
1466 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1467 return true;
1468
1469 // Attempt to avoid multi-use ops if we don't need anything from them.
1470 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1471 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1472 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1473 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1474 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1475 if (DemandedOp0 || DemandedOp1) {
1476 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1477 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1478 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1479 return TLO.CombineTo(Op, NewOp);
1480 }
1481 }
1482
1483 Known &= Known2;
1484 break;
1485 }
1486 case ISD::OR: {
1487 SDValue Op0 = Op.getOperand(0);
1488 SDValue Op1 = Op.getOperand(1);
1489 SDNodeFlags Flags = Op.getNode()->getFlags();
1490 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1491 Depth + 1)) {
1492 if (Flags.hasDisjoint()) {
1493 Flags.setDisjoint(false);
1494 Op->setFlags(Flags);
1495 }
1496 return true;
1497 }
1498
1499 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1500 Known2, TLO, Depth + 1)) {
1501 if (Flags.hasDisjoint()) {
1502 Flags.setDisjoint(false);
1503 Op->setFlags(Flags);
1504 }
1505 return true;
1506 }
1507
1508 // If all of the demanded bits are known zero on one side, return the other.
1509 // These bits cannot contribute to the result of the 'or'.
1510 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1511 return TLO.CombineTo(Op, Op0);
1512 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1513 return TLO.CombineTo(Op, Op1);
1514 // If the RHS is a constant, see if we can simplify it.
1515 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1516 return true;
1517 // If the operation can be done in a smaller type, do so.
1518 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1519 return true;
1520
1521 // Attempt to avoid multi-use ops if we don't need anything from them.
1522 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1523 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1524 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1525 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1526 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1527 if (DemandedOp0 || DemandedOp1) {
1528 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1529 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1530 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1531 return TLO.CombineTo(Op, NewOp);
1532 }
1533 }
1534
1535 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1536 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1537 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1538 Op0->hasOneUse() && Op1->hasOneUse()) {
1539 // Attempt to match all commutations - m_c_Or would've been useful!
1540 for (int I = 0; I != 2; ++I) {
1541 SDValue X = Op.getOperand(I).getOperand(0);
1542 SDValue C1 = Op.getOperand(I).getOperand(1);
1543 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1544 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1545 if (Alt.getOpcode() == ISD::OR) {
1546 for (int J = 0; J != 2; ++J) {
1547 if (X == Alt.getOperand(J)) {
1548 SDValue Y = Alt.getOperand(1 - J);
1549 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1550 {C1, C2})) {
1551 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1552 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1553 return TLO.CombineTo(
1554 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1555 }
1556 }
1557 }
1558 }
1559 }
1560 }
1561
1562 Known |= Known2;
1563 break;
1564 }
1565 case ISD::XOR: {
1566 SDValue Op0 = Op.getOperand(0);
1567 SDValue Op1 = Op.getOperand(1);
1568
1569 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1570 Depth + 1))
1571 return true;
1572 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1573 Depth + 1))
1574 return true;
1575
1576 // If all of the demanded bits are known zero on one side, return the other.
1577 // These bits cannot contribute to the result of the 'xor'.
1578 if (DemandedBits.isSubsetOf(Known.Zero))
1579 return TLO.CombineTo(Op, Op0);
1580 if (DemandedBits.isSubsetOf(Known2.Zero))
1581 return TLO.CombineTo(Op, Op1);
1582 // If the operation can be done in a smaller type, do so.
1583 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1584 return true;
1585
1586 // If all of the unknown bits are known to be zero on one side or the other
1587 // turn this into an *inclusive* or.
1588 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1589 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1590 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1591
1592 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1593 if (C) {
1594 // If one side is a constant, and all of the set bits in the constant are
1595 // also known set on the other side, turn this into an AND, as we know
1596 // the bits will be cleared.
1597 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1598 // NB: it is okay if more bits are known than are requested
1599 if (C->getAPIntValue() == Known2.One) {
1600 SDValue ANDC =
1601 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1602 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1603 }
1604
1605 // If the RHS is a constant, see if we can change it. Don't alter a -1
1606 // constant because that's a 'not' op, and that is better for combining
1607 // and codegen.
1608 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1609 // We're flipping all demanded bits. Flip the undemanded bits too.
1610 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1611 return TLO.CombineTo(Op, New);
1612 }
1613
1614 unsigned Op0Opcode = Op0.getOpcode();
1615 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1616 if (ConstantSDNode *ShiftC =
1617 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1618 // Don't crash on an oversized shift. We can not guarantee that a
1619 // bogus shift has been simplified to undef.
1620 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1621 uint64_t ShiftAmt = ShiftC->getZExtValue();
1623 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1624 : Ones.lshr(ShiftAmt);
1625 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1626 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1627 TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
1628 // If the xor constant is a demanded mask, do a 'not' before the
1629 // shift:
1630 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1631 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1632 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1633 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1634 Op0.getOperand(1)));
1635 }
1636 }
1637 }
1638 }
1639 }
1640
1641 // If we can't turn this into a 'not', try to shrink the constant.
1642 if (!C || !C->isAllOnes())
1643 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1644 return true;
1645
1646 // Attempt to avoid multi-use ops if we don't need anything from them.
1647 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1648 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1649 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1650 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1651 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1652 if (DemandedOp0 || DemandedOp1) {
1653 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1654 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1655 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1656 return TLO.CombineTo(Op, NewOp);
1657 }
1658 }
1659
1660 Known ^= Known2;
1661 break;
1662 }
1663 case ISD::SELECT:
1664 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1665 Known, TLO, Depth + 1))
1666 return true;
1667 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1668 Known2, TLO, Depth + 1))
1669 return true;
1670
1671 // If the operands are constants, see if we can simplify them.
1672 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1673 return true;
1674
1675 // Only known if known in both the LHS and RHS.
1676 Known = Known.intersectWith(Known2);
1677 break;
1678 case ISD::VSELECT:
1679 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1680 Known, TLO, Depth + 1))
1681 return true;
1682 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1683 Known2, TLO, Depth + 1))
1684 return true;
1685
1686 // Only known if known in both the LHS and RHS.
1687 Known = Known.intersectWith(Known2);
1688 break;
1689 case ISD::SELECT_CC:
1690 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1691 Known, TLO, Depth + 1))
1692 return true;
1693 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1694 Known2, TLO, Depth + 1))
1695 return true;
1696
1697 // If the operands are constants, see if we can simplify them.
1698 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1699 return true;
1700
1701 // Only known if known in both the LHS and RHS.
1702 Known = Known.intersectWith(Known2);
1703 break;
1704 case ISD::SETCC: {
1705 SDValue Op0 = Op.getOperand(0);
1706 SDValue Op1 = Op.getOperand(1);
1707 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1708 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1709 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1710 // -1, we may be able to bypass the setcc.
1711 if (DemandedBits.isSignMask() &&
1715 // If we're testing X < 0, then this compare isn't needed - just use X!
1716 // FIXME: We're limiting to integer types here, but this should also work
1717 // if we don't care about FP signed-zero. The use of SETLT with FP means
1718 // that we don't care about NaNs.
1719 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1721 return TLO.CombineTo(Op, Op0);
1722
1723 // TODO: Should we check for other forms of sign-bit comparisons?
1724 // Examples: X <= -1, X >= 0
1725 }
1726 if (getBooleanContents(Op0.getValueType()) ==
1728 BitWidth > 1)
1729 Known.Zero.setBitsFrom(1);
1730 break;
1731 }
1732 case ISD::SHL: {
1733 SDValue Op0 = Op.getOperand(0);
1734 SDValue Op1 = Op.getOperand(1);
1735 EVT ShiftVT = Op1.getValueType();
1736
1737 if (std::optional<uint64_t> KnownSA =
1738 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1739 unsigned ShAmt = *KnownSA;
1740 if (ShAmt == 0)
1741 return TLO.CombineTo(Op, Op0);
1742
1743 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1744 // single shift. We can do this if the bottom bits (which are shifted
1745 // out) are never demanded.
1746 // TODO - support non-uniform vector amounts.
1747 if (Op0.getOpcode() == ISD::SRL) {
1748 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1749 if (std::optional<uint64_t> InnerSA =
1750 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1751 unsigned C1 = *InnerSA;
1752 unsigned Opc = ISD::SHL;
1753 int Diff = ShAmt - C1;
1754 if (Diff < 0) {
1755 Diff = -Diff;
1756 Opc = ISD::SRL;
1757 }
1758 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1759 return TLO.CombineTo(
1760 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1761 }
1762 }
1763 }
1764
1765 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1766 // are not demanded. This will likely allow the anyext to be folded away.
1767 // TODO - support non-uniform vector amounts.
1768 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1769 SDValue InnerOp = Op0.getOperand(0);
1770 EVT InnerVT = InnerOp.getValueType();
1771 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1772 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1773 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1774 SDValue NarrowShl = TLO.DAG.getNode(
1775 ISD::SHL, dl, InnerVT, InnerOp,
1776 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1777 return TLO.CombineTo(
1778 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1779 }
1780
1781 // Repeat the SHL optimization above in cases where an extension
1782 // intervenes: (shl (anyext (shr x, c1)), c2) to
1783 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1784 // aren't demanded (as above) and that the shifted upper c1 bits of
1785 // x aren't demanded.
1786 // TODO - support non-uniform vector amounts.
1787 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1788 InnerOp.hasOneUse()) {
1789 if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
1790 InnerOp, DemandedElts, Depth + 2)) {
1791 unsigned InnerShAmt = *SA2;
1792 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1793 DemandedBits.getActiveBits() <=
1794 (InnerBits - InnerShAmt + ShAmt) &&
1795 DemandedBits.countr_zero() >= ShAmt) {
1796 SDValue NewSA =
1797 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1798 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1799 InnerOp.getOperand(0));
1800 return TLO.CombineTo(
1801 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1802 }
1803 }
1804 }
1805 }
1806
1807 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1808 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1809 Depth + 1)) {
1810 SDNodeFlags Flags = Op.getNode()->getFlags();
1811 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1812 // Disable the nsw and nuw flags. We can no longer guarantee that we
1813 // won't wrap after simplification.
1814 Flags.setNoSignedWrap(false);
1815 Flags.setNoUnsignedWrap(false);
1816 Op->setFlags(Flags);
1817 }
1818 return true;
1819 }
1820 Known.Zero <<= ShAmt;
1821 Known.One <<= ShAmt;
1822 // low bits known zero.
1823 Known.Zero.setLowBits(ShAmt);
1824
1825 // Attempt to avoid multi-use ops if we don't need anything from them.
1826 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1827 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1828 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1829 if (DemandedOp0) {
1830 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1831 return TLO.CombineTo(Op, NewOp);
1832 }
1833 }
1834
1835 // TODO: Can we merge this fold with the one below?
1836 // Try shrinking the operation as long as the shift amount will still be
1837 // in range.
1838 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1839 Op.getNode()->hasOneUse()) {
1840 // Search for the smallest integer type with free casts to and from
1841 // Op's type. For expedience, just check power-of-2 integer types.
1842 unsigned DemandedSize = DemandedBits.getActiveBits();
1843 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1844 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1845 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1846 if (isNarrowingProfitable(VT, SmallVT) &&
1847 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1848 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1849 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1850 assert(DemandedSize <= SmallVTBits &&
1851 "Narrowed below demanded bits?");
1852 // We found a type with free casts.
1853 SDValue NarrowShl = TLO.DAG.getNode(
1854 ISD::SHL, dl, SmallVT,
1855 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1856 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1857 return TLO.CombineTo(
1858 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1859 }
1860 }
1861 }
1862
1863 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1864 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1865 // Only do this if we demand the upper half so the knownbits are correct.
1866 unsigned HalfWidth = BitWidth / 2;
1867 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1868 DemandedBits.countLeadingOnes() >= HalfWidth) {
1869 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1870 if (isNarrowingProfitable(VT, HalfVT) &&
1871 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1872 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1873 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1874 // If we're demanding the upper bits at all, we must ensure
1875 // that the upper bits of the shift result are known to be zero,
1876 // which is equivalent to the narrow shift being NUW.
1877 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1878 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1879 SDNodeFlags Flags;
1880 Flags.setNoSignedWrap(IsNSW);
1881 Flags.setNoUnsignedWrap(IsNUW);
1882 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1883 SDValue NewShiftAmt =
1884 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1885 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1886 NewShiftAmt, Flags);
1887 SDValue NewExt =
1888 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1889 return TLO.CombineTo(Op, NewExt);
1890 }
1891 }
1892 }
1893 } else {
1894 // This is a variable shift, so we can't shift the demand mask by a known
1895 // amount. But if we are not demanding high bits, then we are not
1896 // demanding those bits from the pre-shifted operand either.
1897 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1898 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1899 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1900 Depth + 1)) {
1901 SDNodeFlags Flags = Op.getNode()->getFlags();
1902 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1903 // Disable the nsw and nuw flags. We can no longer guarantee that we
1904 // won't wrap after simplification.
1905 Flags.setNoSignedWrap(false);
1906 Flags.setNoUnsignedWrap(false);
1907 Op->setFlags(Flags);
1908 }
1909 return true;
1910 }
1911 Known.resetAll();
1912 }
1913 }
1914
1915 // If we are only demanding sign bits then we can use the shift source
1916 // directly.
1917 if (std::optional<uint64_t> MaxSA =
1918 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1919 unsigned ShAmt = *MaxSA;
1920 unsigned NumSignBits =
1921 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1922 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1923 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1924 return TLO.CombineTo(Op, Op0);
1925 }
1926 break;
1927 }
1928 case ISD::SRL: {
1929 SDValue Op0 = Op.getOperand(0);
1930 SDValue Op1 = Op.getOperand(1);
1931 EVT ShiftVT = Op1.getValueType();
1932
1933 if (std::optional<uint64_t> KnownSA =
1934 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1935 unsigned ShAmt = *KnownSA;
1936 if (ShAmt == 0)
1937 return TLO.CombineTo(Op, Op0);
1938
1939 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1940 // single shift. We can do this if the top bits (which are shifted out)
1941 // are never demanded.
1942 // TODO - support non-uniform vector amounts.
1943 if (Op0.getOpcode() == ISD::SHL) {
1944 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1945 if (std::optional<uint64_t> InnerSA =
1946 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1947 unsigned C1 = *InnerSA;
1948 unsigned Opc = ISD::SRL;
1949 int Diff = ShAmt - C1;
1950 if (Diff < 0) {
1951 Diff = -Diff;
1952 Opc = ISD::SHL;
1953 }
1954 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1955 return TLO.CombineTo(
1956 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1957 }
1958 }
1959 }
1960
1961 APInt InDemandedMask = (DemandedBits << ShAmt);
1962
1963 // If the shift is exact, then it does demand the low bits (and knows that
1964 // they are zero).
1965 if (Op->getFlags().hasExact())
1966 InDemandedMask.setLowBits(ShAmt);
1967
1968 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1969 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
1970 if ((BitWidth % 2) == 0 && !VT.isVector()) {
1972 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
1973 if (isNarrowingProfitable(VT, HalfVT) &&
1974 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
1975 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1976 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
1977 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
1978 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
1979 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1980 SDValue NewShiftAmt =
1981 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1982 SDValue NewShift =
1983 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
1984 return TLO.CombineTo(
1985 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
1986 }
1987 }
1988
1989 // Compute the new bits that are at the top now.
1990 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1991 Depth + 1))
1992 return true;
1993 Known.Zero.lshrInPlace(ShAmt);
1994 Known.One.lshrInPlace(ShAmt);
1995 // High bits known zero.
1996 Known.Zero.setHighBits(ShAmt);
1997
1998 // Attempt to avoid multi-use ops if we don't need anything from them.
1999 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2000 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2001 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2002 if (DemandedOp0) {
2003 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2004 return TLO.CombineTo(Op, NewOp);
2005 }
2006 }
2007 } else {
2008 // Use generic knownbits computation as it has support for non-uniform
2009 // shift amounts.
2010 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2011 }
2012
2013 // Try to match AVG patterns (after shift simplification).
2014 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2015 DemandedElts, Depth + 1))
2016 return TLO.CombineTo(Op, AVG);
2017
2018 break;
2019 }
2020 case ISD::SRA: {
2021 SDValue Op0 = Op.getOperand(0);
2022 SDValue Op1 = Op.getOperand(1);
2023 EVT ShiftVT = Op1.getValueType();
2024
2025 // If we only want bits that already match the signbit then we don't need
2026 // to shift.
2027 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2028 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2029 NumHiDemandedBits)
2030 return TLO.CombineTo(Op, Op0);
2031
2032 // If this is an arithmetic shift right and only the low-bit is set, we can
2033 // always convert this into a logical shr, even if the shift amount is
2034 // variable. The low bit of the shift cannot be an input sign bit unless
2035 // the shift amount is >= the size of the datatype, which is undefined.
2036 if (DemandedBits.isOne())
2037 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2038
2039 if (std::optional<uint64_t> KnownSA =
2040 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2041 unsigned ShAmt = *KnownSA;
2042 if (ShAmt == 0)
2043 return TLO.CombineTo(Op, Op0);
2044
2045 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2046 // supports sext_inreg.
2047 if (Op0.getOpcode() == ISD::SHL) {
2048 if (std::optional<uint64_t> InnerSA =
2049 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2050 unsigned LowBits = BitWidth - ShAmt;
2051 EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2052 if (VT.isVector())
2053 ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2055
2056 if (*InnerSA == ShAmt) {
2057 if (!TLO.LegalOperations() ||
2059 return TLO.CombineTo(
2060 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2061 Op0.getOperand(0),
2062 TLO.DAG.getValueType(ExtVT)));
2063
2064 // Even if we can't convert to sext_inreg, we might be able to
2065 // remove this shift pair if the input is already sign extended.
2066 unsigned NumSignBits =
2067 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2068 if (NumSignBits > ShAmt)
2069 return TLO.CombineTo(Op, Op0.getOperand(0));
2070 }
2071 }
2072 }
2073
2074 APInt InDemandedMask = (DemandedBits << ShAmt);
2075
2076 // If the shift is exact, then it does demand the low bits (and knows that
2077 // they are zero).
2078 if (Op->getFlags().hasExact())
2079 InDemandedMask.setLowBits(ShAmt);
2080
2081 // If any of the demanded bits are produced by the sign extension, we also
2082 // demand the input sign bit.
2083 if (DemandedBits.countl_zero() < ShAmt)
2084 InDemandedMask.setSignBit();
2085
2086 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2087 Depth + 1))
2088 return true;
2089 Known.Zero.lshrInPlace(ShAmt);
2090 Known.One.lshrInPlace(ShAmt);
2091
2092 // If the input sign bit is known to be zero, or if none of the top bits
2093 // are demanded, turn this into an unsigned shift right.
2094 if (Known.Zero[BitWidth - ShAmt - 1] ||
2095 DemandedBits.countl_zero() >= ShAmt) {
2096 SDNodeFlags Flags;
2097 Flags.setExact(Op->getFlags().hasExact());
2098 return TLO.CombineTo(
2099 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2100 }
2101
2102 int Log2 = DemandedBits.exactLogBase2();
2103 if (Log2 >= 0) {
2104 // The bit must come from the sign.
2105 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2106 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2107 }
2108
2109 if (Known.One[BitWidth - ShAmt - 1])
2110 // New bits are known one.
2111 Known.One.setHighBits(ShAmt);
2112
2113 // Attempt to avoid multi-use ops if we don't need anything from them.
2114 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2115 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2116 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2117 if (DemandedOp0) {
2118 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2119 return TLO.CombineTo(Op, NewOp);
2120 }
2121 }
2122 }
2123
2124 // Try to match AVG patterns (after shift simplification).
2125 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2126 DemandedElts, Depth + 1))
2127 return TLO.CombineTo(Op, AVG);
2128
2129 break;
2130 }
2131 case ISD::FSHL:
2132 case ISD::FSHR: {
2133 SDValue Op0 = Op.getOperand(0);
2134 SDValue Op1 = Op.getOperand(1);
2135 SDValue Op2 = Op.getOperand(2);
2136 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2137
2138 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2139 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2140
2141 // For fshl, 0-shift returns the 1st arg.
2142 // For fshr, 0-shift returns the 2nd arg.
2143 if (Amt == 0) {
2144 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2145 Known, TLO, Depth + 1))
2146 return true;
2147 break;
2148 }
2149
2150 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2151 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2152 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2153 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2154 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2155 Depth + 1))
2156 return true;
2157 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2158 Depth + 1))
2159 return true;
2160
2161 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2162 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2163 Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2164 Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2165 Known = Known.unionWith(Known2);
2166
2167 // Attempt to avoid multi-use ops if we don't need anything from them.
2168 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2169 !DemandedElts.isAllOnes()) {
2170 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2171 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2172 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2173 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2174 if (DemandedOp0 || DemandedOp1) {
2175 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2176 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2177 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2178 DemandedOp1, Op2);
2179 return TLO.CombineTo(Op, NewOp);
2180 }
2181 }
2182 }
2183
2184 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2185 if (isPowerOf2_32(BitWidth)) {
2186 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2187 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2188 Known2, TLO, Depth + 1))
2189 return true;
2190 }
2191 break;
2192 }
2193 case ISD::ROTL:
2194 case ISD::ROTR: {
2195 SDValue Op0 = Op.getOperand(0);
2196 SDValue Op1 = Op.getOperand(1);
2197 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2198
2199 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2200 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2201 return TLO.CombineTo(Op, Op0);
2202
2203 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2204 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2205 unsigned RevAmt = BitWidth - Amt;
2206
2207 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2208 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2209 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2210 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2211 Depth + 1))
2212 return true;
2213
2214 // rot*(x, 0) --> x
2215 if (Amt == 0)
2216 return TLO.CombineTo(Op, Op0);
2217
2218 // See if we don't demand either half of the rotated bits.
2219 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2220 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2221 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2222 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2223 }
2224 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2225 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2226 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2227 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2228 }
2229 }
2230
2231 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2232 if (isPowerOf2_32(BitWidth)) {
2233 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2234 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2235 Depth + 1))
2236 return true;
2237 }
2238 break;
2239 }
2240 case ISD::SMIN:
2241 case ISD::SMAX:
2242 case ISD::UMIN:
2243 case ISD::UMAX: {
2244 unsigned Opc = Op.getOpcode();
2245 SDValue Op0 = Op.getOperand(0);
2246 SDValue Op1 = Op.getOperand(1);
2247
2248 // If we're only demanding signbits, then we can simplify to OR/AND node.
2249 unsigned BitOp =
2250 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2251 unsigned NumSignBits =
2252 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2253 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2254 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2255 if (NumSignBits >= NumDemandedUpperBits)
2256 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2257
2258 // Check if one arg is always less/greater than (or equal) to the other arg.
2259 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2260 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2261 switch (Opc) {
2262 case ISD::SMIN:
2263 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2264 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2265 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2266 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2267 Known = KnownBits::smin(Known0, Known1);
2268 break;
2269 case ISD::SMAX:
2270 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2271 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2272 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2273 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2274 Known = KnownBits::smax(Known0, Known1);
2275 break;
2276 case ISD::UMIN:
2277 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2278 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2279 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2280 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2281 Known = KnownBits::umin(Known0, Known1);
2282 break;
2283 case ISD::UMAX:
2284 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2285 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2286 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2287 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2288 Known = KnownBits::umax(Known0, Known1);
2289 break;
2290 }
2291 break;
2292 }
2293 case ISD::BITREVERSE: {
2294 SDValue Src = Op.getOperand(0);
2295 APInt DemandedSrcBits = DemandedBits.reverseBits();
2296 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2297 Depth + 1))
2298 return true;
2299 Known.One = Known2.One.reverseBits();
2300 Known.Zero = Known2.Zero.reverseBits();
2301 break;
2302 }
2303 case ISD::BSWAP: {
2304 SDValue Src = Op.getOperand(0);
2305
2306 // If the only bits demanded come from one byte of the bswap result,
2307 // just shift the input byte into position to eliminate the bswap.
2308 unsigned NLZ = DemandedBits.countl_zero();
2309 unsigned NTZ = DemandedBits.countr_zero();
2310
2311 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2312 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2313 // have 14 leading zeros, round to 8.
2314 NLZ = alignDown(NLZ, 8);
2315 NTZ = alignDown(NTZ, 8);
2316 // If we need exactly one byte, we can do this transformation.
2317 if (BitWidth - NLZ - NTZ == 8) {
2318 // Replace this with either a left or right shift to get the byte into
2319 // the right place.
2320 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2321 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2322 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2323 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2324 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2325 return TLO.CombineTo(Op, NewOp);
2326 }
2327 }
2328
2329 APInt DemandedSrcBits = DemandedBits.byteSwap();
2330 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2331 Depth + 1))
2332 return true;
2333 Known.One = Known2.One.byteSwap();
2334 Known.Zero = Known2.Zero.byteSwap();
2335 break;
2336 }
2337 case ISD::CTPOP: {
2338 // If only 1 bit is demanded, replace with PARITY as long as we're before
2339 // op legalization.
2340 // FIXME: Limit to scalars for now.
2341 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2342 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2343 Op.getOperand(0)));
2344
2345 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2346 break;
2347 }
2349 SDValue Op0 = Op.getOperand(0);
2350 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2351 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2352
2353 // If we only care about the highest bit, don't bother shifting right.
2354 if (DemandedBits.isSignMask()) {
2355 unsigned MinSignedBits =
2356 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2357 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2358 // However if the input is already sign extended we expect the sign
2359 // extension to be dropped altogether later and do not simplify.
2360 if (!AlreadySignExtended) {
2361 // Compute the correct shift amount type, which must be getShiftAmountTy
2362 // for scalar types after legalization.
2363 SDValue ShiftAmt =
2364 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2365 return TLO.CombineTo(Op,
2366 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2367 }
2368 }
2369
2370 // If none of the extended bits are demanded, eliminate the sextinreg.
2371 if (DemandedBits.getActiveBits() <= ExVTBits)
2372 return TLO.CombineTo(Op, Op0);
2373
2374 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2375
2376 // Since the sign extended bits are demanded, we know that the sign
2377 // bit is demanded.
2378 InputDemandedBits.setBit(ExVTBits - 1);
2379
2380 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2381 Depth + 1))
2382 return true;
2383
2384 // If the sign bit of the input is known set or clear, then we know the
2385 // top bits of the result.
2386
2387 // If the input sign bit is known zero, convert this into a zero extension.
2388 if (Known.Zero[ExVTBits - 1])
2389 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2390
2391 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2392 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2393 Known.One.setBitsFrom(ExVTBits);
2394 Known.Zero &= Mask;
2395 } else { // Input sign bit unknown
2396 Known.Zero &= Mask;
2397 Known.One &= Mask;
2398 }
2399 break;
2400 }
2401 case ISD::BUILD_PAIR: {
2402 EVT HalfVT = Op.getOperand(0).getValueType();
2403 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2404
2405 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2406 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2407
2408 KnownBits KnownLo, KnownHi;
2409
2410 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2411 return true;
2412
2413 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2414 return true;
2415
2416 Known = KnownHi.concat(KnownLo);
2417 break;
2418 }
2420 if (VT.isScalableVector())
2421 return false;
2422 [[fallthrough]];
2423 case ISD::ZERO_EXTEND: {
2424 SDValue Src = Op.getOperand(0);
2425 EVT SrcVT = Src.getValueType();
2426 unsigned InBits = SrcVT.getScalarSizeInBits();
2427 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2428 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2429
2430 // If none of the top bits are demanded, convert this into an any_extend.
2431 if (DemandedBits.getActiveBits() <= InBits) {
2432 // If we only need the non-extended bits of the bottom element
2433 // then we can just bitcast to the result.
2434 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2435 VT.getSizeInBits() == SrcVT.getSizeInBits())
2436 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2437
2438 unsigned Opc =
2440 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2441 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2442 }
2443
2444 SDNodeFlags Flags = Op->getFlags();
2445 APInt InDemandedBits = DemandedBits.trunc(InBits);
2446 APInt InDemandedElts = DemandedElts.zext(InElts);
2447 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2448 Depth + 1)) {
2449 if (Flags.hasNonNeg()) {
2450 Flags.setNonNeg(false);
2451 Op->setFlags(Flags);
2452 }
2453 return true;
2454 }
2455 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2456 Known = Known.zext(BitWidth);
2457
2458 // Attempt to avoid multi-use ops if we don't need anything from them.
2459 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2460 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2461 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2462 break;
2463 }
2465 if (VT.isScalableVector())
2466 return false;
2467 [[fallthrough]];
2468 case ISD::SIGN_EXTEND: {
2469 SDValue Src = Op.getOperand(0);
2470 EVT SrcVT = Src.getValueType();
2471 unsigned InBits = SrcVT.getScalarSizeInBits();
2472 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2473 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2474
2475 APInt InDemandedElts = DemandedElts.zext(InElts);
2476 APInt InDemandedBits = DemandedBits.trunc(InBits);
2477
2478 // Since some of the sign extended bits are demanded, we know that the sign
2479 // bit is demanded.
2480 InDemandedBits.setBit(InBits - 1);
2481
2482 // If none of the top bits are demanded, convert this into an any_extend.
2483 if (DemandedBits.getActiveBits() <= InBits) {
2484 // If we only need the non-extended bits of the bottom element
2485 // then we can just bitcast to the result.
2486 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2487 VT.getSizeInBits() == SrcVT.getSizeInBits())
2488 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2489
2490 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2492 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2493 InBits) {
2494 unsigned Opc =
2496 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2497 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2498 }
2499 }
2500
2501 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2502 Depth + 1))
2503 return true;
2504 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2505
2506 // If the sign bit is known one, the top bits match.
2507 Known = Known.sext(BitWidth);
2508
2509 // If the sign bit is known zero, convert this to a zero extend.
2510 if (Known.isNonNegative()) {
2511 unsigned Opc =
2513 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2514 SDNodeFlags Flags;
2515 if (!IsVecInReg)
2516 Flags.setNonNeg(true);
2517 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2518 }
2519 }
2520
2521 // Attempt to avoid multi-use ops if we don't need anything from them.
2522 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2523 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2524 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2525 break;
2526 }
2528 if (VT.isScalableVector())
2529 return false;
2530 [[fallthrough]];
2531 case ISD::ANY_EXTEND: {
2532 SDValue Src = Op.getOperand(0);
2533 EVT SrcVT = Src.getValueType();
2534 unsigned InBits = SrcVT.getScalarSizeInBits();
2535 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2536 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2537
2538 // If we only need the bottom element then we can just bitcast.
2539 // TODO: Handle ANY_EXTEND?
2540 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2541 VT.getSizeInBits() == SrcVT.getSizeInBits())
2542 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2543
2544 APInt InDemandedBits = DemandedBits.trunc(InBits);
2545 APInt InDemandedElts = DemandedElts.zext(InElts);
2546 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2547 Depth + 1))
2548 return true;
2549 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2550 Known = Known.anyext(BitWidth);
2551
2552 // Attempt to avoid multi-use ops if we don't need anything from them.
2553 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2554 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2555 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2556 break;
2557 }
2558 case ISD::TRUNCATE: {
2559 SDValue Src = Op.getOperand(0);
2560
2561 // Simplify the input, using demanded bit information, and compute the known
2562 // zero/one bits live out.
2563 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2564 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2565 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2566 Depth + 1))
2567 return true;
2568 Known = Known.trunc(BitWidth);
2569
2570 // Attempt to avoid multi-use ops if we don't need anything from them.
2571 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2572 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2573 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2574
2575 // If the input is only used by this truncate, see if we can shrink it based
2576 // on the known demanded bits.
2577 switch (Src.getOpcode()) {
2578 default:
2579 break;
2580 case ISD::SRL:
2581 // Shrink SRL by a constant if none of the high bits shifted in are
2582 // demanded.
2583 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2584 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2585 // undesirable.
2586 break;
2587
2588 if (Src.getNode()->hasOneUse()) {
2589 if (isTruncateFree(Src, VT) &&
2590 !isTruncateFree(Src.getValueType(), VT)) {
2591 // If truncate is only free at trunc(srl), do not turn it into
2592 // srl(trunc). The check is done by first check the truncate is free
2593 // at Src's opcode(srl), then check the truncate is not done by
2594 // referencing sub-register. In test, if both trunc(srl) and
2595 // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2596 // trunc(srl)'s trunc is free, trunc(srl) is better.
2597 break;
2598 }
2599
2600 std::optional<uint64_t> ShAmtC =
2601 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2602 if (!ShAmtC || *ShAmtC >= BitWidth)
2603 break;
2604 uint64_t ShVal = *ShAmtC;
2605
2606 APInt HighBits =
2607 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2608 HighBits.lshrInPlace(ShVal);
2609 HighBits = HighBits.trunc(BitWidth);
2610 if (!(HighBits & DemandedBits)) {
2611 // None of the shifted in bits are needed. Add a truncate of the
2612 // shift input, then shift it.
2613 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2614 SDValue NewTrunc =
2615 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2616 return TLO.CombineTo(
2617 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2618 }
2619 }
2620 break;
2621 }
2622
2623 break;
2624 }
2625 case ISD::AssertZext: {
2626 // AssertZext demands all of the high bits, plus any of the low bits
2627 // demanded by its users.
2628 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2630 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2631 TLO, Depth + 1))
2632 return true;
2633
2634 Known.Zero |= ~InMask;
2635 Known.One &= (~Known.Zero);
2636 break;
2637 }
2639 SDValue Src = Op.getOperand(0);
2640 SDValue Idx = Op.getOperand(1);
2641 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2642 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2643
2644 if (SrcEltCnt.isScalable())
2645 return false;
2646
2647 // Demand the bits from every vector element without a constant index.
2648 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2649 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2650 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2651 if (CIdx->getAPIntValue().ult(NumSrcElts))
2652 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2653
2654 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2655 // anything about the extended bits.
2656 APInt DemandedSrcBits = DemandedBits;
2657 if (BitWidth > EltBitWidth)
2658 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2659
2660 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2661 Depth + 1))
2662 return true;
2663
2664 // Attempt to avoid multi-use ops if we don't need anything from them.
2665 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2666 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2667 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2668 SDValue NewOp =
2669 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2670 return TLO.CombineTo(Op, NewOp);
2671 }
2672 }
2673
2674 Known = Known2;
2675 if (BitWidth > EltBitWidth)
2676 Known = Known.anyext(BitWidth);
2677 break;
2678 }
2679 case ISD::BITCAST: {
2680 if (VT.isScalableVector())
2681 return false;
2682 SDValue Src = Op.getOperand(0);
2683 EVT SrcVT = Src.getValueType();
2684 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2685
2686 // If this is an FP->Int bitcast and if the sign bit is the only
2687 // thing demanded, turn this into a FGETSIGN.
2688 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2689 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2690 SrcVT.isFloatingPoint()) {
2691 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2692 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2693 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2694 SrcVT != MVT::f128) {
2695 // Cannot eliminate/lower SHL for f128 yet.
2696 EVT Ty = OpVTLegal ? VT : MVT::i32;
2697 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2698 // place. We expect the SHL to be eliminated by other optimizations.
2699 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2700 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2701 if (!OpVTLegal && OpVTSizeInBits > 32)
2702 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2703 unsigned ShVal = Op.getValueSizeInBits() - 1;
2704 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2705 return TLO.CombineTo(Op,
2706 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2707 }
2708 }
2709
2710 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2711 // Demand the elt/bit if any of the original elts/bits are demanded.
2712 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2713 unsigned Scale = BitWidth / NumSrcEltBits;
2714 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2715 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2716 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2717 for (unsigned i = 0; i != Scale; ++i) {
2718 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2719 unsigned BitOffset = EltOffset * NumSrcEltBits;
2720 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2721 if (!Sub.isZero()) {
2722 DemandedSrcBits |= Sub;
2723 for (unsigned j = 0; j != NumElts; ++j)
2724 if (DemandedElts[j])
2725 DemandedSrcElts.setBit((j * Scale) + i);
2726 }
2727 }
2728
2729 APInt KnownSrcUndef, KnownSrcZero;
2730 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2731 KnownSrcZero, TLO, Depth + 1))
2732 return true;
2733
2734 KnownBits KnownSrcBits;
2735 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2736 KnownSrcBits, TLO, Depth + 1))
2737 return true;
2738 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2739 // TODO - bigendian once we have test coverage.
2740 unsigned Scale = NumSrcEltBits / BitWidth;
2741 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2742 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2743 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2744 for (unsigned i = 0; i != NumElts; ++i)
2745 if (DemandedElts[i]) {
2746 unsigned Offset = (i % Scale) * BitWidth;
2747 DemandedSrcBits.insertBits(DemandedBits, Offset);
2748 DemandedSrcElts.setBit(i / Scale);
2749 }
2750
2751 if (SrcVT.isVector()) {
2752 APInt KnownSrcUndef, KnownSrcZero;
2753 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2754 KnownSrcZero, TLO, Depth + 1))
2755 return true;
2756 }
2757
2758 KnownBits KnownSrcBits;
2759 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2760 KnownSrcBits, TLO, Depth + 1))
2761 return true;
2762
2763 // Attempt to avoid multi-use ops if we don't need anything from them.
2764 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2765 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2766 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2767 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2768 return TLO.CombineTo(Op, NewOp);
2769 }
2770 }
2771 }
2772
2773 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2774 // recursive call where Known may be useful to the caller.
2775 if (Depth > 0) {
2776 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2777 return false;
2778 }
2779 break;
2780 }
2781 case ISD::MUL:
2782 if (DemandedBits.isPowerOf2()) {
2783 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2784 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2785 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2786 unsigned CTZ = DemandedBits.countr_zero();
2787 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2788 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2789 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2790 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2791 return TLO.CombineTo(Op, Shl);
2792 }
2793 }
2794 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2795 // X * X is odd iff X is odd.
2796 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2797 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2798 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2799 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2800 return TLO.CombineTo(Op, And1);
2801 }
2802 [[fallthrough]];
2803 case ISD::ADD:
2804 case ISD::SUB: {
2805 // Add, Sub, and Mul don't demand any bits in positions beyond that
2806 // of the highest bit demanded of them.
2807 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2808 SDNodeFlags Flags = Op.getNode()->getFlags();
2809 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2810 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2811 KnownBits KnownOp0, KnownOp1;
2812 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2813 const KnownBits &KnownRHS) {
2814 if (Op.getOpcode() == ISD::MUL)
2815 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2816 return Demanded;
2817 };
2818 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2819 Depth + 1) ||
2820 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2821 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2822 // See if the operation should be performed at a smaller bit width.
2823 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2824 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2825 // Disable the nsw and nuw flags. We can no longer guarantee that we
2826 // won't wrap after simplification.
2827 Flags.setNoSignedWrap(false);
2828 Flags.setNoUnsignedWrap(false);
2829 Op->setFlags(Flags);
2830 }
2831 return true;
2832 }
2833
2834 // neg x with only low bit demanded is simply x.
2835 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2836 isNullConstant(Op0))
2837 return TLO.CombineTo(Op, Op1);
2838
2839 // Attempt to avoid multi-use ops if we don't need anything from them.
2840 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2841 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2842 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2843 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2844 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2845 if (DemandedOp0 || DemandedOp1) {
2846 Flags.setNoSignedWrap(false);
2847 Flags.setNoUnsignedWrap(false);
2848 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2849 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2850 SDValue NewOp =
2851 TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2852 return TLO.CombineTo(Op, NewOp);
2853 }
2854 }
2855
2856 // If we have a constant operand, we may be able to turn it into -1 if we
2857 // do not demand the high bits. This can make the constant smaller to
2858 // encode, allow more general folding, or match specialized instruction
2859 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2860 // is probably not useful (and could be detrimental).
2862 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2863 if (C && !C->isAllOnes() && !C->isOne() &&
2864 (C->getAPIntValue() | HighMask).isAllOnes()) {
2865 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2866 // Disable the nsw and nuw flags. We can no longer guarantee that we
2867 // won't wrap after simplification.
2868 Flags.setNoSignedWrap(false);
2869 Flags.setNoUnsignedWrap(false);
2870 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2871 return TLO.CombineTo(Op, NewOp);
2872 }
2873
2874 // Match a multiply with a disguised negated-power-of-2 and convert to a
2875 // an equivalent shift-left amount.
2876 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2877 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2878 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2879 return 0;
2880
2881 // Don't touch opaque constants. Also, ignore zero and power-of-2
2882 // multiplies. Those will get folded later.
2883 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2884 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2885 !MulC->getAPIntValue().isPowerOf2()) {
2886 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2887 if (UnmaskedC.isNegatedPowerOf2())
2888 return (-UnmaskedC).logBase2();
2889 }
2890 return 0;
2891 };
2892
2893 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2894 unsigned ShlAmt) {
2895 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2896 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2897 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2898 return TLO.CombineTo(Op, Res);
2899 };
2900
2902 if (Op.getOpcode() == ISD::ADD) {
2903 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2904 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2905 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2906 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2907 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2908 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2909 }
2910 if (Op.getOpcode() == ISD::SUB) {
2911 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2912 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2913 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2914 }
2915 }
2916
2917 if (Op.getOpcode() == ISD::MUL) {
2918 Known = KnownBits::mul(KnownOp0, KnownOp1);
2919 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2921 Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2922 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2923 }
2924 break;
2925 }
2926 default:
2927 // We also ask the target about intrinsics (which could be specific to it).
2928 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2929 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2930 // TODO: Probably okay to remove after audit; here to reduce change size
2931 // in initial enablement patch for scalable vectors
2932 if (Op.getValueType().isScalableVector())
2933 break;
2934 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2935 Known, TLO, Depth))
2936 return true;
2937 break;
2938 }
2939
2940 // Just use computeKnownBits to compute output bits.
2941 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2942 break;
2943 }
2944
2945 // If we know the value of all of the demanded bits, return this as a
2946 // constant.
2947 if (!isTargetCanonicalConstantNode(Op) &&
2948 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2949 // Avoid folding to a constant if any OpaqueConstant is involved.
2950 const SDNode *N = Op.getNode();
2951 for (SDNode *Op :
2953 if (auto *C = dyn_cast<ConstantSDNode>(Op))
2954 if (C->isOpaque())
2955 return false;
2956 }
2957 if (VT.isInteger())
2958 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2959 if (VT.isFloatingPoint())
2960 return TLO.CombineTo(
2961 Op,
2962 TLO.DAG.getConstantFP(
2963 APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2964 }
2965
2966 // A multi use 'all demanded elts' simplify failed to find any knownbits.
2967 // Try again just for the original demanded elts.
2968 // Ensure we do this AFTER constant folding above.
2969 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2970 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2971
2972 return false;
2973}
2974
// DAGCombiner-facing wrapper around the main SimplifyDemandedVectorElts
// analysis: builds a TargetLoweringOpt from DCI's legalization phase, runs
// the element-wise simplification, and on success requeues Op and commits
// the recorded node replacements back to the combiner.
// NOTE(review): the opening line of this definition (function name and the
// initial SDValue Op parameter) is not visible in this extract — confirm
// against the full source file.
2976 const APInt &DemandedElts,
2977 DAGCombinerInfo &DCI) const {
2978 SelectionDAG &DAG = DCI.DAG;
// Capture the combiner's legalization state so the core routine only
// introduces nodes that are still legal to create at this phase.
2979 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2980 !DCI.isBeforeLegalizeOps());
2981
// Out-parameters required by the core routine; the undef/zero masks are not
// consumed here beyond satisfying its signature.
2982 APInt KnownUndef, KnownZero;
2983 bool Simplified =
2984 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2985 if (Simplified) {
// Re-add Op so the combiner revisits it, then apply the queued CombineTo
// replacements recorded in TLO.
2986 DCI.AddToWorklist(Op.getNode());
2987 DCI.CommitTargetLoweringOpt(TLO);
2988 }
2989
2990 return Simplified;
2991}
2992
2993/// Given a vector binary operation and known undefined elements for each input
2994/// operand, compute whether each element of the output is undefined.
// Returns a bitmask with one bit per output element; a bit is set when
// constant-folding the per-element values of both operands produces an undef
// result for that lane.
// NOTE(review): the opening line of this definition (return type, name, and
// the leading SDValue/SelectionDAG parameters) is not visible in this
// extract — confirm against the full source file.
2996 const APInt &UndefOp0,
2997 const APInt &UndefOp1) {
2998 EVT VT = BO.getValueType();
3000 "Vector binop only");
3001
// Scalable vectors are treated as a single element here (NumElts == 1).
3002 EVT EltVT = VT.getVectorElementType();
3003 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3004 assert(UndefOp0.getBitWidth() == NumElts &&
3005 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3006
// Fetch element Index of operand V as either an UNDEF node (if UndefVals
// marks it undef) or a foldable scalar constant pulled out of a
// BUILD_VECTOR; a null SDValue means "not constant/undef, cannot fold".
3007 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3008 const APInt &UndefVals) {
3009 if (UndefVals[Index])
3010 return DAG.getUNDEF(EltVT);
3011
3012 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3013 // Try hard to make sure that the getNode() call is not creating temporary
3014 // nodes. Ignore opaque integers because they do not constant fold.
3015 SDValue Elt = BV->getOperand(Index);
3016 auto *C = dyn_cast<ConstantSDNode>(Elt);
3017 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3018 return Elt;
3019 }
3020
3021 return SDValue();
3022 };
3023
3024 APInt KnownUndef = APInt::getZero(NumElts);
3025 for (unsigned i = 0; i != NumElts; ++i) {
3026 // If both inputs for this element are either constant or undef and match
3027 // the element type, compute the constant/undef result for this element of
3028 // the vector.
3029 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3030 // not handle FP constants. The code within getNode() should be refactored
3031 // to avoid the danger of creating a bogus temporary node here.
3032 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3033 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3034 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
// getNode() constant-folds the scalar op; an undef result marks this lane.
3035 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3036 KnownUndef.setBit(i);
3037 }
3038 return KnownUndef;
3039}
3040
3042 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3043 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3044 bool AssumeSingleUse) const {
3045 EVT VT = Op.getValueType();
3046 unsigned Opcode = Op.getOpcode();
3047 APInt DemandedElts = OriginalDemandedElts;
3048 unsigned NumElts = DemandedElts.getBitWidth();
3049 assert(VT.isVector() && "Expected vector op");
3050
3051 KnownUndef = KnownZero = APInt::getZero(NumElts);
3052
3053 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
3054 if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
3055 return false;
3056
3057 // TODO: For now we assume we know nothing about scalable vectors.
3058 if (VT.isScalableVector())
3059 return false;
3060
3061 assert(VT.getVectorNumElements() == NumElts &&
3062 "Mask size mismatches value type element count!");
3063
3064 // Undef operand.
3065 if (Op.isUndef()) {
3066 KnownUndef.setAllBits();
3067 return false;
3068 }
3069
3070 // If Op has other users, assume that all elements are needed.
3071 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3072 DemandedElts.setAllBits();
3073
3074 // Not demanding any elements from Op.
3075 if (DemandedElts == 0) {
3076 KnownUndef.setAllBits();
3077 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3078 }
3079
3080 // Limit search depth.
3082 return false;
3083
3084 SDLoc DL(Op);
3085 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3086 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3087
3088 // Helper for demanding the specified elements and all the bits of both binary
3089 // operands.
3090 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3091 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3092 TLO.DAG, Depth + 1);
3093 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3094 TLO.DAG, Depth + 1);
3095 if (NewOp0 || NewOp1) {
3096 SDValue NewOp =
3097 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3098 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3099 return TLO.CombineTo(Op, NewOp);
3100 }
3101 return false;
3102 };
3103
3104 switch (Opcode) {
3105 case ISD::SCALAR_TO_VECTOR: {
3106 if (!DemandedElts[0]) {
3107 KnownUndef.setAllBits();
3108 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3109 }
3110 SDValue ScalarSrc = Op.getOperand(0);
3111 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3112 SDValue Src = ScalarSrc.getOperand(0);
3113 SDValue Idx = ScalarSrc.getOperand(1);
3114 EVT SrcVT = Src.getValueType();
3115
3116 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3117
3118 if (SrcEltCnt.isScalable())
3119 return false;
3120
3121 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3122 if (isNullConstant(Idx)) {
3123 APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3124 APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3125 APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3126 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3127 TLO, Depth + 1))
3128 return true;
3129 }
3130 }
3131 KnownUndef.setHighBits(NumElts - 1);
3132 break;
3133 }
3134 case ISD::BITCAST: {
3135 SDValue Src = Op.getOperand(0);
3136 EVT SrcVT = Src.getValueType();
3137
3138 // We only handle vectors here.
3139 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3140 if (!SrcVT.isVector())
3141 break;
3142
3143 // Fast handling of 'identity' bitcasts.
3144 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3145 if (NumSrcElts == NumElts)
3146 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3147 KnownZero, TLO, Depth + 1);
3148
3149 APInt SrcDemandedElts, SrcZero, SrcUndef;
3150
3151 // Bitcast from 'large element' src vector to 'small element' vector, we
3152 // must demand a source element if any DemandedElt maps to it.
3153 if ((NumElts % NumSrcElts) == 0) {
3154 unsigned Scale = NumElts / NumSrcElts;
3155 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3156 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3157 TLO, Depth + 1))
3158 return true;
3159
3160 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3161 // of the large element.
3162 // TODO - bigendian once we have test coverage.
3163 if (IsLE) {
3164 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3165 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3166 for (unsigned i = 0; i != NumElts; ++i)
3167 if (DemandedElts[i]) {
3168 unsigned Ofs = (i % Scale) * EltSizeInBits;
3169 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3170 }
3171
3172 KnownBits Known;
3173 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3174 TLO, Depth + 1))
3175 return true;
3176
3177 // The bitcast has split each wide element into a number of
3178 // narrow subelements. We have just computed the Known bits
3179 // for wide elements. See if element splitting results in
3180 // some subelements being zero. Only for demanded elements!
3181 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3182 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3183 .isAllOnes())
3184 continue;
3185 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3186 unsigned Elt = Scale * SrcElt + SubElt;
3187 if (DemandedElts[Elt])
3188 KnownZero.setBit(Elt);
3189 }
3190 }
3191 }
3192
3193 // If the src element is zero/undef then all the output elements will be -
3194 // only demanded elements are guaranteed to be correct.
3195 for (unsigned i = 0; i != NumSrcElts; ++i) {
3196 if (SrcDemandedElts[i]) {
3197 if (SrcZero[i])
3198 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3199 if (SrcUndef[i])
3200 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3201 }
3202 }
3203 }
3204
3205 // Bitcast from 'small element' src vector to 'large element' vector, we
3206 // demand all smaller source elements covered by the larger demanded element
3207 // of this vector.
3208 if ((NumSrcElts % NumElts) == 0) {
3209 unsigned Scale = NumSrcElts / NumElts;
3210 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3211 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3212 TLO, Depth + 1))
3213 return true;
3214
3215 // If all the src elements covering an output element are zero/undef, then
3216 // the output element will be as well, assuming it was demanded.
3217 for (unsigned i = 0; i != NumElts; ++i) {
3218 if (DemandedElts[i]) {
3219 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3220 KnownZero.setBit(i);
3221 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3222 KnownUndef.setBit(i);
3223 }
3224 }
3225 }
3226 break;
3227 }
3228 case ISD::FREEZE: {
3229 SDValue N0 = Op.getOperand(0);
3230 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3231 /*PoisonOnly=*/false))
3232 return TLO.CombineTo(Op, N0);
3233
3234 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3235 // freeze(op(x, ...)) -> op(freeze(x), ...).
3236 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3237 return TLO.CombineTo(
3239 TLO.DAG.getFreeze(N0.getOperand(0))));
3240 break;
3241 }
3242 case ISD::BUILD_VECTOR: {
3243 // Check all elements and simplify any unused elements with UNDEF.
3244 if (!DemandedElts.isAllOnes()) {
3245 // Don't simplify BROADCASTS.
3246 if (llvm::any_of(Op->op_values(),
3247 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3248 SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
3249 bool Updated = false;
3250 for (unsigned i = 0; i != NumElts; ++i) {
3251 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3252 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3253 KnownUndef.setBit(i);
3254 Updated = true;
3255 }
3256 }
3257 if (Updated)
3258 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3259 }
3260 }
3261 for (unsigned i = 0; i != NumElts; ++i) {
3262 SDValue SrcOp = Op.getOperand(i);
3263 if (SrcOp.isUndef()) {
3264 KnownUndef.setBit(i);
3265 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3267 KnownZero.setBit(i);
3268 }
3269 }
3270 break;
3271 }
3272 case ISD::CONCAT_VECTORS: {
3273 EVT SubVT = Op.getOperand(0).getValueType();
3274 unsigned NumSubVecs = Op.getNumOperands();
3275 unsigned NumSubElts = SubVT.getVectorNumElements();
3276 for (unsigned i = 0; i != NumSubVecs; ++i) {
3277 SDValue SubOp = Op.getOperand(i);
3278 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3279 APInt SubUndef, SubZero;
3280 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3281 Depth + 1))
3282 return true;
3283 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3284 KnownZero.insertBits(SubZero, i * NumSubElts);
3285 }
3286
3287 // Attempt to avoid multi-use ops if we don't need anything from them.
3288 if (!DemandedElts.isAllOnes()) {
3289 bool FoundNewSub = false;
3290 SmallVector<SDValue, 2> DemandedSubOps;
3291 for (unsigned i = 0; i != NumSubVecs; ++i) {
3292 SDValue SubOp = Op.getOperand(i);
3293 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3294 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3295 SubOp, SubElts, TLO.DAG, Depth + 1);
3296 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3297 FoundNewSub = NewSubOp ? true : FoundNewSub;
3298 }
3299 if (FoundNewSub) {
3300 SDValue NewOp =
3301 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3302 return TLO.CombineTo(Op, NewOp);
3303 }
3304 }
3305 break;
3306 }
3307 case ISD::INSERT_SUBVECTOR: {
3308 // Demand any elements from the subvector and the remainder from the src its
3309 // inserted into.
3310 SDValue Src = Op.getOperand(0);
3311 SDValue Sub = Op.getOperand(1);
3312 uint64_t Idx = Op.getConstantOperandVal(2);
3313 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3314 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3315 APInt DemandedSrcElts = DemandedElts;
3316 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3317
3318 APInt SubUndef, SubZero;
3319 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3320 Depth + 1))
3321 return true;
3322
3323 // If none of the src operand elements are demanded, replace it with undef.
3324 if (!DemandedSrcElts && !Src.isUndef())
3325 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3326 TLO.DAG.getUNDEF(VT), Sub,
3327 Op.getOperand(2)));
3328
3329 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3330 TLO, Depth + 1))
3331 return true;
3332 KnownUndef.insertBits(SubUndef, Idx);
3333 KnownZero.insertBits(SubZero, Idx);
3334
3335 // Attempt to avoid multi-use ops if we don't need anything from them.
3336 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3337 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3338 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3339 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3340 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3341 if (NewSrc || NewSub) {
3342 NewSrc = NewSrc ? NewSrc : Src;
3343 NewSub = NewSub ? NewSub : Sub;
3344 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3345 NewSub, Op.getOperand(2));
3346 return TLO.CombineTo(Op, NewOp);
3347 }
3348 }
3349 break;
3350 }
3352 // Offset the demanded elts by the subvector index.
3353 SDValue Src = Op.getOperand(0);
3354 if (Src.getValueType().isScalableVector())
3355 break;
3356 uint64_t Idx = Op.getConstantOperandVal(1);
3357 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3358 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3359
3360 APInt SrcUndef, SrcZero;
3361 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3362 Depth + 1))
3363 return true;
3364 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3365 KnownZero = SrcZero.extractBits(NumElts, Idx);
3366
3367 // Attempt to avoid multi-use ops if we don't need anything from them.
3368 if (!DemandedElts.isAllOnes()) {
3369 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3370 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3371 if (NewSrc) {
3372 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3373 Op.getOperand(1));
3374 return TLO.CombineTo(Op, NewOp);
3375 }
3376 }
3377 break;
3378 }
3380 SDValue Vec = Op.getOperand(0);
3381 SDValue Scl = Op.getOperand(1);
3382 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3383
3384 // For a legal, constant insertion index, if we don't need this insertion
3385 // then strip it, else remove it from the demanded elts.
3386 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3387 unsigned Idx = CIdx->getZExtValue();
3388 if (!DemandedElts[Idx])
3389 return TLO.CombineTo(Op, Vec);
3390
3391 APInt DemandedVecElts(DemandedElts);
3392 DemandedVecElts.clearBit(Idx);
3393 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3394 KnownZero, TLO, Depth + 1))
3395 return true;
3396
3397 KnownUndef.setBitVal(Idx, Scl.isUndef());
3398
3399 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3400 break;
3401 }
3402
3403 APInt VecUndef, VecZero;
3404 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3405 Depth + 1))
3406 return true;
3407 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3408 break;
3409 }
3410 case ISD::VSELECT: {
3411 SDValue Sel = Op.getOperand(0);
3412 SDValue LHS = Op.getOperand(1);
3413 SDValue RHS = Op.getOperand(2);
3414
3415 // Try to transform the select condition based on the current demanded
3416 // elements.
3417 APInt UndefSel, ZeroSel;
3418 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3419 Depth + 1))
3420 return true;
3421
3422 // See if we can simplify either vselect operand.
3423 APInt DemandedLHS(DemandedElts);
3424 APInt DemandedRHS(DemandedElts);
3425 APInt UndefLHS, ZeroLHS;
3426 APInt UndefRHS, ZeroRHS;
3427 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3428 Depth + 1))
3429 return true;
3430 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3431 Depth + 1))
3432 return true;
3433
3434 KnownUndef = UndefLHS & UndefRHS;
3435 KnownZero = ZeroLHS & ZeroRHS;
3436
3437 // If we know that the selected element is always zero, we don't need the
3438 // select value element.
3439 APInt DemandedSel = DemandedElts & ~KnownZero;
3440 if (DemandedSel != DemandedElts)
3441 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3442 Depth + 1))
3443 return true;
3444
3445 break;
3446 }
3447 case ISD::VECTOR_SHUFFLE: {
3448 SDValue LHS = Op.getOperand(0);
3449 SDValue RHS = Op.getOperand(1);
3450 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3451
3452 // Collect demanded elements from shuffle operands..
3453 APInt DemandedLHS(NumElts, 0);
3454 APInt DemandedRHS(NumElts, 0);
3455 for (unsigned i = 0; i != NumElts; ++i) {
3456 int M = ShuffleMask[i];
3457 if (M < 0 || !DemandedElts[i])
3458 continue;
3459 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3460 if (M < (int)NumElts)
3461 DemandedLHS.setBit(M);
3462 else
3463 DemandedRHS.setBit(M - NumElts);
3464 }
3465
3466 // See if we can simplify either shuffle operand.
3467 APInt UndefLHS, ZeroLHS;
3468 APInt UndefRHS, ZeroRHS;
3469 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3470 Depth + 1))
3471 return true;
3472 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3473 Depth + 1))
3474 return true;
3475
3476 // Simplify mask using undef elements from LHS/RHS.
3477 bool Updated = false;
3478 bool IdentityLHS = true, IdentityRHS = true;
3479 SmallVector<int, 32> NewMask(ShuffleMask);
3480 for (unsigned i = 0; i != NumElts; ++i) {
3481 int &M = NewMask[i];
3482 if (M < 0)
3483 continue;
3484 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3485 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3486 Updated = true;
3487 M = -1;
3488 }
3489 IdentityLHS &= (M < 0) || (M == (int)i);
3490 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3491 }
3492
3493 // Update legal shuffle masks based on demanded elements if it won't reduce
3494 // to Identity which can cause premature removal of the shuffle mask.
3495 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3496 SDValue LegalShuffle =
3497 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3498 if (LegalShuffle)
3499 return TLO.CombineTo(Op, LegalShuffle);
3500 }
3501
3502 // Propagate undef/zero elements from LHS/RHS.
3503 for (unsigned i = 0; i != NumElts; ++i) {
3504 int M = ShuffleMask[i];
3505 if (M < 0) {
3506 KnownUndef.setBit(i);
3507 } else if (M < (int)NumElts) {
3508 if (UndefLHS[M])
3509 KnownUndef.setBit(i);
3510 if (ZeroLHS[M])
3511 KnownZero.setBit(i);
3512 } else {
3513 if (UndefRHS[M - NumElts])
3514 KnownUndef.setBit(i);
3515 if (ZeroRHS[M - NumElts])
3516 KnownZero.setBit(i);
3517 }
3518 }
3519 break;
3520 }
3524 APInt SrcUndef, SrcZero;
3525 SDValue Src = Op.getOperand(0);
3526 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3527 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3528 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3529 Depth + 1))
3530 return true;
3531 KnownZero = SrcZero.zextOrTrunc(NumElts);
3532 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3533
3534 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3535 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3536 DemandedSrcElts == 1) {
3537 // aext - if we just need the bottom element then we can bitcast.
3538 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3539 }
3540
3541 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3542 // zext(undef) upper bits are guaranteed to be zero.
3543 if (DemandedElts.isSubsetOf(KnownUndef))
3544 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3545 KnownUndef.clearAllBits();
3546
3547 // zext - if we just need the bottom element then we can mask:
3548 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3549 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3550 Op->isOnlyUserOf(Src.getNode()) &&
3551 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3552 SDLoc DL(Op);
3553 EVT SrcVT = Src.getValueType();
3554 EVT SrcSVT = SrcVT.getScalarType();
3555 SmallVector<SDValue> MaskElts;
3556 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3557 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3558 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3559 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3560 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3561 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3562 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3563 }
3564 }
3565 }
3566 break;
3567 }
3568
3569 // TODO: There are more binop opcodes that could be handled here - MIN,
3570 // MAX, saturated math, etc.
3571 case ISD::ADD: {
3572 SDValue Op0 = Op.getOperand(0);
3573 SDValue Op1 = Op.getOperand(1);
3574 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3575 APInt UndefLHS, ZeroLHS;
3576 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3577 Depth + 1, /*AssumeSingleUse*/ true))
3578 return true;
3579 }
3580 [[fallthrough]];
3581 }
3582 case ISD::AVGCEILS:
3583 case ISD::AVGCEILU:
3584 case ISD::AVGFLOORS:
3585 case ISD::AVGFLOORU:
3586 case ISD::OR:
3587 case ISD::XOR:
3588 case ISD::SUB:
3589 case ISD::FADD:
3590 case ISD::FSUB:
3591 case ISD::FMUL:
3592 case ISD::FDIV:
3593 case ISD::FREM: {
3594 SDValue Op0 = Op.getOperand(0);
3595 SDValue Op1 = Op.getOperand(1);
3596
3597 APInt UndefRHS, ZeroRHS;
3598 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3599 Depth + 1))
3600 return true;
3601 APInt UndefLHS, ZeroLHS;
3602 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3603 Depth + 1))
3604 return true;
3605
3606 KnownZero = ZeroLHS & ZeroRHS;
3607 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3608
3609 // Attempt to avoid multi-use ops if we don't need anything from them.
3610 // TODO - use KnownUndef to relax the demandedelts?
3611 if (!DemandedElts.isAllOnes())
3612 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3613 return true;
3614 break;
3615 }
3616 case ISD::SHL:
3617 case ISD::SRL:
3618 case ISD::SRA:
3619 case ISD::ROTL:
3620 case ISD::ROTR: {
3621 SDValue Op0 = Op.getOperand(0);
3622 SDValue Op1 = Op.getOperand(1);
3623
3624 APInt UndefRHS, ZeroRHS;
3625 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3626 Depth + 1))
3627 return true;
3628 APInt UndefLHS, ZeroLHS;
3629 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3630 Depth + 1))
3631 return true;
3632
3633 KnownZero = ZeroLHS;
3634 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3635
3636 // Attempt to avoid multi-use ops if we don't need anything from them.
3637 // TODO - use KnownUndef to relax the demandedelts?
3638 if (!DemandedElts.isAllOnes())
3639 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3640 return true;
3641 break;
3642 }
3643 case ISD::MUL:
3644 case ISD::MULHU:
3645 case ISD::MULHS:
3646 case ISD::AND: {
3647 SDValue Op0 = Op.getOperand(0);
3648 SDValue Op1 = Op.getOperand(1);
3649
3650 APInt SrcUndef, SrcZero;
3651 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3652 Depth + 1))
3653 return true;
3654 // If we know that a demanded element was zero in Op1 we don't need to
3655 // demand it in Op0 - its guaranteed to be zero.
3656 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3657 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3658 TLO, Depth + 1))
3659 return true;
3660
3661 KnownUndef &= DemandedElts0;
3662 KnownZero &= DemandedElts0;
3663
3664 // If every element pair has a zero/undef then just fold to zero.
3665 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3666 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3667 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3668 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3669
3670 // If either side has a zero element, then the result element is zero, even
3671 // if the other is an UNDEF.
3672 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3673 // and then handle 'and' nodes with the rest of the binop opcodes.
3674 KnownZero |= SrcZero;
3675 KnownUndef &= SrcUndef;
3676 KnownUndef &= ~KnownZero;
3677
3678 // Attempt to avoid multi-use ops if we don't need anything from them.
3679 if (!DemandedElts.isAllOnes())
3680 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3681 return true;
3682 break;
3683 }
3684 case ISD::TRUNCATE:
3685 case ISD::SIGN_EXTEND:
3686 case ISD::ZERO_EXTEND:
3687 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3688 KnownZero, TLO, Depth + 1))
3689 return true;
3690
3691 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3692 // zext(undef) upper bits are guaranteed to be zero.
3693 if (DemandedElts.isSubsetOf(KnownUndef))
3694 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3695 KnownUndef.clearAllBits();
3696 }
3697 break;
3698 default: {
3699 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3700 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3701 KnownZero, TLO, Depth))
3702 return true;
3703 } else {
3704 KnownBits Known;
3705 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3706 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3707 TLO, Depth, AssumeSingleUse))
3708 return true;
3709 }
3710 break;
3711 }
3712 }
3713 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3714
3715 // Constant fold all undef cases.
3716 // TODO: Handle zero cases as well.
3717 if (DemandedElts.isSubsetOf(KnownUndef))
3718 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3719
3720 return false;
3721}
3722
3723/// Determine which of the bits specified in Mask are known to be either zero or
3724/// one and return them in the Known.
/// Base-class default of the computeKnownBitsForTargetNode hook: the assert
/// verifies Op really is a target-specific node or intrinsic, then Known is
/// conservatively reset (no bits known). Targets override this to do better.
/// NOTE(review): the line holding the function's return type/name/first
/// parameter (source line 3725) is missing from this extraction -- confirm
/// against the upstream file.
 3726 KnownBits &Known,
 3727 const APInt &DemandedElts,
 3728 const SelectionDAG &DAG,
 3729 unsigned Depth) const {
 3730 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
 3731 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
 3732 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
 3733 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
 3734 "Should use MaskedValueIsZero if you don't know whether Op"
 3735 " is a target node!");
 // Conservative default: nothing is known about any bit.
 3736 Known.resetAll();
 3737}
3738
// Base-class default (GlobalISel variant): conservatively report no known
// bits for a target instruction's result register.
// NOTE(review): the signature's opening lines (source lines 3739-3740) are
// missing from this extraction -- confirm against the upstream file.
 3741 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
 3742 unsigned Depth) const {
 3743 Known.resetAll();
 3744}
3745
// Frame-index pointers: the only thing known for free is that the low bits
// are zero, derived from the frame object's alignment.
// NOTE(review): the signature's opening line (source line 3746) is missing
// from this extraction -- confirm against the upstream file.
 3747 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
 3748 // The low bits are known zero if the pointer is aligned.
 3749 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
 3750}
3751
// Base-class default: no alignment information is known for a target
// instruction, so report the weakest possible alignment (1 byte).
// NOTE(review): the signature's opening lines (source lines 3752-3753) are
// missing from this extraction -- confirm against the upstream file.
 3754 unsigned Depth) const {
 3755 return Align(1);
 3756}
3757
3758/// This method can be implemented by targets that want to expose additional
3759/// information about sign bits to the DAG Combiner.
/// Default: returning 1 claims only the trivially-true fact that the value
/// has at least one (the top) sign bit.
/// NOTE(review): the signature's opening line (source line 3760) is missing
/// from this extraction -- confirm against the upstream file.
 3761 const APInt &,
 3762 const SelectionDAG &,
 3763 unsigned Depth) const {
 3764 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
 3765 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
 3766 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
 3767 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
 3768 "Should use ComputeNumSignBits if you don't know whether Op"
 3769 " is a target node!");
 3770 return 1;
 3771}
3772
// GlobalISel variant: same conservative default of one known sign bit.
// NOTE(review): the signature's opening line (source line 3773) is missing
// from this extraction -- confirm against the upstream file.
 3774 GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
 3775 const MachineRegisterInfo &MRI, unsigned Depth) const {
 3776 return 1;
 3777}
3778
// Base-class default: perform no target-specific demanded-vector-elements
// simplification (returning false means "nothing was changed").
// NOTE(review): the signature's opening line (source line 3779) is missing
// from this extraction -- confirm against the upstream file.
 3780 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
 3781 TargetLoweringOpt &TLO, unsigned Depth) const {
 3782 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
 3783 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
 3784 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
 3785 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
 3786 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
 3787 " is a target node!");
 3788 return false;
 3789}
3790
// Base-class default: no target-specific demanded-bits simplification, but
// Known is still populated via the (possibly target-overridden) known-bits
// hook so callers get whatever information is available.
// NOTE(review): the signature's opening line (source line 3791) is missing
// from this extraction -- confirm against the upstream file.
 3792 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
 3793 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
 3794 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
 3795 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
 3796 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
 3797 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
 3798 "Should use SimplifyDemandedBits if you don't know whether Op"
 3799 " is a target node!");
 3800 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
 3801 return false;
 3802}
3803
// Base-class default: offer no simpler multi-use replacement for the node
// (a null SDValue tells the caller nothing was found).
// NOTE(review): the signature's opening line (source line 3804) is missing
// from this extraction -- confirm against the upstream file.
 3805 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
 3806 SelectionDAG &DAG, unsigned Depth) const {
 3807 assert(
 3808 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
 3809 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
 3810 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
 3811 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
 3812 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
 3813 " is a target node!");
 3814 return SDValue();
 3815}
3816
3817SDValue
// Try to build a VECTOR_SHUFFLE whose mask is legal for the target. If the
// mask is illegal as-is, the operands are swapped and the (commuted) mask is
// retried; a null SDValue is returned when no legal form exists.
// NOTE(review): this extraction is missing the signature lines (source lines
// 3818-3819) AND source line 3824 between the std::swap and the second
// legality check -- presumably the call that commutes the mask to match the
// swapped operands. Confirm against the upstream file.
 3820 SelectionDAG &DAG) const {
 3821 bool LegalMask = isShuffleMaskLegal(Mask, VT);
 3822 if (!LegalMask) {
 3823 std::swap(N0, N1);
 3825 LegalMask = isShuffleMaskLegal(Mask, VT);
 3826 }
 3827
 3828 if (!LegalMask)
 3829 return SDValue();
 3830
 3831 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
 3832}
3833
// Base-class default: no constant can be recovered, so return null.
// NOTE(review): the signature line (source line 3834) is missing from this
// extraction -- presumably TargetLowering::getTargetConstantFromLoad, but
// verify against the upstream file.
 3835 return nullptr;
 3836}
3837
// Base-class default: a target node is not undef/poison iff it cannot create
// undef/poison itself AND every operand is guaranteed not to be undef/poison.
// NOTE(review): the signature's opening line (source line 3838) is missing
// from this extraction -- confirm against the upstream file.
 3839 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
 3840 bool PoisonOnly, unsigned Depth) const {
 3841 assert(
 3842 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
 3843 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
 3844 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
 3845 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
 3846 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
 3847 " is a target node!");
 3848
 3849 // If Op can't create undef/poison and none of its operands are undef/poison
 3850 // then Op is never undef/poison.
 3851 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
 3852 /*ConsiderFlags*/ true, Depth) &&
 3853 all_of(Op->ops(), [&](SDValue V) {
 3854 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
 3855 Depth + 1);
 3856 });
 3857}
3858
// Base-class default: conservatively assume any target node may create
// undef/poison; targets override this to enable poison-based reasoning.
// NOTE(review): the signature's opening line (source line 3859) is missing
// from this extraction -- confirm against the upstream file.
 3860 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
 3861 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
 3862 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
 3863 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
 3864 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
 3865 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
 3866 "Should use canCreateUndefOrPoison if you don't know whether Op"
 3867 " is a target node!");
 3868 // Be conservative and return true.
 3869 return true;
 3870}
3871
// Base-class default: conservatively report that the value may be NaN
// (returning false means "not known to be NaN-free").
// NOTE(review): the signature's opening line (source line 3872) is missing
// from this extraction -- confirm against the upstream file.
 3873 const SelectionDAG &DAG,
 3874 bool SNaN,
 3875 unsigned Depth) const {
 3876 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
 3877 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
 3878 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
 3879 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
 3880 "Should use isKnownNeverNaN if you don't know whether Op"
 3881 " is a target node!");
 3882 return false;
 3883}
3884
// Base-class default: conservatively report that the target node is not a
// known splat value.
// NOTE(review): the signature's opening line (source line 3885) is missing
// from this extraction -- confirm against the upstream file.
 3886 const APInt &DemandedElts,
 3887 APInt &UndefElts,
 3888 const SelectionDAG &DAG,
 3889 unsigned Depth) const {
 3890 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
 3891 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
 3892 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
 3893 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
 3894 "Should use isSplatValue if you don't know whether Op"
 3895 " is a target node!");
 3896 return false;
 3897}
3898
3899// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3900// work with truncating build vectors and vectors with elements of less than
3901// 8 bits.
// Decide whether N is the target's canonical "true" boolean constant (scalar
// or splat), interpreted per getBooleanContents() for N's type.
// NOTE(review): the signature line (source line 3902, presumably
// TargetLowering::isConstTrueVal) and the switch's case labels (source lines
// 3921/3923/3925 -- presumably Undefined / ZeroOrOne / ZeroOrNegativeOne
// boolean contents) are missing from this extraction; confirm upstream.
 3903 if (!N)
 3904 return false;
 3905
 3906 unsigned EltWidth;
 3907 APInt CVal;
 3908 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
 3909 /*AllowTruncation=*/true)) {
 3910 CVal = CN->getAPIntValue();
 3911 EltWidth = N.getValueType().getScalarSizeInBits();
 3912 } else
 3913 return false;
 3914
 3915 // If this is a truncating splat, truncate the splat value.
 3916 // Otherwise, we may fail to match the expected values below.
 3917 if (EltWidth < CVal.getBitWidth())
 3918 CVal = CVal.trunc(EltWidth);
 3919
 3920 switch (getBooleanContents(N.getValueType())) {
 // Undefined contents: only the low bit matters.
 3922 return CVal[0];
 // Zero-or-one contents: true is exactly 1.
 3924 return CVal.isOne();
 // Zero-or-negative-one contents: true is all-ones.
 3926 return CVal.isAllOnes();
 3927 }
 3928
 3929 llvm_unreachable("Invalid boolean contents");
 3930}
3931
// Decide whether N is the target's canonical "false" boolean constant
// (scalar ConstantSDNode or a constant splat build-vector): with undefined
// boolean contents only the low bit must be clear, otherwise the value must
// be exactly zero.
// NOTE(review): the signature line (source line 3932, presumably
// TargetLowering::isConstFalseVal) is missing from this extraction --
// confirm against the upstream file.
 3933 if (!N)
 3934 return false;
 3935
 3936 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
 3937 if (!CN) {
 3938 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
 3939 if (!BV)
 3940 return false;
 3941
 3942 // Only interested in constant splats, we don't care about undef
 3943 // elements in identifying boolean constants and getConstantSplatNode
 3944 // returns NULL if all ops are undef;
 3945 CN = BV->getConstantSplatNode();
 3946 if (!CN)
 3947 return false;
 3948 }
 3949
 3950 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
 3951 return !CN->getAPIntValue()[0];
 3952
 3953 return CN->isZero();
 3954}
3955
// Decide whether constant N is "true" when extended from i1: for i1 itself
// true is 1; otherwise the answer depends on the boolean-contents style and
// whether the extension was a sign extension (SExt).
// NOTE(review): this extraction is missing the signature's opening line
// (source line 3956), the line initializing Cnt (source line 3961,
// presumably Cnt = getBooleanContents(...)), and the switch's case labels
// (source lines 3963 and 3967-3968). Confirm against the upstream file.
 3957 bool SExt) const {
 3958 if (VT == MVT::i1)
 3959 return N->isOne();
 3960
 3962 switch (Cnt) {
 3964 // An extended value of 1 is always true, unless its original type is i1,
 3965 // in which case it will be sign extended to -1.
 3966 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
 // Zero-or-negative-one contents: true must be all-ones via sign extension.
 3969 return N->isAllOnes() && SExt;
 3970 }
 3971 llvm_unreachable("Unexpected enumeration.");
 3972}
3973
3974/// This helper function of SimplifySetCC tries to optimize the comparison when
3975/// either operand of the SetCC node is a bitwise-and instruction.
/// Only integer eq/ne comparisons against an AND are handled; all other
/// shapes return a null SDValue (no fold).
/// NOTE(review): this extraction is missing several interior source lines:
/// 3991-3992 (continuation of the SETNE-vs-0 guard), 4015 (the condition
/// code passed to getSetCC in the narrow signbit fold), and 4038/4044/4046
/// (continuations of the isXAndYEqZeroPreferableToXAndYEqY branch). Confirm
/// against the upstream file before relying on the exact conditions.
3976SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
 3977 ISD::CondCode Cond, const SDLoc &DL,
 3978 DAGCombinerInfo &DCI) const {
 // Canonicalize so the AND (if any) is on the N0 side.
 3979 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
 3980 std::swap(N0, N1);
 3981
 3982 SelectionDAG &DAG = DCI.DAG;
 3983 EVT OpVT = N0.getValueType();
 3984 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
 3985 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
 3986 return SDValue();
 3987
 3988 // (X & Y) != 0 --> zextOrTrunc(X & Y)
 3989 // iff everything but LSB is known zero:
 3990 if (Cond == ISD::SETNE && isNullConstant(N1) &&
 3993 unsigned NumEltBits = OpVT.getScalarSizeInBits();
 3994 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
 3995 if (DAG.MaskedValueIsZero(N0, UpperBits))
 3996 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
 3997 }
 3998
 3999 // Try to eliminate a power-of-2 mask constant by converting to a signbit
 4000 // test in a narrow type that we can truncate to with no cost. Examples:
 4001 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
 4002 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
 4003 // TODO: This conservatively checks for type legality on the source and
 4004 // destination types. That may inhibit optimizations, but it also
 4005 // allows setcc->shift transforms that may be more beneficial.
 4006 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
 4007 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
 4008 isTypeLegal(OpVT) && N0.hasOneUse()) {
 4009 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
 4010 AndC->getAPIntValue().getActiveBits());
 4011 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
 4012 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
 4013 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
 4014 return DAG.getSetCC(DL, VT, Trunc, Zero,
 4016 }
 4017 }
 4018
 4019 // Match these patterns in any of their permutations:
 4020 // (X & Y) == Y
 4021 // (X & Y) != Y
 4022 SDValue X, Y;
 4023 if (N0.getOperand(0) == N1) {
 4024 X = N0.getOperand(1);
 4025 Y = N0.getOperand(0);
 4026 } else if (N0.getOperand(1) == N1) {
 4027 X = N0.getOperand(0);
 4028 Y = N0.getOperand(1);
 4029 } else {
 4030 return SDValue();
 4031 }
 4032
 4033 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
 4034 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
 4035 // it's liable to create an infinite loop.
 4036 SDValue Zero = DAG.getConstant(0, DL, OpVT);
 4037 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
 4039 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
 4040 // Note that where Y is variable and is known to have at most one bit set
 4041 // (for example, if it is Z & 1) we cannot do this; the expressions are not
 4042 // equivalent when Y == 0.
 4043 assert(OpVT.isInteger());
 4045 if (DCI.isBeforeLegalizeOps() ||
 4047 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
 4048 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
 4049 // If the target supports an 'and-not' or 'and-complement' logic operation,
 4050 // try to use that to make a comparison operation more efficient.
 4051 // But don't do this transform if the mask is a single bit because there are
 4052 // more efficient ways to deal with that case (for example, 'bt' on x86 or
 4053 // 'rlwinm' on PPC).
 4054
 4055 // Bail out if the compare operand that we want to turn into a zero is
 4056 // already a zero (otherwise, infinite loop).
 4057 if (isNullConstant(Y))
 4058 return SDValue();
 4059
 4060 // Transform this into: ~X & Y == 0.
 4061 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
 4062 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
 4063 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
 4064 }
 4065
 4066 return SDValue();
 4067}
4068
4069/// There are multiple IR patterns that could be checking whether certain
4070/// truncation of a signed number would be lossy or not. The pattern which is
4071/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4072/// We are looking for the following pattern: (KeptBits is a constant)
4073/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4074/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4075/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4076/// We will unfold it into the natural trunc+sext pattern:
4077/// ((%x << C) a>> C) dstcond %x
4078/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
/// NOTE(review): two interior source lines are missing from this extraction:
/// 4150 (presumably the call to the target's
/// shouldTransformSignedTruncationCheck predicate whose arguments continue
/// on the "XVT, KeptBits" line) and 4157 (the SIGN_EXTEND_INREG opcode/
/// operand line of the getNode call). Confirm against the upstream file.
4079SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
 4080 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
 4081 const SDLoc &DL) const {
 4082 // We must be comparing with a constant.
 4083 ConstantSDNode *C1;
 4084 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
 4085 return SDValue();
 4086
 4087 // N0 should be: add %x, (1 << (KeptBits-1))
 4088 if (N0->getOpcode() != ISD::ADD)
 4089 return SDValue();
 4090
 4091 // And we must be 'add'ing a constant.
 4092 ConstantSDNode *C01;
 4093 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
 4094 return SDValue();
 4095
 4096 SDValue X = N0->getOperand(0);
 4097 EVT XVT = X.getValueType();
 4098
 4099 // Validate constants ...
 4100
 4101 APInt I1 = C1->getAPIntValue();
 4102
 // Normalize the four unsigned-compare forms to eq/ne against canonical
 // constants (SETULE/SETUGT need the RHS bumped by one first).
 4103 ISD::CondCode NewCond;
 4104 if (Cond == ISD::CondCode::SETULT) {
 4105 NewCond = ISD::CondCode::SETEQ;
 4106 } else if (Cond == ISD::CondCode::SETULE) {
 4107 NewCond = ISD::CondCode::SETEQ;
 4108 // But need to 'canonicalize' the constant.
 4109 I1 += 1;
 4110 } else if (Cond == ISD::CondCode::SETUGT) {
 4111 NewCond = ISD::CondCode::SETNE;
 4112 // But need to 'canonicalize' the constant.
 4113 I1 += 1;
 4114 } else if (Cond == ISD::CondCode::SETUGE) {
 4115 NewCond = ISD::CondCode::SETNE;
 4116 } else
 4117 return SDValue();
 4118
 4119 APInt I01 = C01->getAPIntValue();
 4120
 4121 auto checkConstants = [&I1, &I01]() -> bool {
 4122 // Both of them must be power-of-two, and the constant from setcc is bigger.
 4123 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
 4124 };
 4125
 4126 if (checkConstants()) {
 4127 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
 4128 } else {
 4129 // What if we invert constants? (and the target predicate)
 4130 I1.negate();
 4131 I01.negate();
 4132 assert(XVT.isInteger());
 4133 NewCond = getSetCCInverse(NewCond, XVT);
 4134 if (!checkConstants())
 4135 return SDValue();
 4136 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
 4137 }
 4138
 4139 // They are power-of-two, so which bit is set?
 4140 const unsigned KeptBits = I1.logBase2();
 4141 const unsigned KeptBitsMinusOne = I01.logBase2();
 4142
 4143 // Magic!
 4144 if (KeptBits != (KeptBitsMinusOne + 1))
 4145 return SDValue();
 4146 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
 4147
 4148 // We don't want to do this in every single case.
 4149 SelectionDAG &DAG = DCI.DAG;
 4151 XVT, KeptBits))
 4152 return SDValue();
 4153
 4154 // Unfold into: sext_inreg(%x) cond %x
 4155 // Where 'cond' will be either 'eq' or 'ne'.
 4156 SDValue SExtInReg = DAG.getNode(
 4158 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
 4159 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
 4160}
4161
4162// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
// Hoist a shifted constant out of an AND feeding an eq/ne-with-0 compare by
// applying the opposite logical shift to the other AND operand instead.
// NOTE(review): this extraction is missing interior source lines 4166 (the
// first assert's condition -- presumably that N1C is a zero constant/splat),
// 4196 and 4202 (the declarations of the local constants CC and XC that are
// initialized by the isConstOrConstSplat continuation lines below). Confirm
// against the upstream file.
4163SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
 4164 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
 4165 DAGCombinerInfo &DCI, const SDLoc &DL) const {
 4167 "Should be a comparison with 0.");
 4168 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
 4169 "Valid only for [in]equality comparisons.");
 4170
 4171 unsigned NewShiftOpcode;
 4172 SDValue X, C, Y;
 4173
 4174 SelectionDAG &DAG = DCI.DAG;
 4175 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 4176
 4177 // Look for '(C l>>/<< Y)'.
 4178 auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
 4179 // The shift should be one-use.
 4180 if (!V.hasOneUse())
 4181 return false;
 4182 unsigned OldShiftOpcode = V.getOpcode();
 4183 switch (OldShiftOpcode) {
 4184 case ISD::SHL:
 4185 NewShiftOpcode = ISD::SRL;
 4186 break;
 4187 case ISD::SRL:
 4188 NewShiftOpcode = ISD::SHL;
 4189 break;
 4190 default:
 4191 return false; // must be a logical shift.
 4192 }
 4193 // We should be shifting a constant.
 4194 // FIXME: best to use isConstantOrConstantVector().
 4195 C = V.getOperand(0);
 4197 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
 4198 if (!CC)
 4199 return false;
 4200 Y = V.getOperand(1);
 4201
 4203 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
 // Defer the final profitability decision to the target hook.
 4204 return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
 4205 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
 4206 };
 4207
 4208 // LHS of comparison should be a one-use 'and'.
 4209 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
 4210 return SDValue();
 4211
 4212 X = N0.getOperand(0);
 4213 SDValue Mask = N0.getOperand(1);
 4214
 4215 // 'and' is commutative!
 4216 if (!Match(Mask)) {
 4217 std::swap(X, Mask);
 4218 if (!Match(Mask))
 4219 return SDValue();
 4220 }
 4221
 4222 EVT VT = X.getValueType();
 4223
 4224 // Produce:
 4225 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
 4226 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
 4227 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
 4228 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
 4229 return T2;
 4230}
4231
4232/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4233/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4234/// handle the commuted versions of these patterns.
4235SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4236 ISD::CondCode Cond, const SDLoc &DL,
4237 DAGCombinerInfo &DCI) const {
4238 unsigned BOpcode = N0.getOpcode();
4239 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4240 "Unexpected binop");
4241 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4242
4243 // (X + Y) == X --> Y == 0
4244 // (X - Y) == X --> Y == 0
4245 // (X ^ Y) == X --> Y == 0
4246 SelectionDAG &DAG = DCI.DAG;
4247 EVT OpVT = N0.getValueType();
4248 SDValue X = N0.getOperand(0);
4249 SDValue Y = N0.getOperand(1);
4250 if (X == N1)
4251 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4252
4253 if (Y != N1)
4254 return SDValue();
4255
4256 // (X + Y) == Y --> X == 0
4257 // (X ^ Y) == Y --> X == 0
4258 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4259 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4260
4261 // The shift would not be valid if the operands are boolean (i1).
4262 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4263 return SDValue();
4264
4265 // (X - Y) == Y --> X == Y << 1
4266 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4267 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4268 if (!DCI.isCalledByLegalizer())
4269 DCI.AddToWorklist(YShl1.getNode());
4270 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4271}
4272
// Expand (ctpop x) compared against small constants into cheaper bit tricks
// (based on the identity that x & (x-1) clears the lowest set bit), unless
// the target says ctpop is fast.
// NOTE(review): this extraction is missing the signature's opening line
// (source line 4273, presumably static simplifySetCCWithCTPOP taking TLI and
// VT), line 4281 (continuation of the truncate-look-through condition), and
// lines 4312/4340 (the declarations of the condition codes CC and CmpCond
// used in the returns below). Confirm against the upstream file.
 4274 SDValue N0, const APInt &C1,
 4275 ISD::CondCode Cond, const SDLoc &dl,
 4276 SelectionDAG &DAG) {
 4277 // Look through truncs that don't change the value of a ctpop.
 4278 // FIXME: Add vector support? Need to be careful with setcc result type below.
 4279 SDValue CTPOP = N0;
 4280 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
 4282 CTPOP = N0.getOperand(0);
 4283
 4284 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
 4285 return SDValue();
 4286
 4287 EVT CTVT = CTPOP.getValueType();
 4288 SDValue CTOp = CTPOP.getOperand(0);
 4289
 4290 // Expand a power-of-2-or-zero comparison based on ctpop:
 4291 // (ctpop x) u< 2 -> (x & x-1) == 0
 4292 // (ctpop x) u> 1 -> (x & x-1) != 0
 4293 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
 4294 // Keep the CTPOP if it is a cheap vector op.
 4295 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
 4296 return SDValue();
 4297
 4298 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
 4299 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
 4300 return SDValue();
 4301 if (C1 == 0 && (Cond == ISD::SETULT))
 4302 return SDValue(); // This is handled elsewhere.
 4303
 // Each pass clears one set bit: x &= (x - 1).
 4304 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
 4305
 4306 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
 4307 SDValue Result = CTOp;
 4308 for (unsigned i = 0; i < Passes; i++) {
 4309 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
 4310 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
 4311 }
 4313 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
 4314 }
 4315
 4316 // Expand a power-of-2 comparison based on ctpop
 4317 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
 4318 // Keep the CTPOP if it is cheap.
 4319 if (TLI.isCtpopFast(CTVT))
 4320 return SDValue();
 4321
 4322 SDValue Zero = DAG.getConstant(0, dl, CTVT);
 4323 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
 4324 assert(CTVT.isInteger());
 4325 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
 4326
 4327 // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
 4328 // check before emitting a potentially unnecessary op.
 4329 if (DAG.isKnownNeverZero(CTOp)) {
 4330 // (ctpop x) == 1 --> (x & x-1) == 0
 4331 // (ctpop x) != 1 --> (x & x-1) != 0
 4332 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
 4333 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
 4334 return RHS;
 4335 }
 4336
 4337 // (ctpop x) == 1 --> (x ^ x-1) > x-1
 4338 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
 4339 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add)
 4341 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
 4342 }
 4343
 4344 return SDValue();
 4345}
4346
// Rotating a value does not change whether it is all-zero or all-ones, so
// eq/ne compares against 0/-1 can look straight through ROTL/ROTR (and, for
// the 0 case, through one level of 'or' containing a rotate).
// NOTE(review): the signature's opening line (source line 4347, presumably
// static simplifySetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ...)) is
// missing from this extraction -- confirm against the upstream file.
 4348 ISD::CondCode Cond, const SDLoc &dl,
 4349 SelectionDAG &DAG) {
 4350 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
 4351 return SDValue();
 4352
 4353 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
 4354 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
 4355 return SDValue();
 4356
 4357 auto getRotateSource = [](SDValue X) {
 4358 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
 4359 return X.getOperand(0);
 4360 return SDValue();
 4361 };
 4362
 4363 // Peek through a rotated value compared against 0 or -1:
 4364 // (rot X, Y) == 0/-1 --> X == 0/-1
 4365 // (rot X, Y) != 0/-1 --> X != 0/-1
 4366 if (SDValue R = getRotateSource(N0))
 4367 return DAG.getSetCC(dl, VT, R, N1, Cond);
 4368
 4369 // Peek through an 'or' of a rotated value compared against 0:
 4370 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
 4371 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
 4372 //
 4373 // TODO: Add the 'and' with -1 sibling.
 4374 // TODO: Recurse through a series of 'or' ops to find the rotate.
 4375 EVT OpVT = N0.getValueType();
 4376 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
 4377 if (SDValue R = getRotateSource(N0.getOperand(0))) {
 4378 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
 4379 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
 4380 }
 4381 if (SDValue R = getRotateSource(N0.getOperand(1))) {
 4382 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
 4383 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
 4384 }
 4385 }
 4386
 4387 return SDValue();
 4388}
4389
// For a funnel shift (FSHL/FSHR) compared eq/ne against 0: when the two
// funnel inputs share an 'or' operand, fewer shifted bits need testing, so
// rewrite into a single shift + or compared against 0.
// NOTE(review): the signature's opening line (source line 4390, presumably
// static simplifySetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ...))
// is missing from this extraction -- confirm against the upstream file.
 4391 ISD::CondCode Cond, const SDLoc &dl,
 4392 SelectionDAG &DAG) {
 4393 // If we are testing for all-bits-clear, we might be able to do that with
 4394 // less shifting since bit-order does not matter.
 4395 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
 4396 return SDValue();
 4397
 4398 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
 4399 if (!C1 || !C1->isZero())
 4400 return SDValue();
 4401
 4402 if (!N0.hasOneUse() ||
 4403 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
 4404 return SDValue();
 4405
 4406 unsigned BitWidth = N0.getScalarValueSizeInBits();
 4407 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
 4408 if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
 4409 return SDValue();
 4410
 4411 // Canonicalize fshr as fshl to reduce pattern-matching.
 4412 unsigned ShAmt = ShAmtC->getZExtValue();
 4413 if (N0.getOpcode() == ISD::FSHR)
 4414 ShAmt = BitWidth - ShAmt;
 4415
 4416 // Match an 'or' with a specific operand 'Other' in either commuted variant.
 4417 SDValue X, Y;
 4418 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
 4419 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
 4420 return false;
 4421 if (Or.getOperand(0) == Other) {
 4422 X = Or.getOperand(0);
 4423 Y = Or.getOperand(1);
 4424 return true;
 4425 }
 4426 if (Or.getOperand(1) == Other) {
 4427 X = Or.getOperand(1);
 4428 Y = Or.getOperand(0);
 4429 return true;
 4430 }
 4431 return false;
 4432 };
 4433
 4434 EVT OpVT = N0.getValueType();
 4435 EVT ShAmtVT = N0.getOperand(2).getValueType();
 4436 SDValue F0 = N0.getOperand(0);
 4437 SDValue F1 = N0.getOperand(1);
 4438 if (matchOr(F0, F1)) {
 4439 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
 4440 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
 4441 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
 4442 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
 4443 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
 4444 }
 4445 if (matchOr(F1, F0)) {
 4446 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
 4447 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
 4448 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
 4449 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
 4450 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
 4451 }
 4452
 4453 return SDValue();
 4454}
4455
4456/// Try to simplify a setcc built with the specified operands and cc. If it is
4457/// unable to simplify it, return a null SDValue.
4459 ISD::CondCode Cond, bool foldBooleans,
4460 DAGCombinerInfo &DCI,
4461 const SDLoc &dl) const {
4462 SelectionDAG &DAG = DCI.DAG;
4463 const DataLayout &Layout = DAG.getDataLayout();
4464 EVT OpVT = N0.getValueType();
4466
4467 // Constant fold or commute setcc.
4468 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4469 return Fold;
4470
4471 bool N0ConstOrSplat =
4472 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4473 bool N1ConstOrSplat =
4474 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4475
4476 // Canonicalize toward having the constant on the RHS.
4477 // TODO: Handle non-splat vector constants. All undef causes trouble.
4478 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4479 // infinite loop here when we encounter one.
4481 if (N0ConstOrSplat && !N1ConstOrSplat &&
4482 (DCI.isBeforeLegalizeOps() ||
4483 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4484 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4485
4486 // If we have a subtract with the same 2 non-constant operands as this setcc
4487 // -- but in reverse order -- then try to commute the operands of this setcc
4488 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4489 // instruction on some targets.
4490 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4491 (DCI.isBeforeLegalizeOps() ||
4492 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4493 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4494 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4495 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4496
4497 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4498 return V;
4499
4500 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4501 return V;
4502
4503 if (auto *N1C = isConstOrConstSplat(N1)) {
4504 const APInt &C1 = N1C->getAPIntValue();
4505
4506 // Optimize some CTPOP cases.
4507 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4508 return V;
4509
4510 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4511 // X * Y == 0 --> (X == 0) || (Y == 0)
4512 // X * Y != 0 --> (X != 0) && (Y != 0)
4513 // TODO: This bails out if minsize is set, but if the target doesn't have a
4514 // single instruction multiply for this type, it would likely be
4515 // smaller to decompose.
4516 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4517 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4518 (N0->getFlags().hasNoUnsignedWrap() ||
4519 N0->getFlags().hasNoSignedWrap()) &&
4520 !Attr.hasFnAttr(Attribute::MinSize)) {
4521 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4522 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4523 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4524 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4525 }
4526
4527 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4528 // equality comparison, then we're just comparing whether X itself is
4529 // zero.
4530 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4531 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4532 llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4533 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4534 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4535 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4536 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4537 // (srl (ctlz x), 5) == 0 -> X != 0
4538 // (srl (ctlz x), 5) != 1 -> X != 0
4539 Cond = ISD::SETNE;
4540 } else {
4541 // (srl (ctlz x), 5) != 0 -> X == 0
4542 // (srl (ctlz x), 5) == 1 -> X == 0
4543 Cond = ISD::SETEQ;
4544 }
4545 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4546 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4547 Cond);
4548 }
4549 }
4550 }
4551 }
4552
4553 // FIXME: Support vectors.
4554 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4555 const APInt &C1 = N1C->getAPIntValue();
4556
4557 // (zext x) == C --> x == (trunc C)
4558 // (sext x) == C --> x == (trunc C)
4559 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4560 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4561 unsigned MinBits = N0.getValueSizeInBits();
4562 SDValue PreExt;
4563 bool Signed = false;
4564 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4565 // ZExt
4566 MinBits = N0->getOperand(0).getValueSizeInBits();
4567 PreExt = N0->getOperand(0);
4568 } else if (N0->getOpcode() == ISD::AND) {
4569 // DAGCombine turns costly ZExts into ANDs
4570 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4571 if ((C->getAPIntValue()+1).isPowerOf2()) {
4572 MinBits = C->getAPIntValue().countr_one();
4573 PreExt = N0->getOperand(0);
4574 }
4575 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4576 // SExt
4577 MinBits = N0->getOperand(0).getValueSizeInBits();
4578 PreExt = N0->getOperand(0);
4579 Signed = true;
4580 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4581 // ZEXTLOAD / SEXTLOAD
4582 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4583 MinBits = LN0->getMemoryVT().getSizeInBits();
4584 PreExt = N0;
4585 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4586 Signed = true;
4587 MinBits = LN0->getMemoryVT().getSizeInBits();
4588 PreExt = N0;
4589 }
4590 }
4591
4592 // Figure out how many bits we need to preserve this constant.
4593 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4594
4595 // Make sure we're not losing bits from the constant.
4596 if (MinBits > 0 &&
4597 MinBits < C1.getBitWidth() &&
4598 MinBits >= ReqdBits) {
4599 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4600 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4601 // Will get folded away.
4602 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4603 if (MinBits == 1 && C1 == 1)
4604 // Invert the condition.
4605 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4607 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4608 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4609 }
4610
4611 // If truncating the setcc operands is not desirable, we can still
4612 // simplify the expression in some cases:
4613 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4614 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4615 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4616 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4617 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4618 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4619 SDValue TopSetCC = N0->getOperand(0);
4620 unsigned N0Opc = N0->getOpcode();
4621 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4622 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4623 TopSetCC.getOpcode() == ISD::SETCC &&
4624 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4625 (isConstFalseVal(N1) ||
4626 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4627
4628 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4629 (!N1C->isZero() && Cond == ISD::SETNE);
4630
4631 if (!Inverse)
4632 return TopSetCC;
4633
4635 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4636 TopSetCC.getOperand(0).getValueType());
4637 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4638 TopSetCC.getOperand(1),
4639 InvCond);
4640 }
4641 }
4642 }
4643
4644 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4645 // equality or unsigned, and all 1 bits of the const are in the same
4646 // partial word, see if we can shorten the load.
4647 if (DCI.isBeforeLegalize() &&
4649 N0.getOpcode() == ISD::AND && C1 == 0 &&
4650 N0.getNode()->hasOneUse() &&
4651 isa<LoadSDNode>(N0.getOperand(0)) &&
4652 N0.getOperand(0).getNode()->hasOneUse() &&
4653 isa<ConstantSDNode>(N0.getOperand(1))) {
4654 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4655 APInt bestMask;
4656 unsigned bestWidth = 0, bestOffset = 0;
4657 if (Lod->isSimple() && Lod->isUnindexed() &&
4658 (Lod->getMemoryVT().isByteSized() ||
4659 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4660 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4661 unsigned origWidth = N0.getValueSizeInBits();
4662 unsigned maskWidth = origWidth;
4663 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4664 // 8 bits, but have to be careful...
4665 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4666 origWidth = Lod->getMemoryVT().getSizeInBits();
4667 const APInt &Mask = N0.getConstantOperandAPInt(1);
4668 // Only consider power-of-2 widths (and at least one byte) as candiates
4669 // for the narrowed load.
4670 for (unsigned width = 8; width < origWidth; width *= 2) {
4671 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4672 if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
4673 continue;
4674 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4675 // Avoid accessing any padding here for now (we could use memWidth
4676 // instead of origWidth here otherwise).
4677 unsigned maxOffset = origWidth - width;
4678 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4679 if (Mask.isSubsetOf(newMask)) {
4680 unsigned ptrOffset =
4681 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4682 unsigned IsFast = 0;
4683 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4685 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4686 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4687 IsFast) {
4688 bestOffset = ptrOffset / 8;
4689 bestMask = Mask.lshr(offset);
4690 bestWidth = width;
4691 break;
4692 }
4693 }
4694 newMask <<= 8;
4695 }
4696 if (bestWidth)
4697 break;
4698 }
4699 }
4700 if (bestWidth) {
4701 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4702 SDValue Ptr = Lod->getBasePtr();
4703 if (bestOffset != 0)
4704 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4705 SDValue NewLoad =
4706 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4707 Lod->getPointerInfo().getWithOffset(bestOffset),
4708 Lod->getOriginalAlign());
4709 SDValue And =
4710 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4711 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4712 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4713 }
4714 }
4715
4716 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4717 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4718 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4719
4720 // If the comparison constant has bits in the upper part, the
4721 // zero-extended value could never match.
4723 C1.getBitWidth() - InSize))) {
4724 switch (Cond) {
4725 case ISD::SETUGT:
4726 case ISD::SETUGE:
4727 case ISD::SETEQ:
4728 return DAG.getConstant(0, dl, VT);
4729 case ISD::SETULT:
4730 case ISD::SETULE:
4731 case ISD::SETNE:
4732 return DAG.getConstant(1, dl, VT);
4733 case ISD::SETGT:
4734 case ISD::SETGE:
4735 // True if the sign bit of C1 is set.
4736 return DAG.getConstant(C1.isNegative(), dl, VT);
4737 case ISD::SETLT:
4738 case ISD::SETLE:
4739 // True if the sign bit of C1 isn't set.
4740 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4741 default:
4742 break;
4743 }
4744 }
4745
4746 // Otherwise, we can perform the comparison with the low bits.
4747 switch (Cond) {
4748 case ISD::SETEQ:
4749 case ISD::SETNE:
4750 case ISD::SETUGT:
4751 case ISD::SETUGE:
4752 case ISD::SETULT:
4753 case ISD::SETULE: {
4754 EVT newVT = N0.getOperand(0).getValueType();
4755 if (DCI.isBeforeLegalizeOps() ||
4756 (isOperationLegal(ISD::SETCC, newVT) &&
4757 isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
4758 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4759 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4760
4761 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4762 NewConst, Cond);
4763 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4764 }
4765 break;
4766 }
4767 default:
4768 break; // todo, be more careful with signed comparisons
4769 }
4770 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4771 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4772 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4773 OpVT)) {
4774 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4775 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4776 EVT ExtDstTy = N0.getValueType();
4777 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4778
4779 // If the constant doesn't fit into the number of bits for the source of
4780 // the sign extension, it is impossible for both sides to be equal.
4781 if (C1.getSignificantBits() > ExtSrcTyBits)
4782 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4783
4784 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4785 ExtDstTy != ExtSrcTy && "Unexpected types!");
4786 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4787 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4788 DAG.getConstant(Imm, dl, ExtDstTy));
4789 if (!DCI.isCalledByLegalizer())
4790 DCI.AddToWorklist(ZextOp.getNode());
4791 // Otherwise, make this a use of a zext.
4792 return DAG.getSetCC(dl, VT, ZextOp,
4793 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4794 } else if ((N1C->isZero() || N1C->isOne()) &&
4795 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4796 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
4797 // excluded as they are handled below whilst checking for foldBooleans.
4798 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4799 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4800 (N0.getValueType() == MVT::i1 ||
4804 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4805 if (TrueWhenTrue)
4806 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4807 // Invert the condition.
4808 if (N0.getOpcode() == ISD::SETCC) {
4809 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4811 if (DCI.isBeforeLegalizeOps() ||
4813 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4814 }
4815 }
4816
4817 if ((N0.getOpcode() == ISD::XOR ||
4818 (N0.getOpcode() == ISD::AND &&
4819 N0.getOperand(0).getOpcode() == ISD::XOR &&
4820 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4821 isOneConstant(N0.getOperand(1))) {
4822 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
4823 // can only do this if the top bits are known zero.
4824 unsigned BitWidth = N0.getValueSizeInBits();
4825 if (DAG.MaskedValueIsZero(N0,
4827 BitWidth-1))) {
4828 // Okay, get the un-inverted input value.
4829 SDValue Val;
4830 if (N0.getOpcode() == ISD::XOR) {
4831 Val = N0.getOperand(0);
4832 } else {
4833 assert(N0.getOpcode() == ISD::AND &&
4834 N0.getOperand(0).getOpcode() == ISD::XOR);
4835 // ((X^1)&1)^1 -> X & 1
4836 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4837 N0.getOperand(0).getOperand(0),
4838 N0.getOperand(1));
4839 }
4840
4841 return DAG.getSetCC(dl, VT, Val, N1,
4843 }
4844 } else if (N1C->isOne()) {
4845 SDValue Op0 = N0;
4846 if (Op0.getOpcode() == ISD::TRUNCATE)
4847 Op0 = Op0.getOperand(0);
4848
4849 if ((Op0.getOpcode() == ISD::XOR) &&
4850 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4851 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4852 SDValue XorLHS = Op0.getOperand(0);
4853 SDValue XorRHS = Op0.getOperand(1);
4854 // Ensure that the input setccs return an i1 type or 0/1 value.
4855 if (Op0.getValueType() == MVT::i1 ||
4860 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4862 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4863 }
4864 }
4865 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4866 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4867 if (Op0.getValueType().bitsGT(VT))
4868 Op0 = DAG.getNode(ISD::AND, dl, VT,
4869 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4870 DAG.getConstant(1, dl, VT));
4871 else if (Op0.getValueType().bitsLT(VT))
4872 Op0 = DAG.getNode(ISD::AND, dl, VT,
4873 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4874 DAG.getConstant(1, dl, VT));
4875
4876 return DAG.getSetCC(dl, VT, Op0,
4877 DAG.getConstant(0, dl, Op0.getValueType()),
4879 }
4880 if (Op0.getOpcode() == ISD::AssertZext &&
4881 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4882 return DAG.getSetCC(dl, VT, Op0,
4883 DAG.getConstant(0, dl, Op0.getValueType()),
4885 }
4886 }
4887
4888 // Given:
4889 // icmp eq/ne (urem %x, %y), 0
4890 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4891 // icmp eq/ne %x, 0
4892 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4893 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4894 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4895 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4896 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4897 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4898 }
4899
4900 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4901 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4902 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4903 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4904 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4905 N1C && N1C->isAllOnes()) {
4906 return DAG.getSetCC(dl, VT, N0.getOperand(0),
4907 DAG.getConstant(0, dl, OpVT),
4909 }
4910
4911 if (SDValue V =
4912 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4913 return V;
4914 }
4915
4916 // These simplifications apply to splat vectors as well.
4917 // TODO: Handle more splat vector cases.
4918 if (auto *N1C = isConstOrConstSplat(N1)) {
4919 const APInt &C1 = N1C->getAPIntValue();
4920
4921 APInt MinVal, MaxVal;
4922 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4924 MinVal = APInt::getSignedMinValue(OperandBitSize);
4925 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4926 } else {
4927 MinVal = APInt::getMinValue(OperandBitSize);
4928 MaxVal = APInt::getMaxValue(OperandBitSize);
4929 }
4930
4931 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4932 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4933 // X >= MIN --> true
4934 if (C1 == MinVal)
4935 return DAG.getBoolConstant(true, dl, VT, OpVT);
4936
4937 if (!VT.isVector()) { // TODO: Support this for vectors.
4938 // X >= C0 --> X > (C0 - 1)
4939 APInt C = C1 - 1;
4941 if ((DCI.isBeforeLegalizeOps() ||
4942 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4943 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4944 isLegalICmpImmediate(C.getSExtValue())))) {
4945 return DAG.getSetCC(dl, VT, N0,
4946 DAG.getConstant(C, dl, N1.getValueType()),
4947 NewCC);
4948 }
4949 }
4950 }
4951
4952 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
4953 // X <= MAX --> true
4954 if (C1 == MaxVal)
4955 return DAG.getBoolConstant(true, dl, VT, OpVT);
4956
4957 // X <= C0 --> X < (C0 + 1)
4958 if (!VT.isVector()) { // TODO: Support this for vectors.
4959 APInt C = C1 + 1;
4961 if ((DCI.isBeforeLegalizeOps() ||
4962 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4963 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4964 isLegalICmpImmediate(C.getSExtValue())))) {
4965 return DAG.getSetCC(dl, VT, N0,
4966 DAG.getConstant(C, dl, N1.getValueType()),
4967 NewCC);
4968 }
4969 }
4970 }
4971
4972 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
4973 if (C1 == MinVal)
4974 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
4975
4976 // TODO: Support this for vectors after legalize ops.
4977 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4978 // Canonicalize setlt X, Max --> setne X, Max
4979 if (C1 == MaxVal)
4980 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4981
4982 // If we have setult X, 1, turn it into seteq X, 0
4983 if (C1 == MinVal+1)
4984 return DAG.getSetCC(dl, VT, N0,
4985 DAG.getConstant(MinVal, dl, N0.getValueType()),
4986 ISD::SETEQ);
4987 }
4988 }
4989
4990 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
4991 if (C1 == MaxVal)
4992 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
4993
4994 // TODO: Support this for vectors after legalize ops.
4995 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4996 // Canonicalize setgt X, Min --> setne X, Min
4997 if (C1 == MinVal)
4998 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4999
5000 // If we have setugt X, Max-1, turn it into seteq X, Max
5001 if (C1 == MaxVal-1)
5002 return DAG.getSetCC(dl, VT, N0,
5003 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5004 ISD::SETEQ);
5005 }
5006 }
5007
5008 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5009 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5010 if (C1.isZero())
5011 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5012 VT, N0, N1, Cond, DCI, dl))
5013 return CC;
5014
5015 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5016 // For example, when high 32-bits of i64 X are known clear:
5017 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5018 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5019 bool CmpZero = N1C->isZero();
5020 bool CmpNegOne = N1C->isAllOnes();
5021 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5022 // Match or(lo,shl(hi,bw/2)) pattern.
5023 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5024 unsigned EltBits = V.getScalarValueSizeInBits();
5025 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5026 return false;
5027 SDValue LHS = V.getOperand(0);
5028 SDValue RHS = V.getOperand(1);
5029 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5030 // Unshifted element must have zero upperbits.
5031 if (RHS.getOpcode() == ISD::SHL &&
5032 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5033 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5034 DAG.MaskedValueIsZero(LHS, HiBits)) {
5035 Lo = LHS;
5036 Hi = RHS.getOperand(0);
5037 return true;
5038 }
5039 if (LHS.getOpcode() == ISD::SHL &&
5040 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5041 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5042 DAG.MaskedValueIsZero(RHS, HiBits)) {
5043 Lo = RHS;
5044 Hi = LHS.getOperand(0);
5045 return true;
5046 }
5047 return false;
5048 };
5049
5050 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5051 unsigned EltBits = N0.getScalarValueSizeInBits();
5052 unsigned HalfBits = EltBits / 2;
5053 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5054 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5055 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5056 SDValue NewN0 =
5057 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5058 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5059 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5060 };
5061
5062 SDValue Lo, Hi;
5063 if (IsConcat(N0, Lo, Hi))
5064 return MergeConcat(Lo, Hi);
5065
5066 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5067 SDValue Lo0, Lo1, Hi0, Hi1;
5068 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5069 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5070 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5071 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5072 }
5073 }
5074 }
5075 }
5076
5077 // If we have "setcc X, C0", check to see if we can shrink the immediate
5078 // by changing cc.
5079 // TODO: Support this for vectors after legalize ops.
5080 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5081 // SETUGT X, SINTMAX -> SETLT X, 0
5082 // SETUGE X, SINTMIN -> SETLT X, 0
5083 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5084 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5085 return DAG.getSetCC(dl, VT, N0,
5086 DAG.getConstant(0, dl, N1.getValueType()),
5087 ISD::SETLT);
5088
5089 // SETULT X, SINTMIN -> SETGT X, -1
5090 // SETULE X, SINTMAX -> SETGT X, -1
5091 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5092 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5093 return DAG.getSetCC(dl, VT, N0,
5094 DAG.getAllOnesConstant(dl, N1.getValueType()),
5095 ISD::SETGT);
5096 }
5097 }
5098
5099 // Back to non-vector simplifications.
5100 // TODO: Can we do these for vector splats?
5101 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5102 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5103 const APInt &C1 = N1C->getAPIntValue();
5104 EVT ShValTy = N0.getValueType();
5105
5106 // Fold bit comparisons when we can. This will result in an
5107 // incorrect value when boolean false is negative one, unless
5108 // the bitsize is 1 in which case the false value is the same
5109 // in practice regardless of the representation.
5110 if ((VT.getSizeInBits() == 1 ||
5112 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5113 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5114 N0.getOpcode() == ISD::AND) {
5115 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5116 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5117 // Perform the xform if the AND RHS is a single bit.
5118 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5119 if (AndRHS->getAPIntValue().isPowerOf2() &&
5120 !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
5121 return DAG.getNode(
5122 ISD::TRUNCATE, dl, VT,
5123 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5124 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5125 }
5126 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5127 // (X & 8) == 8 --> (X & 8) >> 3
5128 // Perform the xform if C1 is a single bit.
5129 unsigned ShCt = C1.logBase2();
5130 if (C1.isPowerOf2() &&
5131 !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
5132 return DAG.getNode(
5133 ISD::TRUNCATE, dl, VT,
5134 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5135 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5136 }
5137 }
5138 }
5139 }
5140
5141 if (C1.getSignificantBits() <= 64 &&
5143 // (X & -256) == 256 -> (X >> 8) == 1
5144 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5145 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5146 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5147 const APInt &AndRHSC = AndRHS->getAPIntValue();
5148 if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
5149 unsigned ShiftBits = AndRHSC.countr_zero();
5150 if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5151 SDValue Shift = DAG.getNode(
5152 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5153 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5154 SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5155 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5156 }
5157 }
5158 }
5159 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5160 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5161 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5162 // X < 0x100000000 -> (X >> 32) < 1
5163 // X >= 0x100000000 -> (X >> 32) >= 1
5164 // X <= 0x0ffffffff -> (X >> 32) < 1
5165 // X > 0x0ffffffff -> (X >> 32) >= 1
5166 unsigned ShiftBits;
5167 APInt NewC = C1;
5168 ISD::CondCode NewCond = Cond;
5169 if (AdjOne) {
5170 ShiftBits = C1.countr_one();
5171 NewC = NewC + 1;
5172 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5173 } else {
5174 ShiftBits = C1.countr_zero();
5175 }
5176 NewC.lshrInPlace(ShiftBits);
5177 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5179 !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5180 SDValue Shift =
5181 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5182 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5183 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5184 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5185 }
5186 }
5187 }
5188 }
5189
5190 if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5191 auto *CFP = cast<ConstantFPSDNode>(N1);
5192 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5193
5194 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5195 // constant if knowing that the operand is non-nan is enough. We prefer to
5196 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5197 // materialize 0.0.
5198 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5199 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5200
5201 // setcc (fneg x), C -> setcc swap(pred) x, -C
5202 if (N0.getOpcode() == ISD::FNEG) {
5204 if (DCI.isBeforeLegalizeOps() ||
5205 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5206 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5207 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5208 }
5209 }
5210
5211 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5213 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5214 bool IsFabs = N0.getOpcode() == ISD::FABS;
5215 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5216 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5217 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5218 : (IsFabs ? fcInf : fcPosInf);
5219 if (Cond == ISD::SETUEQ)
5220 Flag |= fcNan;
5221 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5222 DAG.getTargetConstant(Flag, dl, MVT::i32));
5223 }
5224 }
5225
5226 // If the condition is not legal, see if we can find an equivalent one
5227 // which is legal.
5229 // If the comparison was an awkward floating-point == or != and one of
5230 // the comparison operands is infinity or negative infinity, convert the
5231 // condition to a less-awkward <= or >=.
5232 if (CFP->getValueAPF().isInfinity()) {
5233 bool IsNegInf = CFP->getValueAPF().isNegative();
5235 switch (Cond) {
5236 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5237 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5238 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5239 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5240 default: break;
5241 }
5242 if (NewCond != ISD::SETCC_INVALID &&
5243 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5244 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5245 }
5246 }
5247 }
5248
5249 if (N0 == N1) {
5250 // The sext(setcc()) => setcc() optimization relies on the appropriate
5251 // constant being emitted.
5252 assert(!N0.getValueType().isInteger() &&
5253 "Integer types should be handled by FoldSetCC");
5254
5255 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5256 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5257 if (UOF == 2) // FP operators that are undefined on NaNs.
5258 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5259 if (UOF == unsigned(EqTrue))
5260 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5261 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5262 // if it is not already.
5263 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5264 if (NewCond != Cond &&
5265 (DCI.isBeforeLegalizeOps() ||
5266 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5267 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5268 }
5269
5270 // ~X > ~Y --> Y > X
5271 // ~X < ~Y --> Y < X
5272 // ~X < C --> X > ~C
5273 // ~X > C --> X < ~C
5274 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5275 N0.getValueType().isInteger()) {
5276 if (isBitwiseNot(N0)) {
5277 if (isBitwiseNot(N1))
5278 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5279
5282 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5283 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5284 }
5285 }
5286 }
5287
5288 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5289 N0.getValueType().isInteger()) {
5290 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5291 N0.getOpcode() == ISD::XOR) {
5292 // Simplify (X+Y) == (X+Z) --> Y == Z
5293 if (N0.getOpcode() == N1.getOpcode()) {
5294 if (N0.getOperand(0) == N1.getOperand(0))
5295 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5296 if (N0.getOperand(1) == N1.getOperand(1))
5297 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5298 if (isCommutativeBinOp(N0.getOpcode())) {
5299 // If X op Y == Y op X, try other combinations.
5300 if (N0.getOperand(0) == N1.getOperand(1))
5301 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5302 Cond);
5303 if (N0.getOperand(1) == N1.getOperand(0))
5304 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5305 Cond);
5306 }
5307 }
5308
5309 // If RHS is a legal immediate value for a compare instruction, we need
5310 // to be careful about increasing register pressure needlessly.
5311 bool LegalRHSImm = false;
5312
5313 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5314 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5315 // Turn (X+C1) == C2 --> X == C2-C1
5316 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5317 return DAG.getSetCC(
5318 dl, VT, N0.getOperand(0),
5319 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5320 dl, N0.getValueType()),
5321 Cond);
5322
5323 // Turn (X^C1) == C2 --> X == C1^C2
5324 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5325 return DAG.getSetCC(
5326 dl, VT, N0.getOperand(0),
5327 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5328 dl, N0.getValueType()),
5329 Cond);
5330 }
5331
5332 // Turn (C1-X) == C2 --> X == C1-C2
5333 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5334 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5335 return DAG.getSetCC(
5336 dl, VT, N0.getOperand(1),
5337 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5338 dl, N0.getValueType()),
5339 Cond);
5340
5341 // Could RHSC fold directly into a compare?
5342 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5343 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5344 }
5345
5346 // (X+Y) == X --> Y == 0 and similar folds.
5347 // Don't do this if X is an immediate that can fold into a cmp
5348 // instruction and X+Y has other uses. It could be an induction variable
5349 // chain, and the transform would increase register pressure.
5350 if (!LegalRHSImm || N0.hasOneUse())
5351 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5352 return V;
5353 }
5354
5355 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5356 N1.getOpcode() == ISD::XOR)
5357 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5358 return V;
5359
5360 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5361 return V;
5362 }
5363
5364 // Fold remainder of division by a constant.
5365 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5366 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5367 // When division is cheap or optimizing for minimum size,
5368 // fall through to DIVREM creation by skipping this fold.
5369 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5370 if (N0.getOpcode() == ISD::UREM) {
5371 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5372 return Folded;
5373 } else if (N0.getOpcode() == ISD::SREM) {
5374 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5375 return Folded;
5376 }
5377 }
5378 }
5379
5380 // Fold away ALL boolean setcc's.
5381 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5382 SDValue Temp;
5383 switch (Cond) {
5384 default: llvm_unreachable("Unknown integer setcc!");
5385 case ISD::SETEQ: // X == Y -> ~(X^Y)
5386 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5387 N0 = DAG.getNOT(dl, Temp, OpVT);
5388 if (!DCI.isCalledByLegalizer())
5389 DCI.AddToWorklist(Temp.getNode());
5390 break;
5391 case ISD::SETNE: // X != Y --> (X^Y)
5392 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5393 break;
5394 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5395 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5396 Temp = DAG.getNOT(dl, N0, OpVT);
5397 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5398 if (!DCI.isCalledByLegalizer())
5399 DCI.AddToWorklist(Temp.getNode());
5400 break;
5401 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5402 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5403 Temp = DAG.getNOT(dl, N1, OpVT);
5404 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5405 if (!DCI.isCalledByLegalizer())
5406 DCI.AddToWorklist(Temp.getNode());
5407 break;
5408 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5409 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5410 Temp = DAG.getNOT(dl, N0, OpVT);
5411 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5412 if (!DCI.isCalledByLegalizer())
5413 DCI.AddToWorklist(Temp.getNode());
5414 break;
5415 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5416 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5417 Temp = DAG.getNOT(dl, N1, OpVT);
5418 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5419 break;
5420 }
5421 if (VT.getScalarType() != MVT::i1) {
5422 if (!DCI.isCalledByLegalizer())
5423 DCI.AddToWorklist(N0.getNode());
5424 // FIXME: If running after legalize, we probably can't do this.
5426 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5427 }
5428 return N0;
5429 }
5430
5431 // Could not fold it.
5432 return SDValue();
5433}
5434
5435/// Returns true (and the GlobalValue and the offset) if the node is a
5436/// GlobalAddress + offset.
5438 int64_t &Offset) const {
5439
 // Look through any target-specific address-wrapper node first.
5440 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5441
 // Direct hit: the node itself is a GlobalAddress; fold its offset in.
5442 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5443 GA = GASD->getGlobal();
5444 Offset += GASD->getOffset();
5445 return true;
5446 }
5447
 // Otherwise accept (GA + C) with the constant on either side of the ADD;
 // the recursion also matches chains such as ((GA + C1) + C2).
5448 if (N->getOpcode() == ISD::ADD) {
5449 SDValue N1 = N->getOperand(0);
5450 SDValue N2 = N->getOperand(1);
5451 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5452 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5453 Offset += V->getSExtValue();
5454 return true;
5455 }
5456 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5457 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5458 Offset += V->getSExtValue();
5459 return true;
5460 }
5461 }
5462 }
5463
5464 return false;
5465}
5466
5468 DAGCombinerInfo &DCI) const {
5469 // Default implementation: no optimization.
 // Targets override this hook to combine their target-specific nodes; an
 // empty SDValue tells the DAG combiner nothing was changed.
5470 return SDValue();
5471}
5472
5473//===----------------------------------------------------------------------===//
5474// Inline Assembler Implementation Methods
5475//===----------------------------------------------------------------------===//
5476
5479 unsigned S = Constraint.size();
5480
 // Classify the single-letter GCC inline-asm constraint codes.
5481 if (S == 1) {
5482 switch (Constraint[0]) {
5483 default: break;
5484 case 'r':
5485 return C_RegisterClass;
5486 case 'm': // memory
5487 case 'o': // offsetable
5488 case 'V': // not offsetable
5489 return C_Memory;
5490 case 'p': // Address.
5491 return C_Address;
5492 case 'n': // Simple Integer
5493 case 'E': // Floating Point Constant
5494 case 'F': // Floating Point Constant
5495 return C_Immediate;
5496 case 'i': // Simple Integer or Relocatable Constant
5497 case 's': // Relocatable Constant
5498 case 'X': // Allow ANY value.
5499 case 'I': // Target registers.
5500 case 'J':
5501 case 'K':
5502 case 'L':
5503 case 'M':
5504 case 'N':
5505 case 'O':
5506 case 'P':
5507 case '<':
5508 case '>':
5509 return C_Other;
5510 }
5511 }
5512
 // Brace-enclosed constraints name a specific physical register, with the
 // special spelling "{memory}" meaning a memory constraint.
5513 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5514 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5515 return C_Memory;
5516 return C_Register;
5517 }
5518 return C_Unknown;
5520
5521/// Try to replace an X constraint, which matches anything, with another that
5522/// has more specific requirements based on the type of the corresponding
5523/// operand.
5524const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5525 if (ConstraintVT.isInteger())
5526 return "r";
5527 if (ConstraintVT.isFloatingPoint())
5528 return "f"; // works for many targets
5529 return nullptr;
5530}
5531
5533 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5534 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
 // Base implementation: the target provides no special lowering for asm
 // output operands; an empty SDValue signals "unhandled".
5535 return SDValue();
5536}
5537
5538/// Lower the specified operand into the Ops vector.
5539/// If it is invalid, don't add anything to Ops.
5541 StringRef Constraint,
5542 std::vector<SDValue> &Ops,
5543 SelectionDAG &DAG) const {
5544
 // Only single-letter constraints are handled by this generic code.
5545 if (Constraint.size() > 1)
5546 return;
5547
5548 char ConstraintLetter = Constraint[0];
5549 switch (ConstraintLetter) {
5550 default: break;
5551 case 'X': // Allows any operand
5552 case 'i': // Simple Integer or Relocatable Constant
5553 case 'n': // Simple Integer
5554 case 's': { // Relocatable Constant
5555
5557 uint64_t Offset = 0;
5558
5559 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5560 // etc., since getelementpointer is variadic. We can't use
5561 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5562 // while in this case the GA may be furthest from the root node which is
5563 // likely an ISD::ADD.
5564 while (true) {
5565 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5566 // gcc prints these as sign extended. Sign extend value to 64 bits
5567 // now; without this it would get ZExt'd later in
5568 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5569 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5570 BooleanContent BCont = getBooleanContents(MVT::i64);
5571 ISD::NodeType ExtOpc =
5572 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5573 int64_t ExtVal =
5574 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5575 Ops.push_back(
5576 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5577 return;
5578 }
 // 'n' requires a compile-time integer, so symbolic operands are only
 // acceptable for the other constraint letters.
5579 if (ConstraintLetter != 'n') {
5580 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5581 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5582 GA->getValueType(0),
5583 Offset + GA->getOffset()));
5584 return;
5585 }
5586 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5587 Ops.push_back(DAG.getTargetBlockAddress(
5588 BA->getBlockAddress(), BA->getValueType(0),
5589 Offset + BA->getOffset(), BA->getTargetFlags()));
5590 return;
5591 }
5592 if (isa<BasicBlockSDNode>(Op)) {
5593 Ops.push_back(Op);
5594 return;
5595 }
5596 }
 // Peel one (X +/- C) layer: fold the constant side into Offset and keep
 // walking down the non-constant operand.
5597 const unsigned OpCode = Op.getOpcode();
5598 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5599 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5600 Op = Op.getOperand(1);
5601 // Subtraction is not commutative.
5602 else if (OpCode == ISD::ADD &&
5603 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5604 Op = Op.getOperand(0);
5605 else
5606 return;
5607 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5608 continue;
5609 }
5610 return;
5611 }
5612 break;
5613 }
5614 }
5615}
5616
5618 const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
 // Base implementation: target intrinsics contribute no extra operands.
5619}
5620
5621std::pair<unsigned, const TargetRegisterClass *>
5623 StringRef Constraint,
5624 MVT VT) const {
 // Only physical-register constraints of the form "{name}" are resolved here.
5625 if (!Constraint.starts_with("{"))
5626 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5627 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5628
5629 // Remove the braces from around the name.
5630 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5631
 // R is the fallback result: the first legal class containing the register,
 // returned only if no class also supports the requested value type.
5632 std::pair<unsigned, const TargetRegisterClass *> R =
5633 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5634
5635 // Figure out which register class contains this reg.
5636 for (const TargetRegisterClass *RC : RI->regclasses()) {
5637 // If none of the value types for this register class are valid, we
5638 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5639 if (!isLegalRC(*RI, *RC))
5640 continue;
5641
 // Register names are matched case-insensitively against the asm name.
5642 for (const MCPhysReg &PR : *RC) {
5643 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5644 std::pair<unsigned, const TargetRegisterClass *> S =
5645 std::make_pair(PR, RC);
5646
5647 // If this register class has the requested value type, return it,
5648 // otherwise keep searching and return the first class found
5649 // if no other is found which explicitly has the requested type.
5650 if (RI->isTypeLegalForClass(*RC, VT))
5651 return S;
5652 if (!R.second)
5653 R = S;
5654 }
5655 }
5656 }
5657
5658 return R;
5659}
5660
5661//===----------------------------------------------------------------------===//
5662// Constraint Selection.
5663
5664/// Return true if this is an input operand that is a matching constraint like
5665/// "4".
5667 assert(!ConstraintCode.empty() && "No known constraint!");
 // Matching constraints are spelled as a decimal output-operand number.
5668 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5669}
5670
5671/// If this is an input matching constraint, this method returns the output
5672/// operand it matches.
5674 assert(!ConstraintCode.empty() && "No known constraint!");
 // The constraint code is the decimal index of the matched output operand.
5675 return atoi(ConstraintCode.c_str());
5676}
5677
5678/// Split up the constraint string from the inline assembly value into the
5679/// specific constraints and their prefixes, and also tie in the associated
5680/// operand values.
5681/// If this returns an empty vector, and if the constraint string itself
5682/// isn't empty, there was an error parsing.
5685 const TargetRegisterInfo *TRI,
5686 const CallBase &Call) const {
5687 /// Information about all of the constraints.
5688 AsmOperandInfoVector ConstraintOperands;
5689 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
5690 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5691
5692 // Do a prepass over the constraints, canonicalizing them, and building up the
5693 // ConstraintOperands list.
5694 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5695 unsigned ResNo = 0; // ResNo - The result number of the next output.
5696 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5697
5698 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5699 ConstraintOperands.emplace_back(std::move(CI));
5700 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5701
5702 // Update multiple alternative constraint count.
5703 if (OpInfo.multipleAlternatives.size() > maCount)
5704 maCount = OpInfo.multipleAlternatives.size();
5705
5706 OpInfo.ConstraintVT = MVT::Other;
5707
5708 // Compute the value type for each operand.
5709 switch (OpInfo.Type) {
 // NOTE(review): the isOutput case label was lost in this rendering; the
 // statements below handle output operands — confirm against upstream.
5711 // Indirect outputs just consume an argument.
5712 if (OpInfo.isIndirect) {
5713 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5714 break;
5715 }
5716
5717 // The return value of the call is this value. As such, there is no
5718 // corresponding argument.
5719 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5720 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
5721 OpInfo.ConstraintVT =
5722 getSimpleValueType(DL, STy->getElementType(ResNo));
5723 } else {
5724 assert(ResNo == 0 && "Asm only has one result!");
5725 OpInfo.ConstraintVT =
5726 getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
5727 }
5728 ++ResNo;
5729 break;
5730 case InlineAsm::isInput:
5731 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5732 break;
5733 case InlineAsm::isLabel:
5734 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
5735 ++LabelNo;
5736 continue;
 // NOTE(review): an isClobber case label appears to be missing here.
5738 // Nothing to do.
5739 break;
5740 }
5741
5742 if (OpInfo.CallOperandVal) {
5743 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5744 if (OpInfo.isIndirect) {
5745 OpTy = Call.getParamElementType(ArgNo);
5746 assert(OpTy && "Indirect operand must have elementtype attribute");
5747 }
5748
5749 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
5750 if (StructType *STy = dyn_cast<StructType>(OpTy))
5751 if (STy->getNumElements() == 1)
5752 OpTy = STy->getElementType(0);
5753
5754 // If OpTy is not a single value, it may be a struct/union that we
5755 // can tile with integers.
5756 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
5757 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
5758 switch (BitSize) {
5759 default: break;
5760 case 1:
5761 case 8:
5762 case 16:
5763 case 32:
5764 case 64:
5765 case 128:
5766 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
5767 break;
5768 }
5769 }
5770
5771 EVT VT = getAsmOperandValueType(DL, OpTy, true);
5772 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
5773 ArgNo++;
5774 }
5775 }
5776
5777 // If we have multiple alternative constraints, select the best alternative.
5778 if (!ConstraintOperands.empty()) {
5779 if (maCount) {
5780 unsigned bestMAIndex = 0;
5781 int bestWeight = -1;
5782 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
5783 int weight = -1;
5784 unsigned maIndex;
5785 // Compute the sums of the weights for each alternative, keeping track
5786 // of the best (highest weight) one so far.
5787 for (maIndex = 0; maIndex < maCount; ++maIndex) {
5788 int weightSum = 0;
5789 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5790 cIndex != eIndex; ++cIndex) {
5791 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5792 if (OpInfo.Type == InlineAsm::isClobber)
5793 continue;
5794
5795 // If this is an output operand with a matching input operand,
5796 // look up the matching input. If their types mismatch, e.g. one
5797 // is an integer, the other is floating point, or their sizes are
5798 // different, flag it as an maCantMatch.
5799 if (OpInfo.hasMatchingInput()) {
5800 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5801 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5802 if ((OpInfo.ConstraintVT.isInteger() !=
5803 Input.ConstraintVT.isInteger()) ||
5804 (OpInfo.ConstraintVT.getSizeInBits() !=
5805 Input.ConstraintVT.getSizeInBits())) {
5806 weightSum = -1; // Can't match.
5807 break;
5808 }
5809 }
5810 }
5811 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
5812 if (weight == -1) {
5813 weightSum = -1;
5814 break;
5815 }
5816 weightSum += weight;
5817 }
5818 // Update best.
5819 if (weightSum > bestWeight) {
5820 bestWeight = weightSum;
5821 bestMAIndex = maIndex;
5822 }
5823 }
5824
5825 // Now select chosen alternative in each constraint.
5826 for (AsmOperandInfo &cInfo : ConstraintOperands)
5827 if (cInfo.Type != InlineAsm::isClobber)
5828 cInfo.selectAlternative(bestMAIndex);
5829 }
5830 }
5831
5832 // Check and hook up tied operands, choose constraint code to use.
5833 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5834 cIndex != eIndex; ++cIndex) {
5835 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5836
5837 // If this is an output operand with a matching input operand, look up the
5838 // matching input. If their types mismatch, e.g. one is an integer, the
5839 // other is floating point, or their sizes are different, flag it as an
5840 // error.
5841 if (OpInfo.hasMatchingInput()) {
5842 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5843
5844 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
 // Tied operands of different VTs are still acceptable when they would
 // be assigned to the same register class (e.g. different-width
 // subregisters of one class).
5845 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
5846 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
5847 OpInfo.ConstraintVT);
5848 std::pair<unsigned, const TargetRegisterClass *> InputRC =
5849 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
5850 Input.ConstraintVT);
5851 if ((OpInfo.ConstraintVT.isInteger() !=
5852 Input.ConstraintVT.isInteger()) ||
5853 (MatchRC.second != InputRC.second)) {
5854 report_fatal_error("Unsupported asm: input constraint"
5855 " with a matching output constraint of"
5856 " incompatible type!");
5857 }
5858 }
5859 }
5860 }
5861
5862 return ConstraintOperands;
5863}
5864
5865/// Return a number indicating our preference for choosing a type of constraint
5866/// over another, for the purpose of sorting them. Immediates are almost always
5867/// preferable (when they can be emitted). A higher return value means a
5868/// stronger preference for one constraint type relative to another.
5869/// FIXME: We should prefer registers over memory but doing so may lead to
5870/// unrecoverable register exhaustion later.
5871/// https://github.com/llvm/llvm-project/issues/20571
5873 switch (CT) {
 // NOTE(review): the case labels were lost in this rendering; the returns
 // below presumably rank immediate/other (4) above memory/address (3),
 // register class (2), specific register (1), and unknown (0) — confirm
 // against the upstream source.
5876 return 4;
5879 return 3;
5881 return 2;
5883 return 1;
5885 return 0;
5886 }
5887 llvm_unreachable("Invalid constraint type");
5888}
5889
5890/// Examine constraint type and operand type and determine a weight value.
5891/// This object must already have been set up with the operand type
5892/// and the current alternative constraint selected.
5895 AsmOperandInfo &info, int maIndex) const {
 // Choose the constraint-code list for the requested alternative; an
 // out-of-range index falls back to the operand's primary code list.
5897 if (maIndex >= (int)info.multipleAlternatives.size())
5898 rCodes = &info.Codes;
5899 else
5900 rCodes = &info.multipleAlternatives[maIndex].Codes;
5901 ConstraintWeight BestWeight = CW_Invalid;
5902
5903 // Loop over the options, keeping track of the most general one.
5904 for (const std::string &rCode : *rCodes) {
5905 ConstraintWeight weight =
5906 getSingleConstraintMatchWeight(info, rCode.c_str());
5907 if (weight > BestWeight)
5908 BestWeight = weight;
5909 }
5910
5911 return BestWeight;
5912}
5913
5914/// Examine constraint type and operand type and determine a weight value.
5915/// This object must already have been set up with the operand type
5916/// and the current alternative constraint selected.
5919 AsmOperandInfo &info, const char *constraint) const {
5920 ConstraintWeight weight = CW_Invalid;
5921 Value *CallOperandVal = info.CallOperandVal;
5922 // If we don't have a value, we can't do a match,
5923 // but allow it at the lowest weight.
5924 if (!CallOperandVal)
5925 return CW_Default;
5926 // Look at the constraint type.
 // Only the first letter of the constraint string is inspected here.
5927 switch (*constraint) {
5928 case 'i': // immediate integer.
5929 case 'n': // immediate integer with a known value.
5930 if (isa<ConstantInt>(CallOperandVal))
5931 weight = CW_Constant;
5932 break;
5933 case 's': // non-explicit integral immediate.
5934 if (isa<GlobalValue>(CallOperandVal))
5935 weight = CW_Constant;
5936 break;
5937 case 'E': // immediate float if host format.
5938 case 'F': // immediate float.
5939 if (isa<ConstantFP>(CallOperandVal))
5940 weight = CW_Constant;
5941 break;
5942 case '<': // memory operand with autodecrement.
5943 case '>': // memory operand with autoincrement.
5944 case 'm': // memory operand.
5945 case 'o': // offsettable memory operand
5946 case 'V': // non-offsettable memory operand
5947 weight = CW_Memory;
5948 break;
5949 case 'r': // general register.
5950 case 'g': // general register, memory operand or immediate integer.
5951 // note: Clang converts "g" to "imr".
5952 if (CallOperandVal->getType()->isIntegerTy())
5953 weight = CW_Register;
5954 break;
5955 case 'X': // any operand.
5956 default:
5957 weight = CW_Default;
5958 break;
5959 }
5960 return weight;
5961}
5962
5963/// If there are multiple different constraints that we could pick for this
5964/// operand (e.g. "imr") try to pick the 'best' one.
5965/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
5966/// into seven classes:
5967/// Register -> one specific register
5968/// RegisterClass -> a group of regs
5969/// Memory -> memory
5970/// Address -> a symbolic memory reference
5971/// Immediate -> immediate values
5972/// Other -> magic values (such as "Flag Output Operands")
5973/// Unknown -> something we don't recognize yet and can't handle
5974/// Ideally, we would pick the most specific constraint possible: if we have
5975/// something that fits into a register, we would pick it. The problem here
5976/// is that if we have something that could either be in a register or in
5977/// memory that use of the register could cause selection of *other*
5978/// operands to fail: they might only succeed if we pick memory. Because of
5979/// this the heuristic we use is:
5980///
5981/// 1) If there is an 'other' constraint, and if the operand is valid for
5982/// that constraint, use it. This makes us take advantage of 'i'
5983/// constraints when available.
5984/// 2) Otherwise, pick the most general constraint present. This prefers
5985/// 'm' over 'r', for example.
5986///
5988 TargetLowering::AsmOperandInfo &OpInfo) const {
5989 ConstraintGroup Ret;
5990
5991 Ret.reserve(OpInfo.Codes.size());
5992 for (StringRef Code : OpInfo.Codes) {
5993 TargetLowering::ConstraintType CType = getConstraintType(Code);
5994
5995 // Indirect 'other' or 'immediate' constraints are not allowed.
 // NOTE(review): one alternative of this condition was lost in this
 // rendering (presumably a register-class check) — confirm upstream.
5996 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
5997 CType == TargetLowering::C_Register ||
5999 continue;
6000
6001 // Things with matching constraints can only be registers, per gcc
6002 // documentation. This mainly affects "g" constraints.
6003 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6004 continue;
6005
6006 Ret.emplace_back(Code, CType);
6007 }
6008
 // Stable sort keeps the written order among equally-preferred codes.
6009 std::stable_sort(
6010 Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
6011 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6012 });
6013
6014 return Ret;
6015}
6016
6017/// If we have an immediate, see if we can lower it. Return true if we can,
6018/// false otherwise.
6020 SDValue Op, SelectionDAG *DAG,
6021 const TargetLowering &TLI) {
6022
6023 assert((P.second == TargetLowering::C_Other ||
6024 P.second == TargetLowering::C_Immediate) &&
6025 "need immediate or other");
6026
6027 if (!Op.getNode())
6028 return false;
6029
 // Success is signalled by the constraint lowering producing at least one
 // operand; the produced operands themselves are discarded here.
6030 std::vector<SDValue> ResultOps;
6031 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6032 return !ResultOps.empty();
6033}
6034
6035/// Determines the constraint code and constraint type to use for the specific
6036/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6038 SDValue Op,
6039 SelectionDAG *DAG) const {
6040 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6041
6042 // Single-letter constraints ('r') are very common.
6043 if (OpInfo.Codes.size() == 1) {
6044 OpInfo.ConstraintCode = OpInfo.Codes[0];
6045 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6046 } else {
6047 ConstraintGroup G = getConstraintPreferences(OpInfo);
6048 if (G.empty())
6049 return;
6050
 // Walk the immediate/other prefix of the preference-sorted group, taking
 // the first constraint whose immediate actually lowers; if none does,
 // fall back to the overall most-preferred constraint (index 0).
6051 unsigned BestIdx = 0;
6052 for (const unsigned E = G.size();
6053 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6054 G[BestIdx].second == TargetLowering::C_Immediate);
6055 ++BestIdx) {
6056 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6057 break;
6058 // If we're out of constraints, just pick the first one.
6059 if (BestIdx + 1 == E) {
6060 BestIdx = 0;
6061 break;
6062 }
6063 }
6064
6065 OpInfo.ConstraintCode = G[BestIdx].first;
6066 OpInfo.ConstraintType = G[BestIdx].second;
6067 }
6068
6069 // 'X' matches anything.
6070 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6071 // Constants are handled elsewhere. For Functions, the type here is the
6072 // type of the result, which is not what we want to look at; leave them
6073 // alone.
6074 Value *v = OpInfo.CallOperandVal;
6075 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6076 return;
6077 }
6078
 // Branch targets can always be encoded as an immediate.
6079 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6080 OpInfo.ConstraintCode = "i";
6081 return;
6082 }
6083
6084 // Otherwise, try to resolve it to something we know about by looking at
6085 // the actual operand type.
6086 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6087 OpInfo.ConstraintCode = Repl;
6088 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6089 }
6090 }
6091}
6092
6093/// Given an exact SDIV by a constant, create a multiplication
6094/// with the multiplicative inverse of the constant.
6095/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6097 const SDLoc &dl, SelectionDAG &DAG,
6098 SmallVectorImpl<SDNode *> &Created) {
6099 SDValue Op0 = N->getOperand(0);
6100 SDValue Op1 = N->getOperand(1);
6101 EVT VT = N->getValueType(0);
6102 EVT SVT = VT.getScalarType();
6103 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6104 EVT ShSVT = ShVT.getScalarType();
6105
6106 bool UseSRA = false;
6107 SmallVector<SDValue, 16> Shifts, Factors;
6108
 // Per divisor element: strip trailing zero bits (handled later by an exact
 // arithmetic shift) and record the remaining odd factor's inverse.
6109 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6110 if (C->isZero())
6111 return false;
6112 APInt Divisor = C->getAPIntValue();
6113 unsigned Shift = Divisor.countr_zero();
6114 if (Shift) {
6115 Divisor.ashrInPlace(Shift);
6116 UseSRA = true;
6117 }
6118 APInt Factor = Divisor.multiplicativeInverse();
6119 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6120 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6121 return true;
6122 };
6123
6124 // Collect all magic values from the build vector.
6125 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6126 return SDValue();
6127
 // Re-assemble the per-element shifts/factors in the same form as the
 // divisor operand (build vector, splat, or scalar).
6128 SDValue Shift, Factor;
6129 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6130 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6131 Factor = DAG.getBuildVector(VT, dl, Factors);
6132 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6133 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6134 "Expected matchUnaryPredicate to return one element for scalable "
6135 "vectors");
6136 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6137 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6138 } else {
6139 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6140 Shift = Shifts[0];
6141 Factor = Factors[0];
6142 }
6143
6144 SDValue Res = Op0;
6145 if (UseSRA) {
 // The division is exact, so the shifted-out low bits are zero; mark the
 // shift 'exact' to preserve that fact for later combines.
6146 SDNodeFlags Flags;
6147 Flags.setExact(true);
6148 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
6149 Created.push_back(Res.getNode());
6150 }
6151
6152 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6153}
6154
6155/// Given an exact UDIV by a constant, create a multiplication
6156/// with the multiplicative inverse of the constant.
6157/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6159 const SDLoc &dl, SelectionDAG &DAG,
6160 SmallVectorImpl<SDNode *> &Created) {
6161 EVT VT = N->getValueType(0);
6162 EVT SVT = VT.getScalarType();
6163 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6164 EVT ShSVT = ShVT.getScalarType();
6165
6166 bool UseSRL = false;
6167 SmallVector<SDValue, 16> Shifts, Factors;
6168
 // Per divisor element: strip trailing zero bits (handled later by an exact
 // logical shift) and record the remaining odd factor's inverse.
6169 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6170 if (C->isZero())
6171 return false;
6172 APInt Divisor = C->getAPIntValue();
6173 unsigned Shift = Divisor.countr_zero();
6174 if (Shift) {
6175 Divisor.lshrInPlace(Shift);
6176 UseSRL = true;
6177 }
6178 // Calculate the multiplicative inverse modulo BW.
6179 APInt Factor = Divisor.multiplicativeInverse();
6180 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6181 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6182 return true;
6183 };
6184
6185 SDValue Op1 = N->getOperand(1);
6186
6187 // Collect all magic values from the build vector.
6188 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6189 return SDValue();
6190
 // Re-assemble the per-element shifts/factors in the same form as the
 // divisor operand (build vector, splat, or scalar).
6191 SDValue Shift, Factor;
6192 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6193 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6194 Factor = DAG.getBuildVector(VT, dl, Factors);
6195 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6196 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6197 "Expected matchUnaryPredicate to return one element for scalable "
6198 "vectors");
6199 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6200 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6201 } else {
6202 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6203 Shift = Shifts[0];
6204 Factor = Factors[0];
6205 }
6206
6207 SDValue Res = N->getOperand(0);
6208 if (UseSRL) {
 // The division is exact, so the shifted-out low bits are zero; mark the
 // shift 'exact' to preserve that fact for later combines.
6209 SDNodeFlags Flags;
6210 Flags.setExact(true);
6211 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, Flags);
6212 Created.push_back(Res.getNode());
6213 }
6214
6215 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6216}
6217
6219 SelectionDAG &DAG,
6220 SmallVectorImpl<SDNode *> &Created) const {
6222 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6223 if (TLI.isIntDivCheap(N->getValueType(0), Attr))
6224 return SDValue(N, 0); // Lower SDIV as SDIV
6225 return SDValue();
6226}
6227
6228SDValue
6230 SelectionDAG &DAG,
6231 SmallVectorImpl<SDNode *> &Created) const {
6233 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6234 if (TLI.isIntDivCheap(N->getValueType(0), Attr))
6235 return SDValue(N, 0); // Lower SREM as SREM
6236 return SDValue();
6237}
6238
6239/// Build sdiv by power-of-2 with conditional move instructions
6240/// Ref: "Hacker's Delight" by Henry Warren 10-1
6241/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6242/// bgez x, label
6243/// add x, x, 2**k-1
6244/// label:
6245/// sra res, x, k
6246/// neg res, res (when the divisor is negative)
6248 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6249 SmallVectorImpl<SDNode *> &Created) const {
6250 unsigned Lg2 = Divisor.countr_zero();
6251 EVT VT = N->getValueType(0);
6252
6253 SDLoc DL(N);
6254 SDValue N0 = N->getOperand(0);
6255 SDValue Zero = DAG.getConstant(0, DL, VT);
6256 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6257 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6258
6259 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6260 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6261 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6262 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6263 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6264
6265 Created.push_back(Cmp.getNode());
6266 Created.push_back(Add.getNode());
6267 Created.push_back(CMov.getNode());
6268
6269 // Divide by pow2.
6270 SDValue SRA =
6271 DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
6272
6273 // If we're dividing by a positive value, we're done. Otherwise, we must
6274 // negate the result.
6275 if (Divisor.isNonNegative())
6276 return SRA;
6277
6278 Created.push_back(SRA.getNode());
6279 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6280}
6281
6282/// Given an ISD::SDIV node expressing a divide by constant,
6283/// return a DAG expression to select that will generate the same value by
6284/// multiplying by a magic number.
6285/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6287 bool IsAfterLegalization,
6288 SmallVectorImpl<SDNode *> &Created) const {
6289 SDLoc dl(N);
6290 EVT VT = N->getValueType(0);
6291 EVT SVT = VT.getScalarType();
6292 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6293 EVT ShSVT = ShVT.getScalarType();
6294 unsigned EltBits = VT.getScalarSizeInBits();
6295 EVT MulVT;
6296
6297 // Check to see if we can do this.
6298 // FIXME: We should be more aggressive here.
6299 if (!isTypeLegal(VT)) {
6300 // Limit this to simple scalars for now.
6301 if (VT.isVector() || !VT.isSimple())
6302 return SDValue();
6303
6304 // If this type will be promoted to a large enough type with a legal
6305 // multiply operation, we can go ahead and do this transform.
6307 return SDValue();
6308
6309 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6310 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6311 !isOperationLegal(ISD::MUL, MulVT))
6312 return SDValue();
6313 }
6314
6315 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6316 if (N->getFlags().hasExact())
6317 return BuildExactSDIV(*this, N, dl, DAG, Created);
6318
6319 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6320
6321 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6322 if (C->isZero())
6323 return false;
6324
6325 const APInt &Divisor = C->getAPIntValue();
6327 int NumeratorFactor = 0;
6328 int ShiftMask = -1;
6329
6330 if (Divisor.isOne() || Divisor.isAllOnes()) {
6331 // If d is +1/-1, we just multiply the numerator by +1/-1.
6332 NumeratorFactor = Divisor.getSExtValue();
6333 magics.Magic = 0;
6334 magics.ShiftAmount = 0;
6335 ShiftMask = 0;
6336 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6337 // If d > 0 and m < 0, add the numerator.
6338 NumeratorFactor = 1;
6339 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6340 // If d < 0 and m > 0, subtract the numerator.
6341 NumeratorFactor = -1;
6342 }
6343
6344 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6345 Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
6346 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6347 ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
6348 return true;
6349 };
6350
6351 SDValue N0 = N->getOperand(0);
6352 SDValue N1 = N->getOperand(1);
6353
6354 // Collect the shifts / magic values from each element.
6355 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
6356 return SDValue();
6357
6358 SDValue MagicFactor, Factor, Shift, ShiftMask;
6359 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6360 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6361 Factor = DAG.getBuildVector(VT, dl, Factors);
6362 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6363 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6364 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6365 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6366 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6367 "Expected matchUnaryPredicate to return one element for scalable "
6368 "vectors");
6369 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6370 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6371 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6372 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6373 } else {
6374 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6375 MagicFactor = MagicFactors[0];
6376 Factor = Factors[0];
6377 Shift = Shifts[0];
6378 ShiftMask = ShiftMasks[0];
6379 }
6380
6381 // Multiply the numerator (operand 0) by the magic value.
6382 // FIXME: We should support doing a MUL in a wider type.
6383 auto GetMULHS = [&](SDValue X, SDValue Y) {
6384 // If the type isn't legal, use a wider mul of the type calculated
6385 // earlier.
6386 if (!isTypeLegal(VT)) {
6387 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6388 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6389 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6390 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6391 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6392 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6393 }
6394
6395 if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6396 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6397 if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6398 SDValue LoHi =
6399 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6400 return SDValue(LoHi.getNode(), 1);
6401 }
6402 // If type twice as wide legal, widen and use a mul plus a shift.
6403 unsigned Size = VT.getScalarSizeInBits();
6404 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6405 if (VT.isVector())
6406 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6408 if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6409 X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
6410 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6411 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6412 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6413 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6414 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6415 }
6416 return SDValue();
6417 };
6418
6419 SDValue Q = GetMULHS(N0, MagicFactor);
6420 if (!Q)
6421 return SDValue();
6422
6423 Created.push_back(Q.getNode());
6424
6425 // (Optionally) Add/subtract the numerator using Factor.
6426 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6427 Created.push_back(Factor.getNode());
6428 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6429 Created.push_back(Q.getNode());
6430
6431 // Shift right algebraic by shift value.
6432 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6433 Created.push_back(Q.getNode());
6434
6435 // Extract the sign bit, mask it and add it to the quotient.
6436 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6437 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6438 Created.push_back(T.getNode());
6439 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6440 Created.push_back(T.getNode());
6441 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6442}
6443
6444/// Given an ISD::UDIV node expressing a divide by constant,
6445/// return a DAG expression to select that will generate the same value by
6446/// multiplying by a magic number.
6447/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6449 bool IsAfterLegalization,
6450 SmallVectorImpl<SDNode *> &Created) const {
6451 SDLoc dl(N);
6452 EVT VT = N->getValueType(0);
6453 EVT SVT = VT.getScalarType();
6454 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6455 EVT ShSVT = ShVT.getScalarType();
6456 unsigned EltBits = VT.getScalarSizeInBits();
6457 EVT MulVT;
6458
6459 // Check to see if we can do this.
6460 // FIXME: We should be more aggressive here.
6461 if (!isTypeLegal(VT)) {
6462 // Limit this to simple scalars for now.
6463 if (VT.isVector() || !VT.isSimple())
6464 return SDValue();
6465
6466 // If this type will be promoted to a large enough type with a legal
6467 // multiply operation, we can go ahead and do this transform.
6469 return SDValue();
6470
6471 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6472 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6473 !isOperationLegal(ISD::MUL, MulVT))
6474 return SDValue();
6475 }
6476
6477 // If the udiv has an 'exact' bit we can use a simpler lowering.
6478 if (N->getFlags().hasExact())
6479 return BuildExactUDIV(*this, N, dl, DAG, Created);
6480
6481 SDValue N0 = N->getOperand(0);
6482 SDValue N1 = N->getOperand(1);
6483
6484 // Try to use leading zeros of the dividend to reduce the multiplier and
6485 // avoid expensive fixups.
6486 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6487
6488 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6489 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6490
6491 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6492 if (C->isZero())
6493 return false;
6494 const APInt& Divisor = C->getAPIntValue();
6495
6496 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6497
6498 // Magic algorithm doesn't work for division by 1. We need to emit a select
6499 // at the end.
6500 if (Divisor.isOne()) {
6501 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6502 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6503 } else {
6506 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
6507
6508 MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
6509
6510 assert(magics.PreShift < Divisor.getBitWidth() &&
6511 "We shouldn't generate an undefined shift!");
6512 assert(magics.PostShift < Divisor.getBitWidth() &&
6513 "We shouldn't generate an undefined shift!");
6514 assert((!magics.IsAdd || magics.PreShift == 0) &&
6515 "Unexpected pre-shift");
6516 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6517 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
6518 NPQFactor = DAG.getConstant(
6519 magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
6520 : APInt::getZero(EltBits),
6521 dl, SVT);
6522 UseNPQ |= magics.IsAdd;
6523 UsePreShift |= magics.PreShift != 0;
6524 UsePostShift |= magics.PostShift != 0;
6525 }
6526
6527 PreShifts.push_back(PreShift);
6528 MagicFactors.push_back(MagicFactor);
6529 NPQFactors.push_back(NPQFactor);
6530 PostShifts.push_back(PostShift);
6531 return true;
6532 };
6533
6534 // Collect the shifts/magic values from each element.
6535 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
6536 return SDValue();
6537
6538 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6539 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6540 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6541 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6542 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6543 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6544 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6545 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6546 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6547 "Expected matchUnaryPredicate to return one for scalable vectors");
6548 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6549 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6550 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6551 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6552 } else {
6553 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6554 PreShift = PreShifts[0];
6555 MagicFactor = MagicFactors[0];
6556 PostShift = PostShifts[0];
6557 }
6558
6559 SDValue Q = N0;
6560 if (UsePreShift) {
6561 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6562 Created.push_back(Q.getNode());
6563 }
6564
6565 // FIXME: We should support doing a MUL in a wider type.
6566 auto GetMULHU = [&](SDValue X, SDValue Y) {
6567 // If the type isn't legal, use a wider mul of the type calculated
6568 // earlier.
6569 if (!isTypeLegal(VT)) {
6570 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
6571 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
6572 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6573 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6574 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6575 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6576 }
6577
6578 if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
6579 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6580 if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
6581 SDValue LoHi =
6582 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6583 return SDValue(LoHi.getNode(), 1);
6584 }
6585 // If type twice as wide legal, widen and use a mul plus a shift.
6586 unsigned Size = VT.getScalarSizeInBits();
6587 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6588 if (VT.isVector())
6589 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6591 if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6592 X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
6593 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
6594 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6595 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6596 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6597 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6598 }
6599 return SDValue(); // No mulhu or equivalent
6600 };
6601
6602 // Multiply the numerator (operand 0) by the magic value.
6603 Q = GetMULHU(Q, MagicFactor);
6604 if (!Q)
6605 return SDValue();
6606
6607 Created.push_back(Q.getNode());
6608
6609 if (UseNPQ) {
6610 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6611 Created.push_back(NPQ.getNode());
6612
6613 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6614 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6615 if (VT.isVector())
6616 NPQ = GetMULHU(NPQ, NPQFactor);
6617 else
6618 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6619
6620 Created.push_back(NPQ.getNode());
6621
6622 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6623 Created.push_back(Q.getNode());
6624 }
6625
6626 if (UsePostShift) {
6627 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
6628 Created.push_back(Q.getNode());
6629 }
6630
6631 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6632
6633 SDValue One = DAG.getConstant(1, dl, VT);
6634 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
6635 return DAG.getSelect(dl, VT, IsOne, N0, Q);
6636}
6637
6638/// If all values in Values that *don't* match the predicate are same 'splat'
6639/// value, then replace all values with that splat value.
6640/// Else, if AlternativeReplacement was provided, then replace all values that
6641/// do match predicate with AlternativeReplacement value.
6642static void
6644 std::function<bool(SDValue)> Predicate,
6645 SDValue AlternativeReplacement = SDValue()) {
6646 SDValue Replacement;
6647 // Is there a value for which the Predicate does *NOT* match? What is it?
6648 auto SplatValue = llvm::find_if_not(Values, Predicate);
6649 if (SplatValue != Values.end()) {
6650 // Does Values consist only of SplatValue's and values matching Predicate?
6651 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6652 return Value == *SplatValue || Predicate(Value);
6653 })) // Then we shall replace values matching predicate with SplatValue.
6654 Replacement = *SplatValue;
6655 }
6656 if (!Replacement) {
6657 // Oops, we did not find the "baseline" splat value.
6658 if (!AlternativeReplacement)
6659 return; // Nothing to do.
6660 // Let's replace with provided value then.
6661 Replacement = AlternativeReplacement;
6662 }
6663 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6664}
6665
6666/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6667/// where the divisor is constant and the comparison target is zero,
6668/// return a DAG expression that will generate the same comparison result
6669/// using only multiplications, additions and shifts/rotations.
6670/// Ref: "Hacker's Delight" 10-17.
6671SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6672 SDValue CompTargetNode,
6674 DAGCombinerInfo &DCI,
6675 const SDLoc &DL) const {
6677 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6678 DCI, DL, Built)) {
6679 for (SDNode *N : Built)
6680 DCI.AddToWorklist(N);
6681 return Folded;
6682 }
6683
6684 return SDValue();
6685}
6686
6687SDValue
6688TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6689 SDValue CompTargetNode, ISD::CondCode Cond,
6690 DAGCombinerInfo &DCI, const SDLoc &DL,
6691 SmallVectorImpl<SDNode *> &Created) const {
6692 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6693 // - D must be constant, with D = D0 * 2^K where D0 is odd
6694 // - P is the multiplicative inverse of D0 modulo 2^W
6695 // - Q = floor(((2^W) - 1) / D)
6696 // where W is the width of the common type of N and D.
6697 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6698 "Only applicable for (in)equality comparisons.");
6699
6700 SelectionDAG &DAG = DCI.DAG;
6701
6702 EVT VT = REMNode.getValueType();
6703 EVT SVT = VT.getScalarType();
6704 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6705 EVT ShSVT = ShVT.getScalarType();
6706
6707 // If MUL is unavailable, we cannot proceed in any case.
6708 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6709 return SDValue();
6710
6711 bool ComparingWithAllZeros = true;
6712 bool AllComparisonsWithNonZerosAreTautological = true;
6713 bool HadTautologicalLanes = false;
6714 bool AllLanesAreTautological = true;
6715 bool HadEvenDivisor = false;
6716 bool AllDivisorsArePowerOfTwo = true;
6717 bool HadTautologicalInvertedLanes = false;
6718 SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
6719
6720 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6721 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6722 if (CDiv->isZero())
6723 return false;
6724
6725 const APInt &D = CDiv->getAPIntValue();
6726 const APInt &Cmp = CCmp->getAPIntValue();
6727
6728 ComparingWithAllZeros &= Cmp.isZero();
6729
6730 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6731 // if C2 is not less than C1, the comparison is always false.
6732 // But we will only be able to produce the comparison that will give the
6733 // opposive tautological answer. So this lane would need to be fixed up.
6734 bool TautologicalInvertedLane = D.ule(Cmp);
6735 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6736
6737 // If all lanes are tautological (either all divisors are ones, or divisor
6738 // is not greater than the constant we are comparing with),
6739 // we will prefer to avoid the fold.
6740 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6741 HadTautologicalLanes |= TautologicalLane;
6742 AllLanesAreTautological &= TautologicalLane;
6743
6744 // If we are comparing with non-zero, we need'll need to subtract said
6745 // comparison value from the LHS. But there is no point in doing that if
6746 // every lane where we are comparing with non-zero is tautological..
6747 if (!Cmp.isZero())
6748 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6749
6750 // Decompose D into D0 * 2^K
6751 unsigned K = D.countr_zero();
6752 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6753 APInt D0 = D.lshr(K);
6754
6755 // D is even if it has trailing zeros.
6756 HadEvenDivisor |= (K != 0);
6757 // D is a power-of-two if D0 is one.
6758 // If all divisors are power-of-two, we will prefer to avoid the fold.
6759 AllDivisorsArePowerOfTwo &= D0.isOne();
6760
6761 // P = inv(D0, 2^W)
6762 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6763 unsigned W = D.getBitWidth();
6765 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6766
6767 // Q = floor((2^W - 1) u/ D)
6768 // R = ((2^W - 1) u% D)
6769 APInt Q, R;
6771
6772 // If we are comparing with zero, then that comparison constant is okay,
6773 // else it may need to be one less than that.
6774 if (Cmp.ugt(R))
6775 Q -= 1;
6776
6778 "We are expecting that K is always less than all-ones for ShSVT");
6779
6780 // If the lane is tautological the result can be constant-folded.
6781 if (TautologicalLane) {
6782 // Set P and K amount to a bogus values so we can try to splat them.
6783 P = 0;
6784 K = -1;
6785 // And ensure that comparison constant is tautological,
6786 // it will always compare true/false.
6787 Q = -1;
6788 }
6789
6790 PAmts.push_back(DAG.getConstant(P, DL, SVT));
6791 KAmts.push_back(
6792 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
6793 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6794 return true;
6795 };
6796
6797 SDValue N = REMNode.getOperand(0);
6798 SDValue D = REMNode.getOperand(1);
6799
6800 // Collect the values from each element.
6801 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
6802 return SDValue();
6803
6804 // If all lanes are tautological, the result can be constant-folded.
6805 if (AllLanesAreTautological)
6806 return SDValue();
6807
6808 // If this is a urem by a powers-of-two, avoid the fold since it can be
6809 // best implemented as a bit test.
6810 if (AllDivisorsArePowerOfTwo)
6811 return SDValue();
6812
6813 SDValue PVal, KVal, QVal;
6814 if (D.getOpcode() == ISD::BUILD_VECTOR) {
6815 if (HadTautologicalLanes) {
6816 // Try to turn PAmts into a splat, since we don't care about the values
6817 // that are currently '0'. If we can't, just keep '0'`s.
6819 // Try to turn KAmts into a splat, since we don't care about the values
6820 // that are currently '-1'. If we can't, change them to '0'`s.
6822 DAG.getConstant(0, DL, ShSVT));
6823 }
6824
6825 PVal = DAG.getBuildVector(VT, DL, PAmts);
6826 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
6827 QVal = DAG.getBuildVector(VT, DL, QAmts);
6828 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6829 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
6830 "Expected matchBinaryPredicate to return one element for "
6831 "SPLAT_VECTORs");
6832 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
6833 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
6834 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
6835 } else {
6836 PVal = PAmts[0];
6837 KVal = KAmts[0];
6838 QVal = QAmts[0];
6839 }
6840
6841 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6842 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
6843 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
6844 assert(CompTargetNode.getValueType() == N.getValueType() &&
6845 "Expecting that the types on LHS and RHS of comparisons match.");
6846 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
6847 }
6848
6849 // (mul N, P)
6850 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
6851 Created.push_back(Op0.getNode());
6852
6853 // Rotate right only if any divisor was even. We avoid rotates for all-odd
6854 // divisors as a performance improvement, since rotating by 0 is a no-op.
6855 if (HadEvenDivisor) {
6856 // We need ROTR to do this.
6857 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
6858 return SDValue();
6859 // UREM: (rotr (mul N, P), K)
6860 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
6861 Created.push_back(Op0.getNode());
6862 }
6863
6864 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
6865 SDValue NewCC =
6866 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
6868 if (!HadTautologicalInvertedLanes)
6869 return NewCC;
6870
6871 // If any lanes previously compared always-false, the NewCC will give
6872 // always-true result for them, so we need to fixup those lanes.
6873 // Or the other way around for inequality predicate.
6874 assert(VT.isVector() && "Can/should only get here for vectors.");
6875 Created.push_back(NewCC.getNode());
6876
6877 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6878 // if C2 is not less than C1, the comparison is always false.
6879 // But we have produced the comparison that will give the
6880 // opposive tautological answer. So these lanes would need to be fixed up.
6881 SDValue TautologicalInvertedChannels =
6882 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
6883 Created.push_back(TautologicalInvertedChannels.getNode());
6884
6885 // NOTE: we avoid letting illegal types through even if we're before legalize
6886 // ops – legalization has a hard time producing good code for this.
6887 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
6888 // If we have a vector select, let's replace the comparison results in the
6889 // affected lanes with the correct tautological result.
6890 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
6891 DL, SETCCVT, SETCCVT);
6892 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
6893 Replacement, NewCC);
6894 }
6895
6896 // Else, we can just invert the comparison result in the appropriate lanes.
6897 //
6898 // NOTE: see the note above VSELECT above.
6899 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
6900 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
6901 TautologicalInvertedChannels);
6902
6903 return SDValue(); // Don't know how to lower.
6904}
6905
6906/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6907/// where the divisor is constant and the comparison target is zero,
6908/// return a DAG expression that will generate the same comparison result
6909/// using only multiplications, additions and shifts/rotations.
6910/// Ref: "Hacker's Delight" 10-17.
6911SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6912 SDValue CompTargetNode,
6914 DAGCombinerInfo &DCI,
6915 const SDLoc &DL) const {
6917 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6918 DCI, DL, Built)) {
6919 assert(Built.size() <= 7 && "Max size prediction failed.");
6920 for (SDNode *N : Built)
6921 DCI.AddToWorklist(N);
6922 return Folded;
6923 }
6924
6925 return SDValue();
6926}
6927
6928SDValue
6929TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6930 SDValue CompTargetNode, ISD::CondCode Cond,
6931 DAGCombinerInfo &DCI, const SDLoc &DL,
6932 SmallVectorImpl<SDNode *> &Created) const {
6933 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
6934 // Fold:
6935 // (seteq/ne (srem N, D), 0)
6936 // To:
6937 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
6938 //
6939 // - D must be constant, with D = D0 * 2^K where D0 is odd
6940 // - P is the multiplicative inverse of D0 modulo 2^W
6941 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
6942 // - Q = floor((2 * A) / (2^K))
6943 // where W is the width of the common type of N and D.
6944 //
6945 // When D is a power of two (and thus D0 is 1), the normal
6946 // formula for A and Q don't apply, because the derivation
6947 // depends on D not dividing 2^(W-1), and thus theorem ZRS
6948 // does not apply. This specifically fails when N = INT_MIN.
6949 //
6950 // Instead, for power-of-two D, we use:
6951 // - A = 2^(W-1)
6952 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
6953 // - Q = 2^(W-K) - 1
6954 // |-> Test that the top K bits are zero after rotation
6955 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6956 "Only applicable for (in)equality comparisons.");
6957
6958 SelectionDAG &DAG = DCI.DAG;
6959
6960 EVT VT = REMNode.getValueType();
6961 EVT SVT = VT.getScalarType();
6962 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6963 EVT ShSVT = ShVT.getScalarType();
6964
6965 // If we are after ops legalization, and MUL is unavailable, we can not
6966 // proceed.
6967 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6968 return SDValue();
6969
6970 // TODO: Could support comparing with non-zero too.
6971 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
6972 if (!CompTarget || !CompTarget->isZero())
6973 return SDValue();
6974
6975 bool HadIntMinDivisor = false;
6976 bool HadOneDivisor = false;
6977 bool AllDivisorsAreOnes = true;
6978 bool HadEvenDivisor = false;
6979 bool NeedToApplyOffset = false;
6980 bool AllDivisorsArePowerOfTwo = true;
6981 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
6982
6983 auto BuildSREMPattern = [&](ConstantSDNode *C) {
6984 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6985 if (C->isZero())
6986 return false;
6987
6988 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
6989
6990 // WARNING: this fold is only valid for positive divisors!
6991 APInt D = C->getAPIntValue();
6992 if (D.isNegative())
6993 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
6994
6995 HadIntMinDivisor |= D.isMinSignedValue();
6996
6997 // If all divisors are ones, we will prefer to avoid the fold.
6998 HadOneDivisor |= D.isOne();
6999 AllDivisorsAreOnes &= D.isOne();
7000
7001 // Decompose D into D0 * 2^K
7002 unsigned K = D.countr_zero();
7003 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7004 APInt D0 = D.lshr(K);
7005
7006 if (!D.isMinSignedValue()) {
7007 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7008 // we don't care about this lane in this fold, we'll special-handle it.
7009 HadEvenDivisor |= (K != 0);
7010 }
7011
7012 // D is a power-of-two if D0 is one. This includes INT_MIN.
7013 // If all divisors are power-of-two, we will prefer to avoid the fold.
7014 AllDivisorsArePowerOfTwo &= D0.isOne();
7015
7016 // P = inv(D0, 2^W)
7017 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7018 unsigned W = D.getBitWidth();
7020 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7021
7022 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7024 A.clearLowBits(K);
7025
7026 if (!D.isMinSignedValue()) {
7027 // If divisor INT_MIN, then we don't care about this lane in this fold,
7028 // we'll special-handle it.
7029 NeedToApplyOffset |= A != 0;
7030 }
7031
7032 // Q = floor((2 * A) / (2^K))
7033 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7034
7036 "We are expecting that A is always less than all-ones for SVT");
7038 "We are expecting that K is always less than all-ones for ShSVT");
7039
7040 // If D was a power of two, apply the alternate constant derivation.
7041 if (D0.isOne()) {
7042 // A = 2^(W-1)
7044 // - Q = 2^(W-K) - 1
7045 Q = APInt::getAllOnes(W - K).zext(W);
7046 }
7047
7048 // If the divisor is 1 the result can be constant-folded. Likewise, we
7049 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7050 if (D.isOne()) {
7051 // Set P, A and K to a bogus values so we can try to splat them.
7052 P = 0;
7053 A = -1;
7054 K = -1;
7055
7056 // x ?% 1 == 0 <--> true <--> x u<= -1
7057 Q = -1;
7058 }
7059
7060 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7061 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7062 KAmts.push_back(
7063 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
7064 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7065 return true;
7066 };
7067
7068 SDValue N = REMNode.getOperand(0);
7069 SDValue D = REMNode.getOperand(1);
7070
7071 // Collect the values from each element.
7072 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7073 return SDValue();
7074
7075 // If this is a srem by a one, avoid the fold since it can be constant-folded.
7076 if (AllDivisorsAreOnes)
7077 return SDValue();
7078
7079 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7080 // since it can be best implemented as a bit test.
7081 if (AllDivisorsArePowerOfTwo)
7082 return SDValue();
7083
7084 SDValue PVal, AVal, KVal, QVal;
7085 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7086 if (HadOneDivisor) {
7087 // Try to turn PAmts into a splat, since we don't care about the values
7088 // that are currently '0'. If we can't, just keep '0'`s.
7090 // Try to turn AAmts into a splat, since we don't care about the
7091 // values that are currently '-1'. If we can't, change them to '0'`s.
7093 DAG.getConstant(0, DL, SVT));
7094 // Try to turn KAmts into a splat, since we don't care about the values
7095 // that are currently '-1'. If we can't, change them to '0'`s.
7097 DAG.getConstant(0, DL, ShSVT));
7098 }
7099
7100 PVal = DAG.getBuildVector(VT, DL, PAmts);
7101 AVal = DAG.getBuildVector(VT, DL, AAmts);
7102 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7103 QVal = DAG.getBuildVector(VT, DL, QAmts);
7104 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7105 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7106 QAmts.size() == 1 &&
7107 "Expected matchUnaryPredicate to return one element for scalable "
7108 "vectors");
7109 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7110 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7111 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7112 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7113 } else {
7114 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7115 PVal = PAmts[0];
7116 AVal = AAmts[0];
7117 KVal = KAmts[0];
7118 QVal = QAmts[0];
7119 }
7120
7121 // (mul N, P)
7122 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7123 Created.push_back(Op0.getNode());
7124
7125 if (NeedToApplyOffset) {
7126 // We need ADD to do this.
7127 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7128 return SDValue();
7129
7130 // (add (mul N, P), A)
7131 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7132 Created.push_back(Op0.getNode());
7133 }
7134
7135 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7136 // divisors as a performance improvement, since rotating by 0 is a no-op.
7137 if (HadEvenDivisor) {
7138 // We need ROTR to do this.
7139 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7140 return SDValue();
7141 // SREM: (rotr (add (mul N, P), A), K)
7142 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7143 Created.push_back(Op0.getNode());
7144 }
7145
7146 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7147 SDValue Fold =
7148 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7150
7151 // If we didn't have lanes with INT_MIN divisor, then we're done.
7152 if (!HadIntMinDivisor)
7153 return Fold;
7154
7155 // That fold is only valid for positive divisors. Which effectively means,
7156 // it is invalid for INT_MIN divisors. So if we have such a lane,
7157 // we must fix-up results for said lanes.
7158 assert(VT.isVector() && "Can/should only get here for vectors.");
7159
7160 // NOTE: we avoid letting illegal types through even if we're before legalize
7161 // ops – legalization has a hard time producing good code for the code that
7162 // follows.
7163 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7167 return SDValue();
7168
7169 Created.push_back(Fold.getNode());
7170
7171 SDValue IntMin = DAG.getConstant(
7173 SDValue IntMax = DAG.getConstant(
7175 SDValue Zero =
7177
7178 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7179 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7180 Created.push_back(DivisorIsIntMin.getNode());
7181
7182 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7183 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7184 Created.push_back(Masked.getNode());
7185 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7186 Created.push_back(MaskedIsZero.getNode());
7187
7188 // To produce final result we need to blend 2 vectors: 'SetCC' and
7189 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7190 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7191 // constant-folded, select can get lowered to a shuffle with constant mask.
7192 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7193 MaskedIsZero, Fold);
7194
7195 return Blended;
7196}
7197
// Returns true (signalling an error) when operand 0 of the node is not a
// constant integer; in that case a diagnostic is emitted via the LLVMContext.
// Returns false when the operand is a ConstantSDNode and lowering may proceed.
// NOTE(review): the function signature line is elided in this extract —
// presumably TargetLowering::verifyReturnAddressArgumentIsConstant(SDValue Op,
// SelectionDAG &DAG); confirm against the full source.
 7200 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
 7201 DAG.getContext()->emitError("argument to '__builtin_return_address' must "
 7202 "be a constant integer");
 7203 return true;
 7204 }
 7205
 7206 return false;
 7207}
7208
// Builds a boolean SETCC node that is true when Op would be treated as zero
// for the purpose of a sqrt-input check under the given denormal-handling
// mode. Returns the comparison in the target's setcc result type for VT.
// NOTE(review): the first line of the signature is elided in this extract —
// presumably TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
// const DenormalMode &Mode); confirm against the full source.
 7210 const DenormalMode &Mode) const {
 7211 SDLoc DL(Op);
 7212 EVT VT = Op.getValueType();
 7213 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 7214 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
 7215
 7216 // This is specifically a check for the handling of denormal inputs, not the
 7217 // result.
 7218 if (Mode.Input == DenormalMode::PreserveSign ||
 7219 Mode.Input == DenormalMode::PositiveZero) {
 // Denormal inputs are flushed, so only an exact zero input behaves as zero.
 7220 // Test = X == 0.0
 7221 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
 7222 }
 7223
 // Denormals are honored (IEEE mode): anything below the smallest normalized
 // value must be treated as (effectively) zero for the estimate.
 7224 // Testing it with denormal inputs to avoid wrong estimate.
 7225 //
 7226 // Test = fabs(X) < SmallestNormal
 7227 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
 7228 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
 7229 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
 7230 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
 7231 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
 7232}
7233
// Recursively tries to build the negated form of Op (i.e. an expression
// computing -Op) without emitting an explicit FNEG, reporting via Cost
// whether the negation is cheaper than / equal to / more expensive than the
// original. Returns a null SDValue when no profitable negation exists.
// NOTE(review): the first signature line and several interior lines (e.g. the
// recursion-depth guard condition and the NegatibleCost local declarations)
// are elided in this extract; the line-number gaps below mark the elisions.
 7235 bool LegalOps, bool OptForSize,
 7237 unsigned Depth) const {
 7238 // fneg is removable even if it has multiple uses.
 7239 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
 7241 return Op.getOperand(0);
 7242 }
 7243
 7244 // Don't recurse exponentially.
 7246 return SDValue();
 7247
 7248 // Pre-increment recursion depth for use in recursive calls.
 7249 ++Depth;
 7250 const SDNodeFlags Flags = Op->getFlags();
 7251 const TargetOptions &Options = DAG.getTarget().Options;
 7252 EVT VT = Op.getValueType();
 7253 unsigned Opcode = Op.getOpcode();
 7254
 7255 // Don't allow anything with multiple uses unless we know it is free.
 7256 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
 7257 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
 7258 isFPExtFree(VT, Op.getOperand(0).getValueType());
 7259 if (!IsFreeExtend)
 7260 return SDValue();
 7261 }
 7262
 // Helper: drop a speculatively-created node again if nothing ended up
 // using it, so failed negation attempts don't leave garbage in the DAG.
 7263 auto RemoveDeadNode = [&](SDValue N) {
 7264 if (N && N.getNode()->use_empty())
 7265 DAG.RemoveDeadNode(N.getNode());
 7266 };
 7267
 7268 SDLoc DL(Op);
 7269
 7270 // Because getNegatedExpression can delete nodes we need a handle to keep
 7271 // temporary nodes alive in case the recursion manages to create an identical
 7272 // node.
 7273 std::list<HandleSDNode> Handles;
 7274
 7275 switch (Opcode) {
 7276 case ISD::ConstantFP: {
 7277 // Don't invert constant FP values after legalization unless the target says
 7278 // the negated constant is legal.
 7279 bool IsOpLegal =
 7281 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
 7282 OptForSize);
 7283
 7284 if (LegalOps && !IsOpLegal)
 7285 break;
 7286
 7287 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
 7288 V.changeSign();
 7289 SDValue CFP = DAG.getConstantFP(V, DL, VT);
 7290
 7291 // If we already have the use of the negated floating constant, it is free
 7292 // to negate it even it has multiple uses.
 7293 if (!Op.hasOneUse() && CFP.use_empty())
 7294 break;
 7296 return CFP;
 7297 }
 7298 case ISD::BUILD_VECTOR: {
 7299 // Only permit BUILD_VECTOR of constants.
 7300 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
 7301 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
 7302 }))
 7303 break;
 7304
 7305 bool IsOpLegal =
 7308 llvm::all_of(Op->op_values(), [&](SDValue N) {
 7309 return N.isUndef() ||
 7310 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
 7311 OptForSize);
 7312 });
 7313
 7314 if (LegalOps && !IsOpLegal)
 7315 break;
 7316
 // Rebuild the vector element-by-element with each constant's sign flipped;
 // undef lanes are passed through unchanged.
 7318 for (SDValue C : Op->op_values()) {
 7319 if (C.isUndef()) {
 7320 Ops.push_back(C);
 7321 continue;
 7322 }
 7323 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
 7324 V.changeSign();
 7325 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
 7326 }
 7328 return DAG.getBuildVector(VT, DL, Ops);
 7329 }
 7330 case ISD::FADD: {
 // -(X + Y) is only (X - Y)-rewritable when signed zeros don't matter.
 7331 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
 7332 break;
 7333
 7334 // After operation legalization, it might not be legal to create new FSUBs.
 7335 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
 7336 break;
 7337 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
 7338
 7339 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
 7341 SDValue NegX =
 7342 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
 7343 // Prevent this node from being deleted by the next call.
 7344 if (NegX)
 7345 Handles.emplace_back(NegX);
 7346
 7347 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
 7349 SDValue NegY =
 7350 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
 7351
 7352 // We're done with the handles.
 7353 Handles.clear();
 7354
 7355 // Negate the X if its cost is less or equal than Y.
 7356 if (NegX && (CostX <= CostY)) {
 7357 Cost = CostX;
 7358 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
 7359 if (NegY != N)
 7360 RemoveDeadNode(NegY);
 7361 return N;
 7362 }
 7363
 7364 // Negate the Y if it is not expensive.
 7365 if (NegY) {
 7366 Cost = CostY;
 7367 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
 7368 if (NegX != N)
 7369 RemoveDeadNode(NegX);
 7370 return N;
 7371 }
 7372 break;
 7373 }
 7374 case ISD::FSUB: {
 7375 // We can't turn -(A-B) into B-A when we honor signed zeros.
 7376 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
 7377 break;
 7378
 7379 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
 7380 // fold (fneg (fsub 0, Y)) -> Y
 7381 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
 7382 if (C->isZero()) {
 7384 return Y;
 7385 }
 7386
 7387 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
 7389 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
 7390 }
 7391 case ISD::FMUL:
 7392 case ISD::FDIV: {
 // For mul/div it suffices to negate exactly one operand.
 7393 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
 7394
 7395 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
 7397 SDValue NegX =
 7398 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
 7399 // Prevent this node from being deleted by the next call.
 7400 if (NegX)
 7401 Handles.emplace_back(NegX);
 7402
 7403 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
 7405 SDValue NegY =
 7406 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
 7407
 7408 // We're done with the handles.
 7409 Handles.clear();
 7410
 7411 // Negate the X if its cost is less or equal than Y.
 7412 if (NegX && (CostX <= CostY)) {
 7413 Cost = CostX;
 7414 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
 7415 if (NegY != N)
 7416 RemoveDeadNode(NegY);
 7417 return N;
 7418 }
 7419
 7420 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
 7421 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
 7422 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
 7423 break;
 7424
 7425 // Negate the Y if it is not expensive.
 7426 if (NegY) {
 7427 Cost = CostY;
 7428 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
 7429 if (NegX != N)
 7430 RemoveDeadNode(NegX);
 7431 return N;
 7432 }
 7433 break;
 7434 }
 7435 case ISD::FMA:
 7436 case ISD::FMAD: {
 7437 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
 7438 break;
 7439
 // -(X*Y + Z) requires negating Z plus exactly one of X or Y.
 7440 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
 7442 SDValue NegZ =
 7443 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
 7444 // Give up if fail to negate the Z.
 7445 if (!NegZ)
 7446 break;
 7447
 7448 // Prevent this node from being deleted by the next two calls.
 7449 Handles.emplace_back(NegZ);
 7450
 7451 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
 7453 SDValue NegX =
 7454 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
 7455 // Prevent this node from being deleted by the next call.
 7456 if (NegX)
 7457 Handles.emplace_back(NegX);
 7458
 7459 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
 7461 SDValue NegY =
 7462 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
 7463
 7464 // We're done with the handles.
 7465 Handles.clear();
 7466
 7467 // Negate the X if its cost is less or equal than Y.
 7468 if (NegX && (CostX <= CostY)) {
 7469 Cost = std::min(CostX, CostZ);
 7470 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
 7471 if (NegY != N)
 7472 RemoveDeadNode(NegY);
 7473 return N;
 7474 }
 7475
 7476 // Negate the Y if it is not expensive.
 7477 if (NegY) {
 7478 Cost = std::min(CostY, CostZ);
 7479 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
 7480 if (NegX != N)
 7481 RemoveDeadNode(NegX);
 7482 return N;
 7483 }
 7484 break;
 7485 }
 7486
 7487 case ISD::FP_EXTEND:
 7488 case ISD::FSIN:
 // Negation commutes through these unary ops: -f(x) == f(-x).
 7489 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
 7490 OptForSize, Cost, Depth))
 7491 return DAG.getNode(Opcode, DL, VT, NegV);
 7492 break;
 7493 case ISD::FP_ROUND:
 7494 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
 7495 OptForSize, Cost, Depth))
 7496 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
 7497 break;
 7498 case ISD::SELECT:
 7499 case ISD::VSELECT: {
 7500 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
 7501 // iff at least one cost is cheaper and the other is neutral/cheaper
 7502 SDValue LHS = Op.getOperand(1);
 7504 SDValue NegLHS =
 7505 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
 7506 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
 7507 RemoveDeadNode(NegLHS);
 7508 break;
 7509 }
 7510
 7511 // Prevent this node from being deleted by the next call.
 7512 Handles.emplace_back(NegLHS);
 7513
 7514 SDValue RHS = Op.getOperand(2);
 7516 SDValue NegRHS =
 7517 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
 7518
 7519 // We're done with the handles.
 7520 Handles.clear();
 7521
 7522 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
 7523 (CostLHS != NegatibleCost::Cheaper &&
 7524 CostRHS != NegatibleCost::Cheaper)) {
 7525 RemoveDeadNode(NegLHS);
 7526 RemoveDeadNode(NegRHS);
 7527 break;
 7528 }
 7529
 7530 Cost = std::min(CostLHS, CostRHS);
 7531 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
 7532 }
 7533 }
 7534
 // No profitable negated form was found.
 7535 return SDValue();
 7536}
7537
7538//===----------------------------------------------------------------------===//
7539// Legalization Utilities
7540//===----------------------------------------------------------------------===//
7541
// Expands a MUL / UMUL_LOHI / SMUL_LOHI on type VT into operations on the
// half-width type HiLoVT, using whichever of MULHS/MULHU/SMUL_LOHI/UMUL_LOHI
// the target supports (or all of them, when Kind == Always). LL/LH/RL/RH may
// optionally supply pre-split halves of LHS/RHS. On success, the half-width
// pieces of the product are appended to Result (lo first) and true is
// returned; on failure, false.
// NOTE(review): several lines are elided in this extract (e.g. the
// SmallVectorImpl<SDValue> &Result parameter line and the
// isOperationLegalOrCustom operands of the Has* initializers); the
// line-number gaps below mark the elisions.
7542bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
 7543 SDValue LHS, SDValue RHS,
 7545 EVT HiLoVT, SelectionDAG &DAG,
 7546 MulExpansionKind Kind, SDValue LL,
 7547 SDValue LH, SDValue RL, SDValue RH) const {
 7548 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
 7549 Opcode == ISD::SMUL_LOHI);
 7550
 // Which half-width multiply flavors may we emit?
 7551 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
 7553 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
 7555 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
 7557 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
 7559
 7560 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
 7561 return false;
 7562
 7563 unsigned OuterBitSize = VT.getScalarSizeInBits();
 7564 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
 7565
 7566 // LL, LH, RL, and RH must be either all NULL or all set to a value.
 7567 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
 7568 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
 7569
 // Helper: produce the lo/hi halves of L*R using the best available op;
 // returns false when no suitable multiply is available.
 7570 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
 7571 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
 7572 bool Signed) -> bool {
 7573 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
 7574 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
 7575 Hi = SDValue(Lo.getNode(), 1);
 7576 return true;
 7577 }
 7578 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
 7579 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
 7580 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
 7581 return true;
 7582 }
 7583 return false;
 7584 };
 7585
 7586 SDValue Lo, Hi;
 7587
 // Split the low halves ourselves if the caller didn't provide them.
 // NOTE(review): the guard condition on this if is partially elided here.
 7588 if (!LL.getNode() && !RL.getNode() &&
 7590 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
 7591 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
 7592 }
 7593
 7594 if (!LL.getNode())
 7595 return false;
 7596
 // Fast path: if both operands fit in the inner width (zero-extended),
 // a single half-width multiply yields the full product.
 7597 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
 7598 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
 7599 DAG.MaskedValueIsZero(RHS, HighMask)) {
 7600 // The inputs are both zero-extended.
 7601 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
 7602 Result.push_back(Lo);
 7603 Result.push_back(Hi);
 7604 if (Opcode != ISD::MUL) {
 7605 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
 7606 Result.push_back(Zero);
 7607 Result.push_back(Zero);
 7608 }
 7609 return true;
 7610 }
 7611 }
 7612
 7613 if (!VT.isVector() && Opcode == ISD::MUL &&
 7614 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
 7615 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
 7616 // The input values are both sign-extended.
 7617 // TODO non-MUL case?
 7618 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
 7619 Result.push_back(Lo);
 7620 Result.push_back(Hi);
 7621 return true;
 7622 }
 7623 }
 7624
 7625 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
 7626 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
 7627
 // Derive the high halves by shifting, if not supplied by the caller.
 // NOTE(review): this if's guard condition is partially elided here.
 7628 if (!LH.getNode() && !RH.getNode() &&
 7631 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
 7632 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
 7633 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
 7634 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
 7635 }
 7636
 7637 if (!LH.getNode())
 7638 return false;
 7639
 // Schoolbook multiplication: start with LL*RL; its lo half is final.
 7640 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
 7641 return false;
 7642
 7643 Result.push_back(Lo);
 7644
 7645 if (Opcode == ISD::MUL) {
 // Plain MUL only needs the low VT-sized product: fold the two
 // cross-products into the hi half and we're done.
 7646 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
 7647 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
 7648 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
 7649 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
 7650 Result.push_back(Hi);
 7651 return true;
 7652 }
 7653
 7654 // Compute the full width result.
 7655 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
 7656 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
 7657 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
 7658 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
 7659 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
 7660 };
 7661
 7662 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
 7663 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
 7664 return false;
 7665
 7666 // This is effectively the add part of a multiply-add of half-sized operands,
 7667 // so it cannot overflow.
 7668 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
 7669
 7670 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
 7671 return false;
 7672
 7673 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
 7674 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 7675
 // Legacy targets use glued ADDC/ADDE carry chains; otherwise use
 // UADDO_CARRY with an explicit boolean carry.
 // NOTE(review): the second operand of this && is elided in this extract.
 7676 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
 7678 if (UseGlue)
 7679 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
 7680 Merge(Lo, Hi));
 7681 else
 7682 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
 7683 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
 7684
 7685 SDValue Carry = Next.getValue(1);
 7686 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
 7687 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
 7688
 7689 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
 7690 return false;
 7691
 // Propagate the carry from the previous partial sum into the top product.
 7692 if (UseGlue)
 7693 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
 7694 Carry);
 7695 else
 7696 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
 7697 Zero, Carry);
 7698
 7699 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
 7700
 // For the signed variant, correct the unsigned product when either
 // operand's high half was negative.
 7701 if (Opcode == ISD::SMUL_LOHI) {
 7702 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
 7703 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
 7704 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
 7705
 7706 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
 7707 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
 7708 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
 7709 }
 7710
 7711 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
 7712 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
 7713 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
 7714 return true;
 7715}
7716
// Convenience wrapper around expandMUL_LOHI for a plain MUL node: on success
// it unpacks the two half-width pieces into Lo and Hi and returns true.
// NOTE(review): the first signature line and the local SmallVector `Result`
// declaration are elided in this extract; confirm against the full source.
 7718 SelectionDAG &DAG, MulExpansionKind Kind,
 7719 SDValue LL, SDValue LH, SDValue RL,
 7720 SDValue RH) const {
 7722 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
 7723 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
 7724 DAG, Kind, LL, LH, RL, RH);
 7725 if (Ok) {
 7726 assert(Result.size() == 2);
 7727 Lo = Result[0];
 7728 Hi = Result[1];
 7729 }
 7730 return Ok;
 7731}
7732
7733// Optimize unsigned division or remainder by constants for types twice as large
7734// as a legal VT.
7735//
7736// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7737// can be computed
7738// as:
7739// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7740// Remainder = Sum % Constant
7741// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7742//
7743// For division, we can compute the remainder using the algorithm described
7744// above, subtract it from the dividend to get an exact multiple of Constant.
 7745 // Then multiply that exact multiple by the multiplicative inverse modulo
7746// (1 << (BitWidth / 2)) to get the quotient.
7747
7748// If Constant is even, we can shift right the dividend and the divisor by the
7749// number of trailing zeros in Constant before applying the remainder algorithm.
7750// If we're after the quotient, we can subtract this value from the shifted
7751// dividend and multiply by the multiplicative inverse of the shifted divisor.
7752// If we want the remainder, we shift the value left by the number of trailing
7753// zeros and add the bits that were shifted out of the dividend.
// Expands an unsigned DIV/REM/DIVREM by a constant on a type twice as wide
// as the legal HiLoVT, using the "remainder by summing digits" technique
// (see the comment block above this function). Pushes the half-width result
// pieces into Result and returns true on success.
// NOTE(review): the first signature lines are elided in this extract —
// presumably bool TargetLowering::expandDIVREMByConstant(SDNode *N,
// SmallVectorImpl<SDValue> &Result, ...); the line-number gaps below mark
// further elided lines.
 7756 EVT HiLoVT, SelectionDAG &DAG,
 7757 SDValue LL, SDValue LH) const {
 7758 unsigned Opcode = N->getOpcode();
 7759 EVT VT = N->getValueType(0);
 7760
 7761 // TODO: Support signed division/remainder.
 7762 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
 7763 return false;
 7764 assert(
 7765 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
 7766 "Unexpected opcode");
 7767
 // Only constant divisors are handled by this expansion.
 7768 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
 7769 if (!CN)
 7770 return false;
 7771
 7772 APInt Divisor = CN->getAPIntValue();
 7773 unsigned BitWidth = Divisor.getBitWidth();
 7774 unsigned HBitWidth = BitWidth / 2;
 7776 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
 7777
 7778 // Divisor needs to be less than (1 << HBitWidth).
 7779 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
 7780 if (Divisor.uge(HalfMaxPlus1))
 7781 return false;
 7782
 7783 // We depend on the UREM by constant optimization in DAGCombiner that requires
 7784 // high multiply.
 7785 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
 7787 return false;
 7788
 7789 // Don't expand if optimizing for size.
 7790 if (DAG.shouldOptForSize())
 7791 return false;
 7792
 7793 // Early out for 0 or 1 divisors.
 7794 if (Divisor.ule(1))
 7795 return false;
 7796
 7797 // If the divisor is even, shift it until it becomes odd.
 7798 unsigned TrailingZeros = 0;
 7799 if (!Divisor[0]) {
 7800 TrailingZeros = Divisor.countr_zero();
 7801 Divisor.lshrInPlace(TrailingZeros);
 7802 }
 7803
 7804 SDLoc dl(N);
 7805 SDValue Sum;
 7806 SDValue PartialRem;
 7807
 7808 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
 7809 // then add in the carry.
 7810 // TODO: If we can't split it in half, we might be able to split into 3 or
 7811 // more pieces using a smaller bit width.
 7812 if (HalfMaxPlus1.urem(Divisor).isOne()) {
 7813 assert(!LL == !LH && "Expected both input halves or no input halves!");
 7814 if (!LL)
 7815 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
 7816
 7817 // Shift the input by the number of TrailingZeros in the divisor. The
 7818 // shifted out bits will be added to the remainder later.
 7819 if (TrailingZeros) {
 7820 // Save the shifted off bits if we need the remainder.
 7821 if (Opcode != ISD::UDIV) {
 7822 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
 7823 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
 7824 DAG.getConstant(Mask, dl, HiLoVT));
 7825 }
 7826
 // Funnel-shift the 2*HBitWidth value right by TrailingZeros across the
 // LL/LH halves.
 7827 LL = DAG.getNode(
 7828 ISD::OR, dl, HiLoVT,
 7829 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
 7830 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
 7831 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
 7832 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
 7833 HiLoVT, dl)));
 7834 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
 7835 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
 7836 }
 7837
 7838 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
 // NOTE(review): the branch condition (presumably a check that
 // UADDO/UADDO_CARRY are legal or custom) is elided in this extract.
 7839 EVT SetCCType =
 7840 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
 7842 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
 7843 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
 7844 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
 7845 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
 7846 } else {
 7847 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
 7848 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
 7849 // If the boolean for the target is 0 or 1, we can add the setcc result
 7850 // directly.
 7851 if (getBooleanContents(HiLoVT) ==
 7853 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
 7854 else
 7855 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
 7856 DAG.getConstant(0, dl, HiLoVT));
 7857 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
 7858 }
 7859 }
 7860
 7861 // If we didn't find a sum, we can't do the expansion.
 7862 if (!Sum)
 7863 return false;
 7864
 7865 // Perform a HiLoVT urem on the Sum using truncated divisor.
 7866 SDValue RemL =
 7867 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
 7868 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
 7869 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
 7870
 7871 if (Opcode != ISD::UREM) {
 7872 // Subtract the remainder from the shifted dividend.
 7873 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
 7874 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
 7875
 7876 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
 7877
 7878 // Multiply by the multiplicative inverse of the divisor modulo
 7879 // (1 << BitWidth).
 7880 APInt MulFactor = Divisor.multiplicativeInverse();
 7881
 7882 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
 7883 DAG.getConstant(MulFactor, dl, VT));
 7884
 7885 // Split the quotient into low and high parts.
 7886 SDValue QuotL, QuotH;
 7887 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
 7888 Result.push_back(QuotL);
 7889 Result.push_back(QuotH);
 7890 }
 7891
 7892 if (Opcode != ISD::UDIV) {
 7893 // If we shifted the input, shift the remainder left and add the bits we
 7894 // shifted off the input.
 7895 if (TrailingZeros) {
 7896 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
 7897 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
 7898 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
 7899 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
 7900 }
 7901 Result.push_back(RemL);
 7902 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
 7903 }
 7904
 7905 return true;
 7906}
7907
7908// Check that (every element of) Z is undef or not an exact multiple of BW.
// Returns true iff every constant element of Z is a shift amount that is NOT
// an exact multiple of the bit width BW; undef elements are allowed (the
// trailing `true` is the AllowUndefs flag).
// NOTE(review): the line invoking ISD::matchUnaryPredicate( is elided in this
// extract; the arguments below belong to that call.
7909static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
 7911 Z,
 7912 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
 7913 true);
 7914}
7915
// Expands a VP_FSHL/VP_FSHR (vector-predicated funnel shift) node into
// predicated shifts, masks, and an OR, threading the mask and EVL operands
// through every intermediate node.
// NOTE(review): the signature line is elided in this extract — presumably
// SDValue TargetLowering::expandVPFunnelShift(SDNode *Node, SelectionDAG
// &DAG) const; confirm against the full source.
 7917 EVT VT = Node->getValueType(0);
 7918 SDValue ShX, ShY;
 7919 SDValue ShAmt, InvShAmt;
 7920 SDValue X = Node->getOperand(0);
 7921 SDValue Y = Node->getOperand(1);
 7922 SDValue Z = Node->getOperand(2);
 7923 SDValue Mask = Node->getOperand(3);
 7924 SDValue VL = Node->getOperand(4);
 7925
 7926 unsigned BW = VT.getScalarSizeInBits();
 7927 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
 7928 SDLoc DL(SDValue(Node, 0));
 7929
 7930 EVT ShVT = Z.getValueType();
 // When the shift amount is provably never a multiple of BW, the simple
 // two-shift form is safe (neither shift amount can equal BW).
 7931 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
 7932 // fshl: X << C | Y >> (BW - C)
 7933 // fshr: X << (BW - C) | Y >> C
 7934 // where C = Z % BW is not zero
 7935 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
 7936 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
 7937 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
 7938 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
 7939 VL);
 7940 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
 7941 VL);
 7942 } else {
 // General case: split the inverse shift into "shift by 1, then by
 // BW-1-(Z%BW)" so no single shift amount can reach BW (which would be UB).
 7943 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
 7944 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
 7945 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
 7946 if (isPowerOf2_32(BW)) {
 7947 // Z % BW -> Z & (BW - 1)
 7948 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
 7949 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
 7950 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
 7951 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
 7952 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
 7953 } else {
 7954 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
 7955 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
 7956 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
 7957 }
 7958
 7959 SDValue One = DAG.getConstant(1, DL, ShVT);
 7960 if (IsFSHL) {
 7961 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
 7962 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
 7963 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
 7964 } else {
 7965 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
 7966 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
 7967 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
 7968 }
 7969 }
 7970 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
 7971}
7972
// Expands an ISD::FSHL / ISD::FSHR funnel-shift node into plain shifts,
// remainder/mask and OR nodes, or forwards to the opposite-direction funnel
// shift when the target supports that one instead. Vector-predicated (VP)
// opcodes are delegated to expandVPFunnelShift. Returns SDValue() when the
// expansion cannot be built from legal-or-custom operations.
// NOTE(review): doc-rendered extract — the signature line and part of the
// vector legality condition are elided here; code kept byte-identical.
7974                                       SelectionDAG &DAG) const {
7975  if (Node->isVPOpcode())
7976    return expandVPFunnelShift(Node, DAG);
7977
7978  EVT VT = Node->getValueType(0);
7979
7980  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
7984    return SDValue();
7985
7986  SDValue X = Node->getOperand(0);
7987  SDValue Y = Node->getOperand(1);
7988  SDValue Z = Node->getOperand(2);
7989
7990  unsigned BW = VT.getScalarSizeInBits();
7991  bool IsFSHL = Node->getOpcode() == ISD::FSHL;
7992  SDLoc DL(SDValue(Node, 0));
7993
7994  EVT ShVT = Z.getValueType();
7995
7996  // If a funnel shift in the other direction is more supported, use it.
7997  unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
7998  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
7999      isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8000    if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8001      // fshl X, Y, Z -> fshr X, Y, -Z
8002      // fshr X, Y, Z -> fshl X, Y, -Z
8003      SDValue Zero = DAG.getConstant(0, DL, ShVT);
      // NOTE(review): the SUB below is created with VT although Zero and Z
      // have ShVT — presumably ShVT == VT whenever this path is taken;
      // confirm against the full source.
8004      Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
8005    } else {
8006      // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8007      // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8008      SDValue One = DAG.getConstant(1, DL, ShVT);
8009      if (IsFSHL) {
8010        Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8011        X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8012      } else {
8013        X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8014        Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8015      }
8016      Z = DAG.getNOT(DL, Z, ShVT);
8017    }
8018    return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8019  }
8020
8021  SDValue ShX, ShY;
8022  SDValue ShAmt, InvShAmt;
8023  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8024    // fshl: X << C | Y >> (BW - C)
8025    // fshr: X << (BW - C) | Y >> C
8026    // where C = Z % BW is not zero
8027    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8028    ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8029    InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8030    ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8031    ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8032  } else {
8033    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8034    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    // The extra "shift by one" keeps every shift amount strictly below BW,
    // avoiding an out-of-range shift when Z % BW could be zero.
8035    SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8036    if (isPowerOf2_32(BW)) {
8037      // Z % BW -> Z & (BW - 1)
8038      ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8039      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8040      InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8041    } else {
8042      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8043      ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8044      InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8045    }
8046
8047    SDValue One = DAG.getConstant(1, DL, ShVT);
8048    if (IsFSHL) {
8049      ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8050      SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8051      ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8052    } else {
8053      SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8054      ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8055      ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8056    }
8057  }
8058  return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8059}
8060
// Expands ISD::ROTL/ROTR into shift+or sequences, or into the reversed
// rotate opcode when the target supports that one and the element width is a
// power of two. When AllowVectorOps is false, vector rotates are only
// expanded if the required vector bit operations are available (that
// condition is partially elided in this doc-rendered view).
8061// TODO: Merge with expandFunnelShift.
8062SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8063                                  SelectionDAG &DAG) const {
8064  EVT VT = Node->getValueType(0);
8065  unsigned EltSizeInBits = VT.getScalarSizeInBits();
8066  bool IsLeft = Node->getOpcode() == ISD::ROTL;
8067  SDValue Op0 = Node->getOperand(0);
8068  SDValue Op1 = Node->getOperand(1);
8069  SDLoc DL(SDValue(Node, 0));
8070
8071  EVT ShVT = Op1.getValueType();
8072  SDValue Zero = DAG.getConstant(0, DL, ShVT);
8073
8074  // If a rotate in the other direction is more supported, use it.
8075  unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8076  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8077      isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
    // rot(x, c) == revrot(x, -c) when the width is a power of two.
8078    SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8079    return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8080  }
8081
8082  if (!AllowVectorOps && VT.isVector() &&
8088    return SDValue();
8089
  // "Sh" is the rotate-direction shift, "Hs" the opposite ("high-salvage")
  // shift that brings the wrapped-around bits back in.
8090  unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8091  unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8092  SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8093  SDValue ShVal;
8094  SDValue HsVal;
8095  if (isPowerOf2_32(EltSizeInBits)) {
8096    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8097    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8098    SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8099    SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8100    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8101    SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8102    HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8103  } else {
8104    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8105    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    // The split "shift by 1, then by (w-1-amt)" keeps both amounts in range
    // even when c % w == 0.
8106    SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8107    SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8108    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8109    SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8110    SDValue One = DAG.getConstant(1, DL, ShVT);
8111    HsVal =
8112        DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8113  }
8114  return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8115}
8116
// Expands ISD::SHL_PARTS / SRL_PARTS / SRA_PARTS (double-wide shifts split
// into Lo/Hi parts) using FSHL/FSHR plus selects for shift amounts that are
// >= the part width. Results are written into the by-reference Lo/Hi outputs
// (the signature line is elided in this doc-rendered view).
8118                                      SelectionDAG &DAG) const {
8119  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8120  EVT VT = Node->getValueType(0);
8121  unsigned VTBits = VT.getScalarSizeInBits();
8122  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8123
8124  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8125  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8126  SDValue ShOpLo = Node->getOperand(0);
8127  SDValue ShOpHi = Node->getOperand(1);
8128  SDValue ShAmt = Node->getOperand(2);
8129  EVT ShAmtVT = ShAmt.getValueType();
8130  EVT ShAmtCCVT =
8131      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8132  SDLoc dl(Node);
8133
8134  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8135  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8136  // away during isel.
8137  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8138                                  DAG.getConstant(VTBits - 1, dl, ShAmtVT));
  // Tmp1: the fill value for the "shift amount >= VTBits" case — sign bits
  // for SRA, zero otherwise.
8139  SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8140                                     DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8141                       : DAG.getConstant(0, dl, VT);
8142
8143  SDValue Tmp2, Tmp3;
8144  if (IsSHL) {
8145    Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8146    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8147  } else {
8148    Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8149    Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8150  }
8151
8152  // If the shift amount is larger or equal than the width of a part we don't
8153  // use the result from the FSHL/FSHR. Insert a test and select the appropriate
8154  // values for large shift amounts.
8155  SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8156                                DAG.getConstant(VTBits, dl, ShAmtVT));
8157  SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8158                              DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8159
8160  if (IsSHL) {
8161    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8162    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8163  } else {
8164    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8165    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8166  }
8167}
8168
// Expands a signed float-to-int conversion in software, following
// compiler-rt's fixsfdi: extract sign/exponent/mantissa with integer bit
// operations and shift the implicit-one mantissa into place. Currently
// limited to f32 -> i64, and refused for strict-FP nodes (the expansion
// would eliminate the NaN trap required by IEEE 754-2008 sec 5.8). Returns
// true and sets Result on success, false to let the caller fall back.
// (Signature line elided in this doc-rendered view.)
8170                                      SelectionDAG &DAG) const {
8171  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8172  SDValue Src = Node->getOperand(OpNo);
8173  EVT SrcVT = Src.getValueType();
8174  EVT DstVT = Node->getValueType(0);
8175  SDLoc dl(SDValue(Node, 0));
8176
8177  // FIXME: Only f32 to i64 conversions are supported.
8178  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8179    return false;
8180
8181  if (Node->isStrictFPOpcode())
8182    // When a NaN is converted to an integer a trap is allowed. We can't
8183    // use this expansion here because it would eliminate that trap. Other
8184    // traps are also allowed and cannot be eliminated. See
8185    // IEEE 754-2008 sec 5.8.
8186    return false;
8187
8188  // Expand f32 -> i64 conversion
8189  // This algorithm comes from compiler-rt's implementation of fixsfdi:
8190  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8191  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8192  EVT IntVT = SrcVT.changeTypeToInteger();
8193  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8194
  // IEEE-754 single-precision field masks/constants (exp bits 30:23,
  // 23-bit mantissa, bias 127).
8195  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8196  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8197  SDValue Bias = DAG.getConstant(127, dl, IntVT);
8198  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8199  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8200  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8201
8202  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8203
8204  SDValue ExponentBits = DAG.getNode(
8205      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8206      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8207  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8208
  // Arithmetic right shift smears the sign bit: Sign is all-ones for
  // negative inputs, zero otherwise.
8209  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8210                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8211                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8212  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8213
  // Re-attach the implicit leading one to the mantissa.
8214  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8215                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8216                          DAG.getConstant(0x00800000, dl, IntVT));
8217
8218  R = DAG.getZExtOrTrunc(R, dl, DstVT);
8219
  // Shift the mantissa left or right depending on whether the (unbiased)
  // exponent exceeds the mantissa width.
8220  R = DAG.getSelectCC(
8221      dl, Exponent, ExponentLoBit,
8222      DAG.getNode(ISD::SHL, dl, DstVT, R,
8223                  DAG.getZExtOrTrunc(
8224                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8225                      dl, IntShVT)),
8226      DAG.getNode(ISD::SRL, dl, DstVT, R,
8227                  DAG.getZExtOrTrunc(
8228                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8229                      dl, IntShVT)),
8230      ISD::SETGT);
8231
  // Conditionally negate via (R ^ Sign) - Sign (two's-complement trick).
8232  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8233                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8234
  // |Src| < 1 (negative unbiased exponent) truncates to 0.
8235  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8236                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8237  return true;
8238}
8239
// Expands FP_TO_UINT / STRICT_FP_TO_UINT in terms of FP_TO_SINT: either
// directly (when the float type cannot even represent the destination sign
// mask) or by subtracting the sign-mask offset and XOR-ing it back in.
// Returns true with Result (and Chain for strict nodes) set, false to let
// the caller fall back. (Signature line and several condition lines are
// elided in this doc-rendered view; code kept byte-identical.)
8241                                      SDValue &Chain,
8242                                      SelectionDAG &DAG) const {
8243  SDLoc dl(SDValue(Node, 0));
8244  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8245  SDValue Src = Node->getOperand(OpNo);
8246
8247  EVT SrcVT = Src.getValueType();
8248  EVT DstVT = Node->getValueType(0);
8249  EVT SetCCVT =
8250      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8251  EVT DstSetCCVT =
8252      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8253
8254  // Only expand vector types if we have the appropriate vector bit operations.
8255  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8257  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8259    return false;
8260
8261  // If the maximum float value is smaller then the signed integer range,
8262  // the destination signmask can't be represented by the float, so we can
8263  // just use FP_TO_SINT directly.
8264  const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
8265  APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8266  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
8268      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
8269    if (Node->isStrictFPOpcode()) {
8270      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8271                           { Node->getOperand(0), Src });
8272      Chain = Result.getValue(1);
8273    } else
8274      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8275    return true;
8276  }
8277
8278  // Don't expand it if there isn't cheap fsub instruction.
8280          Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
8281    return false;
8282
  // Cst is the sign mask converted to the float type; Sel tests whether the
  // input is below it (i.e. fits in the signed range directly).
8283  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8284  SDValue Sel;
8285
8286  if (Node->isStrictFPOpcode()) {
8287    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8288                       Node->getOperand(0), /*IsSignaling*/ true);
8289    Chain = Sel.getValue(1);
8290  } else {
8291    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
8292  }
8293
8294  bool Strict = Node->isStrictFPOpcode() ||
8295                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
8296
8297  if (Strict) {
8298    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
8299    // signmask then offset (the result of which should be fully representable).
8300    // Sel = Src < 0x8000000000000000
8301    // FltOfs = select Sel, 0, 0x8000000000000000
8302    // IntOfs = select Sel, 0, 0x8000000000000000
8303    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8304
8305    // TODO: Should any fast-math-flags be set for the FSUB?
8306    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
8307                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8308    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8309    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
8310                                   DAG.getConstant(0, dl, DstVT),
8311                                   DAG.getConstant(SignMask, dl, DstVT));
8312    SDValue SInt;
8313    if (Node->isStrictFPOpcode()) {
8314      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8315                                { Chain, Src, FltOfs });
8316      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8317                         { Val.getValue(1), Val });
8318      Chain = SInt.getValue(1);
8319    } else {
8320      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
8321      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
8322    }
    // XOR with the sign mask re-adds the offset removed before conversion.
8323    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8324  } else {
8325    // Expand based on maximum range of FP_TO_SINT:
8326    // True = fp_to_sint(Src)
8327    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8328    // Result = select (Src < 0x8000000000000000), True, False
8329
8330    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8331    // TODO: Should any fast-math-flags be set for the FSUB?
8332    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
8333                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
8334    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
8335                        DAG.getConstant(SignMask, dl, DstVT));
8336    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8337    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
8338  }
8339  return true;
8340}
8341
// Expands unsigned i64 -> f64 conversion following compiler-rt's
// __floatundidf: split the 64-bit value into 32-bit halves, bias each half
// into the exponent range of a double via OR with 2^52 / 2^84 bit patterns,
// then recombine with FSUB/FADD. Refused for strict-FP nodes because the
// transform mis-handles 0 under round-toward-negative-infinity (would yield
// -0.0). Returns true with Result set on success. (Signature line and part
// of the vector legality condition are elided in this doc-rendered view.)
8343                                      SDValue &Chain,
8344                                      SelectionDAG &DAG) const {
8345  // This transform is not correct for converting 0 when rounding mode is set
8346  // to round toward negative infinity which will produce -0.0. So disable under
8347  // strictfp.
8348  if (Node->isStrictFPOpcode())
8349    return false;
8350
8351  SDValue Src = Node->getOperand(0);
8352  EVT SrcVT = Src.getValueType();
8353  EVT DstVT = Node->getValueType(0);
8354
8355  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8356    return false;
8357
8358  // Only expand vector types if we have the appropriate vector bit operations.
8359  if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8364    return false;
8365
8366  SDLoc dl(SDValue(Node, 0));
8367  EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
8368
8369  // Implementation of unsigned i64 to f64 following the algorithm in
8370  // __floatundidf in compiler_rt. This implementation performs rounding
8371  // correctly in all rounding modes with the exception of converting 0
8372  // when rounding toward negative infinity. In that case the fsub will produce
8373  // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
8374  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8375  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8376      llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8377  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8378  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8379  SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
8380
  // OR-ing the halves into the 2^52 / 2^84 bit patterns makes each half a
  // valid double (value = bias + half); subtracting the combined bias
  // (2^84 + 2^52) from the high part and adding the low part recovers Src.
8381  SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8382  SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8383  SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8384  SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8385  SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8386  SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
8387  SDValue HiSub =
8388      DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8389  Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8390  return true;
8391}
8392
// Lowers FMINNUM/FMAXNUM (and their STRICT_ variants, per the assert) to a
// select_cc when the node carries the no-NaNs flag; without that flag no
// compare-based lowering is safe, so SDValue() is returned. (The first line
// of the signature is elided in this doc-rendered view.)
8393SDValue
8395                                               SelectionDAG &DAG) const {
8396  unsigned Opcode = Node->getOpcode();
8397  assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8398          Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8399         "Wrong opcode");
8400
8401  if (Node->getFlags().hasNoNaNs()) {
    // NOTE(review): this tests Opcode == FMINNUM only, so STRICT_FMINNUM
    // would get the SETGT (max) predicate — confirm strict nodes are never
    // routed here, or that this is intended.
8402    ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8403    SDValue Op1 = Node->getOperand(0);
8404    SDValue Op2 = Node->getOperand(1);
8405    SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
8406    // Copy FMF flags, but always set the no-signed-zeros flag
8407    // as this is implied by the FMINNUM/FMAXNUM semantics.
8408    SDNodeFlags Flags = Node->getFlags();
8409    Flags.setNoSignedZeros(true);
8410    SelCC->setFlags(Flags);
8411    return SelCC;
8412  }
8413
8414  return SDValue();
8415}
8416
// Expands FMINNUM/FMAXNUM by trying, in order: the *_IEEE variant (with
// FCANONICALIZE inserted to quiet possible sNaN inputs), FMINIMUM/FMAXIMUM
// when NaNs and conflicting signed zeros are provably absent, and finally a
// plain select via createSelectForFMINNUM_FMAXNUM. Returns SDValue() when
// no lowering applies. (Signature and a few lines — including the scalable-
// vector fatal-error call — are elided in this doc-rendered view.)
8418                                            SelectionDAG &DAG) const {
8419  SDLoc dl(Node);
8420  unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
8422  EVT VT = Node->getValueType(0);
8423
8424  if (VT.isScalableVector())
8426        "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8427
8428  if (isOperationLegalOrCustom(NewOp, VT)) {
8429    SDValue Quiet0 = Node->getOperand(0);
8430    SDValue Quiet1 = Node->getOperand(1);
8431
8432    if (!Node->getFlags().hasNoNaNs()) {
8433      // Insert canonicalizes if it's possible we need to quiet to get correct
8434      // sNaN behavior.
8435      if (!DAG.isKnownNeverSNaN(Quiet0)) {
8436        Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8437                             Node->getFlags());
8438      }
8439      if (!DAG.isKnownNeverSNaN(Quiet1)) {
8440        Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8441                             Node->getFlags());
8442      }
8443    }
8444
8445    return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8446  }
8447
8448  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8449  // instead if there are no NaNs and there can't be an incompatible zero
8450  // compare: at least one operand isn't +/-0, or there are no signed-zeros.
8451  if ((Node->getFlags().hasNoNaNs() ||
8452       (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8453        DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
8454      (Node->getFlags().hasNoSignedZeros() ||
8455       DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
8456       DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
8457    unsigned IEEE2018Op =
8458        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8459    if (isOperationLegalOrCustom(IEEE2018Op, VT))
8460      return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8461                         Node->getOperand(1), Node->getFlags());
8462  }
8463
8464  if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8465    return SelCC;
8466
8467  return SDValue();
8468}
8469
// Expands FMINIMUM/FMAXIMUM (IEEE 754-2019 semantics: NaN-propagating, and
// -0.0 < +0.0) in three steps: a non-NaN-propagating min/max core, an
// explicit NaN-propagation select, and a signed-zero fixup using
// ISD::IS_FPCLASS. (Signature and several condition lines are elided in
// this doc-rendered view; code kept byte-identical.)
8471                                                 SelectionDAG &DAG) const {
8472  SDLoc DL(N);
8473  SDValue LHS = N->getOperand(0);
8474  SDValue RHS = N->getOperand(1);
8475  unsigned Opc = N->getOpcode();
8476  EVT VT = N->getValueType(0);
8477  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8478  bool IsMax = Opc == ISD::FMAXIMUM;
8479  SDNodeFlags Flags = N->getFlags();
8480
8481  // First, implement comparison not propagating NaN. If no native fmin or fmax
8482  // available, use plain select with setcc instead.
8484  unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8485  unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8486
8487  // FIXME: We should probably define fminnum/fmaxnum variants with correct
8488  // signed zero behavior.
8489  bool MinMaxMustRespectOrderedZero = false;
8490
8491  if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
8492    MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
    // The *_IEEE form already orders -0.0 < +0.0, so the zero fixup below
    // can be skipped.
8493    MinMaxMustRespectOrderedZero = true;
8494  } else if (isOperationLegalOrCustom(CompOpc, VT)) {
8495    MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
8496  } else {
8498      return DAG.UnrollVectorOp(N);
8499
8500    // NaN (if exists) will be propagated later, so orderness doesn't matter.
8501    SDValue Compare =
8502        DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
8503    MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
8504  }
8505
8506  // Propagate any NaN of both operands
8507  if (!N->getFlags().hasNoNaNs() &&
8508      (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
8509    ConstantFP *FPNaN = ConstantFP::get(
8511    MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
8512                           DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
8513  }
8514
8515  // fminimum/fmaximum requires -0.0 less than +0.0
8516  if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
  // Only when the core min/max produced 0.0 do we re-check which operand is
  // the required-sign zero (+0 for max, -0 for min) and prefer it.
8518    SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8519                                  DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
8520    SDValue TestZero =
8521        DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8522    SDValue LCmp = DAG.getSelect(
8523        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8524        MinMax, Flags);
8525    SDValue RCmp = DAG.getSelect(
8526        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
8527        LCmp, Flags);
8528    MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8529  }
8530
8531  return MinMax;
8532}
8533
8534/// Returns a true value if if this FPClassTest can be performed with an ordered
8535/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8536/// std::nullopt if it cannot be performed as a compare with 0.
8537static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8538 const fltSemantics &Semantics,
8539 const MachineFunction &MF) {
8540 FPClassTest OrderedMask = Test & ~fcNan;
8541 FPClassTest NanTest = Test & fcNan;
8542 bool IsOrdered = NanTest == fcNone;
8543 bool IsUnordered = NanTest == fcNan;
8544
8545 // Skip cases that are testing for only a qnan or snan.
8546 if (!IsOrdered && !IsUnordered)
8547 return std::nullopt;
8548
8549 if (OrderedMask == fcZero &&
8550 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8551 return IsOrdered;
8552 if (OrderedMask == (fcZero | fcSubnormal) &&
8553 MF.getDenormalMode(Semantics).inputsAreZero())
8554 return IsOrdered;
8555 return std::nullopt;
8556}
8557
// Expands an ISD::IS_FPCLASS test. Fast paths: degenerate masks become
// constants; ppc_fp128 classifies via its high double; when FP exceptions
// can be ignored, some tests lower to a single fcmp (against 0.0, against
// itself for NaN, or fabs==inf). Otherwise the value is bitcast to an
// integer and each requested class is tested with integer compares, OR-ing
// partial results together; f80's explicit integer bit gets special
// handling. (The signature and several condition lines are elided in this
// doc-rendered view; code kept byte-identical.)
8560                                         const SDLoc &DL,
8561                                         SelectionDAG &DAG) const {
8562  EVT OperandVT = Op.getValueType();
8563  assert(OperandVT.isFloatingPoint());
8564
8565  // Degenerated cases.
8566  if (Test == fcNone)
8567    return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8568  if ((Test & fcAllFlags) == fcAllFlags)
8569    return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
8570
8571  // PPC double double is a pair of doubles, of which the higher part determines
8572  // the value class.
8573  if (OperandVT == MVT::ppcf128) {
8574    Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8575                     DAG.getConstant(1, DL, MVT::i32));
8576    OperandVT = MVT::f64;
8577  }
8578
8579  // Some checks may be represented as inversion of simpler check, for example
8580  // "inf|normal|subnormal|zero" => !"nan".
8581  bool IsInverted = false;
8582  if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
8583    IsInverted = true;
8584    Test = InvertedCheck;
8585  }
8586
8587  // Floating-point type properties.
8588  EVT ScalarFloatVT = OperandVT.getScalarType();
8589  const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
8590  const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8591  bool IsF80 = (ScalarFloatVT == MVT::f80);
8592
8593  // Some checks can be implemented using float comparisons, if floating point
8594  // exceptions are ignored.
8595  if (Flags.hasNoFPExcept() &&
8597    ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
8598    ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
8599
8600    if (std::optional<bool> IsCmp0 =
8601            isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
8602        IsCmp0 && (isCondCodeLegalOrCustom(
8603                      *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8604                      OperandVT.getScalarType().getSimpleVT()))) {
8605
8606      // If denormals could be implicitly treated as 0, this is not equivalent
8607      // to a compare with 0 since it will also be true for denormals.
8608      return DAG.getSetCC(DL, ResultVT, Op,
8609                          DAG.getConstantFP(0.0, DL, OperandVT),
8610                          *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8611    }
8612
8613    if (Test == fcNan &&
8615                                  OperandVT.getScalarType().getSimpleVT())) {
      // isnan(x) --> x != x (unordered self-compare).
8616      return DAG.getSetCC(DL, ResultVT, Op, Op,
8617                          IsInverted ? ISD::SETO : ISD::SETUO);
8618    }
8619
8620    if (Test == fcInf &&
8622                                  OperandVT.getScalarType().getSimpleVT()) &&
8624      // isinf(x) --> fabs(x) == inf
8625      SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8626      SDValue Inf =
8627          DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
8628      return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8629                          IsInverted ? ISD::SETUNE : ISD::SETOEQ);
8630    }
8631  }
8632
8633  // In the general case use integer operations.
8634  unsigned BitSize = OperandVT.getScalarSizeInBits();
8635  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
8636  if (OperandVT.isVector())
8637    IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
8638                             OperandVT.getVectorElementCount());
8639  SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
8640
8641  // Various masks.
8642  APInt SignBit = APInt::getSignMask(BitSize);
8643  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
8644  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
8645  const unsigned ExplicitIntBitInF80 = 63;
8646  APInt ExpMask = Inf;
8647  if (IsF80)
8648    ExpMask.clearBit(ExplicitIntBitInF80);
8649  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
8650  APInt QNaNBitMask =
8651      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
8652  APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
8653
8654  SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
8655  SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
8656  SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
8657  SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
8658  SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
8659  SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);
8660
8661  SDValue Res;
  // OR a partial class-test result into the accumulated result.
8662  const auto appendResult = [&](SDValue PartialRes) {
8663    if (PartialRes) {
8664      if (Res)
8665        Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
8666      else
8667        Res = PartialRes;
8668    }
8669  };
8670
8671  SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
  // Lazily build (and cache) the f80 explicit-integer-bit test.
8672  const auto getIntBitIsSet = [&]() -> SDValue {
8673    if (!IntBitIsSetV) {
8674      APInt IntBitMask(BitSize, 0);
8675      IntBitMask.setBit(ExplicitIntBitInF80);
8676      SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
8677      SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
8678      IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
8679    }
8680    return IntBitIsSetV;
8681  };
8682
8683  // Split the value into sign bit and absolute value.
8684  SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
  // NOTE(review): the 0.0 literal below is implicitly converted to integer 0
  // for getConstant — presumably intended to be plain 0.
8685  SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
8686                               DAG.getConstant(0.0, DL, IntVT), ISD::SETLT);
8687
8688  // Tests that involve more than one class should be processed first.
8689  SDValue PartialRes;
8690
8691  if (IsF80)
8692    ; // Detect finite numbers of f80 by checking individual classes because
8693      // they have different settings of the explicit integer bit.
8694  else if ((Test & fcFinite) == fcFinite) {
8695    // finite(V) ==> abs(V) < exp_mask
8696    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8697    Test &= ~fcFinite;
8698  } else if ((Test & fcFinite) == fcPosFinite) {
8699    // finite(V) && V > 0 ==> V < exp_mask
8700    PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
8701    Test &= ~fcPosFinite;
8702  } else if ((Test & fcFinite) == fcNegFinite) {
8703    // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
8704    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8705    PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8706    Test &= ~fcNegFinite;
8707  }
8708  appendResult(PartialRes);
8709
8710  if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
8711    // fcZero | fcSubnormal => test all exponent bits are 0
8712    // TODO: Handle sign bit specific cases
8713    if (PartialCheck == (fcZero | fcSubnormal)) {
8714      SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
8715      SDValue ExpIsZero =
8716          DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
8717      appendResult(ExpIsZero);
8718      Test &= ~PartialCheck & fcAllFlags;
8719    }
8720  }
8721
8722  // Check for individual classes.
8723
8724  if (unsigned PartialCheck = Test & fcZero) {
8725    if (PartialCheck == fcPosZero)
8726      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
8727    else if (PartialCheck == fcZero)
8728      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
8729    else // ISD::fcNegZero
8730      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
8731    appendResult(PartialRes);
8732  }
8733
8734  if (unsigned PartialCheck = Test & fcSubnormal) {
8735    // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
8736    // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
8737    SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
8738    SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
8739    SDValue VMinusOneV =
8740        DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
8741    PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
8742    if (PartialCheck == fcNegSubnormal)
8743      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8744    appendResult(PartialRes);
8745  }
8746
8747  if (unsigned PartialCheck = Test & fcInf) {
8748    if (PartialCheck == fcPosInf)
8749      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
8750    else if (PartialCheck == fcInf)
8751      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
8752    else { // ISD::fcNegInf
8753      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
8754      SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
8755      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
8756    }
8757    appendResult(PartialRes);
8758  }
8759
8760  if (unsigned PartialCheck = Test & fcNan) {
8761    APInt InfWithQnanBit = Inf | QNaNBitMask;
8762    SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
8763    if (PartialCheck == fcNan) {
8764      // isnan(V) ==> abs(V) > int(inf)
8765      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
8766      if (IsF80) {
8767        // Recognize unsupported values as NaNs for compatibility with glibc.
8768        // In them (exp(V)==0) == int_bit.
8769        SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
8770        SDValue ExpIsZero =
8771            DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
8772        SDValue IsPseudo =
8773            DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
8774        PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
8775      }
8776    } else if (PartialCheck == fcQNan) {
8777      // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
8778      PartialRes =
8779          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
8780    } else { // ISD::fcSNan
8781      // issignaling(V) ==> abs(V) > unsigned(Inf) &&
8782      //                    abs(V) < (unsigned(Inf) | quiet_bit)
8783      SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
8784      SDValue IsNotQnan =
8785          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
8786      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
8787    }
8788    appendResult(PartialRes);
8789  }
8790
8791  if (unsigned PartialCheck = Test & fcNormal) {
8792    // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
8793    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
8794    SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
8795    SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
8796    APInt ExpLimit = ExpMask - ExpLSB;
8797    SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
8798    PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
8799    if (PartialCheck == fcNegNormal)
8800      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8801    else if (PartialCheck == fcPosNormal) {
8802      SDValue PosSignV =
8803          DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
8804      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
8805    }
8806    if (IsF80)
8807      PartialRes =
8808          DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
8809    appendResult(PartialRes);
8810  }
8811
8812  if (!Res)
8813    return DAG.getConstant(IsInverted, DL, ResultVT);
8814  if (IsInverted)
8815    Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
8816  return Res;
8817}
8818
8819// Only expand vector types if we have the appropriate vector bit operations.
// Helper: vector CTPOP expansion is only worthwhile when the bit operations
// the expansion emits are legal or custom on VT — visible here: ADD always,
// and MUL for elements wider than 8 bits.
// NOTE(review): lines 8824-8825/8827 of the original (further legality
// checks, trailing "&&" shows the condition continues) were dropped by this
// extraction — confirm against upstream source.
8820 static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
8821 assert(VT.isVector() && "Expected vector type");
8822 unsigned Len = VT.getScalarSizeInBits();
8823 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
8826 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
8828}
8829
// Expand ISD::CTPOP using the parallel bit-counting algorithm (the "best"
// method from the Stanford bithacks page): pairwise 2/4/8-bit partial sums,
// then either a single multiply-and-shift or a shift-add ladder to sum the
// per-byte counts. Bails out (returns SDValue()) for irregular widths
// (>128 bits or not a multiple of 8) and for vectors lacking the needed
// bit operations.
8831 SDLoc dl(Node);
8832 EVT VT = Node->getValueType(0);
8833 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8834 SDValue Op = Node->getOperand(0);
8835 unsigned Len = VT.getScalarSizeInBits();
8836 assert(VT.isInteger() && "CTPOP not implemented for this type.");
8837
8838 // TODO: Add support for irregular type lengths.
8839 if (!(Len <= 128 && Len % 8 == 0))
8840 return SDValue();
8841
8842 // Only expand vector types if we have the appropriate vector bit operations.
8843 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
8844 return SDValue();
8845
8846 // This is the "best" algorithm from
8847 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
8848 SDValue Mask55 =
8849 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
8850 SDValue Mask33 =
8851 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
8852 SDValue Mask0F =
8853 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
8854
8855 // v = v - ((v >> 1) & 0x55555555...)
8856 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
8857 DAG.getNode(ISD::AND, dl, VT,
8858 DAG.getNode(ISD::SRL, dl, VT, Op,
8859 DAG.getConstant(1, dl, ShVT)),
8860 Mask55));
8861 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8862 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
8863 DAG.getNode(ISD::AND, dl, VT,
8864 DAG.getNode(ISD::SRL, dl, VT, Op,
8865 DAG.getConstant(2, dl, ShVT)),
8866 Mask33));
8867 // v = (v + (v >> 4)) & 0x0F0F0F0F...
8868 Op = DAG.getNode(ISD::AND, dl, VT,
8869 DAG.getNode(ISD::ADD, dl, VT, Op,
8870 DAG.getNode(ISD::SRL, dl, VT, Op,
8871 DAG.getConstant(4, dl, ShVT))),
8872 Mask0F);
8873
8874 if (Len <= 8)
8875 return Op;
8876
8877 // Avoid the multiply if we only have 2 bytes to add.
8878 // TODO: Only doing this for scalars because vectors weren't as obviously
8879 // improved.
8880 if (Len == 16 && !VT.isVector()) {
8881 // v = (v + (v >> 8)) & 0x00FF;
8882 return DAG.getNode(ISD::AND, dl, VT,
8883 DAG.getNode(ISD::ADD, dl, VT, Op,
8884 DAG.getNode(ISD::SRL, dl, VT, Op,
8885 DAG.getConstant(8, dl, ShVT))),
8886 DAG.getConstant(0xFF, dl, VT));
8887 }
8888
8889 // v = (v * 0x01010101...) >> (Len - 8)
8890 SDValue V;
// NOTE(review): the "if" guarding this MUL path (original lines 8891-8892,
// presumably a MUL-legality check matching the "} else {" below) was dropped
// by this extraction — confirm against upstream source.
8893 SDValue Mask01 =
8894 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
8895 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
8896 } else {
8897 V = Op;
// Fallback when MUL is unavailable: the shift-add ladder computes the same
// byte-wise sum as multiplying by 0x0101...01.
8898 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
8899 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
8900 V = DAG.getNode(ISD::ADD, dl, VT, V,
8901 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
8902 }
8903 }
8904 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
8905}
8906
// Expand ISD::VP_CTPOP: same parallel bit-count algorithm as expandCTPOP,
// but every emitted node is the VP (vector-predicated) variant carrying the
// (Mask, VL) operands from the original node.
8908 SDLoc dl(Node);
8909 EVT VT = Node->getValueType(0);
8910 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8911 SDValue Op = Node->getOperand(0);
8912 SDValue Mask = Node->getOperand(1);
8913 SDValue VL = Node->getOperand(2);
8914 unsigned Len = VT.getScalarSizeInBits();
8915 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
8916
8917 // TODO: Add support for irregular type lengths.
8918 if (!(Len <= 128 && Len % 8 == 0))
8919 return SDValue();
8920
8921 // This is same algorithm of expandCTPOP from
8922 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
8923 SDValue Mask55 =
8924 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
8925 SDValue Mask33 =
8926 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
8927 SDValue Mask0F =
8928 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
8929
8930 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
8931
8932 // v = v - ((v >> 1) & 0x55555555...)
8933 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
8934 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
8935 DAG.getConstant(1, dl, ShVT), Mask, VL),
8936 Mask55, Mask, VL);
8937 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
8938
8939 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8940 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
8941 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
8942 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
8943 DAG.getConstant(2, dl, ShVT), Mask, VL),
8944 Mask33, Mask, VL);
8945 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
8946
8947 // v = (v + (v >> 4)) & 0x0F0F0F0F...
// (Note the comma operator: the Tmp4 and Tmp5 assignments form one
// statement in the original source.)
8948 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
8949 Mask, VL),
8950 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
8951 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
8952
8953 if (Len <= 8)
8954 return Op;
8955
8956 // v = (v * 0x01010101...) >> (Len - 8)
8957 SDValue V;
// NOTE(review): the opening of this "if" (original line 8958, evidently an
// operation-legality check whose argument list continues below with
// ISD::VP_MUL) was dropped by this extraction — confirm upstream.
8959 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
8960 SDValue Mask01 =
8961 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
8962 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
8963 } else {
8964 V = Op;
// Fallback when VP_MUL is unavailable: shift-add ladder equivalent to the
// 0x0101...01 multiply.
8965 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
8966 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
8967 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
8968 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
8969 Mask, VL);
8970 }
8971 }
8972 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
8973 Mask, VL);
8974}
8975
// Expand ISD::CTLZ / CTLZ_ZERO_UNDEF: prefer a directly supported variant
// (plain CTLZ, or CTLZ_ZERO_UNDEF plus an explicit zero-input select),
// otherwise smear the leading one bit rightwards with OR-of-shifts, invert,
// and popcount (Hacker's Delight).
8977 SDLoc dl(Node);
8978 EVT VT = Node->getValueType(0);
8979 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8980 SDValue Op = Node->getOperand(0);
8981 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8982
8983 // If the non-ZERO_UNDEF version is supported we can use that instead.
8984 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
// NOTE(review): the second half of this condition (original line 8985,
// evidently a CTLZ legality check) was dropped by this extraction.
8986 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
8987
8988 // If the ZERO_UNDEF version is supported use that and handle the zero case.
// NOTE(review): the guarding "if" (original line 8989, matching the closing
// brace below) was dropped by this extraction.
8990 EVT SetCCVT =
8991 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8992 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
8993 SDValue Zero = DAG.getConstant(0, dl, VT);
8994 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
// Zero input: the answer is the full element width.
8995 return DAG.getSelect(dl, VT, SrcIsZero,
8996 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
8997 }
8998
8999 // Only expand vector types if we have the appropriate vector bit operations.
9000 // This includes the operations needed to expand CTPOP if it isn't supported.
9001 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
// NOTE(review): original lines 9002/9004-9005 (further legality checks in
// this condition) were dropped by this extraction.
9003 !canExpandVectorCTPOP(*this, VT)) ||
9006 return SDValue();
9007
9008 // for now, we do this:
9009 // x = x | (x >> 1);
9010 // x = x | (x >> 2);
9011 // ...
9012 // x = x | (x >>16);
9013 // x = x | (x >>32); // for 64-bit input
9014 // return popcount(~x);
9015 //
9016 // Ref: "Hacker's Delight" by Henry Warren
9017 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9018 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9019 Op = DAG.getNode(ISD::OR, dl, VT, Op,
9020 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
9021 }
9022 Op = DAG.getNOT(dl, Op, VT);
9023 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
9024}
9025
// Expand ISD::VP_CTLZ: smear the leading one bit rightwards (VP_OR of
// progressively shifted copies), invert with VP_XOR against -1, then count
// the remaining ones with VP_CTPOP — the VP analogue of expandCTLZ's
// fallback path. All nodes carry the (Mask, VL) predication operands.
9027 SDLoc dl(Node);
9028 EVT VT = Node->getValueType(0);
9029 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9030 SDValue Op = Node->getOperand(0);
9031 SDValue Mask = Node->getOperand(1);
9032 SDValue VL = Node->getOperand(2);
9033 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9034
9035 // do this:
9036 // x = x | (x >> 1);
9037 // x = x | (x >> 2);
9038 // ...
9039 // x = x | (x >>16);
9040 // x = x | (x >>32); // for 64-bit input
9041 // return popcount(~x);
9042 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9043 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9044 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9045 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9046 VL);
9047 }
// XOR with all-ones == bitwise NOT under predication.
9048 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getConstant(-1, dl, VT), Mask,
9049 VL);
9050 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9051}
9052
// Count trailing zeros via a de Bruijn multiply and a constant-pool byte
// table (BitWidth 32 or 64 only): index = ((Op & -Op) * DeBruijn) >>
// (BitWidth - log2(BitWidth)), then ZEXTLOAD the answer from the table.
// For the non-ZERO_UNDEF opcode a select guards the zero input, returning
// BitWidth.
9054 const SDLoc &DL, EVT VT, SDValue Op,
9055 unsigned BitWidth) const {
9056 if (BitWidth != 32 && BitWidth != 64)
9057 return SDValue();
9058 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
9059 : APInt(64, 0x0218A392CD3D5DBFULL);
9060 const DataLayout &TD = DAG.getDataLayout();
9061 MachinePointerInfo PtrInfo =
// NOTE(review): the PtrInfo initializer (original line 9062) was dropped by
// this extraction — confirm upstream.
9063 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
// Op & -Op isolates the lowest set bit before the de Bruijn multiply.
9064 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
9065 SDValue Lookup = DAG.getNode(
9066 ISD::SRL, DL, VT,
9067 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
9068 DAG.getConstant(DeBruijn, DL, VT)),
9069 DAG.getConstant(ShiftAmt, DL, VT));
9071
// NOTE(review): original lines 9070/9072 (including the declaration of
// "Table" filled in below) were dropped by this extraction.
9073 for (unsigned i = 0; i < BitWidth; i++) {
9074 APInt Shl = DeBruijn.shl(i);
9075 APInt Lshr = Shl.lshr(ShiftAmt);
9076 Table[Lshr.getZExtValue()] = i;
9077 }
9078
9079 // Create a ConstantArray in Constant Pool
9080 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
9081 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
9082 TD.getPrefTypeAlign(CA->getType()));
9083 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
9084 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
9085 PtrInfo, MVT::i8);
9086 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9087 return ExtLoad;
9088
// Non-ZERO_UNDEF: zero input must yield BitWidth, so guard with a select.
9089 EVT SetCCVT =
9090 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9091 SDValue Zero = DAG.getConstant(0, DL, VT);
9092 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
9093 return DAG.getSelect(DL, VT, SrcIsZero,
9094 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
9095}
9096
// Expand ISD::CTTZ / CTTZ_ZERO_UNDEF: prefer a directly supported variant,
// then the de Bruijn table lookup, otherwise popcount(~x & (x - 1)) — or
// NumBits - ctlz(~x & (x - 1)) when only CTLZ is available
// (Hacker's Delight).
9098 SDLoc dl(Node);
9099 EVT VT = Node->getValueType(0);
9100 SDValue Op = Node->getOperand(0);
9101 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9102
9103 // If the non-ZERO_UNDEF version is supported we can use that instead.
9104 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
// NOTE(review): the rest of this condition (original line 9105, evidently a
// CTTZ legality check) was dropped by this extraction.
9106 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
9107
9108 // If the ZERO_UNDEF version is supported use that and handle the zero case.
// NOTE(review): the guarding "if" (original line 9109, matching the closing
// brace below) was dropped by this extraction.
9110 EVT SetCCVT =
9111 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9112 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
9113 SDValue Zero = DAG.getConstant(0, dl, VT);
9114 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
// Zero input: the answer is the full element width.
9115 return DAG.getSelect(dl, VT, SrcIsZero,
9116 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
9117 }
9118
9119 // Only expand vector types if we have the appropriate vector bit operations.
9120 // This includes the operations needed to expand CTPOP if it isn't supported.
9121 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
// NOTE(review): original lines 9122-9123/9125-9127 (further legality checks
// in this condition) were dropped by this extraction.
9124 !canExpandVectorCTPOP(*this, VT)) ||
9128 return SDValue();
9129
9130 // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
9131 if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
// NOTE(review): the rest of this condition (original line 9132) was dropped
// by this extraction.
9133 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
9134 return V;
9135
9136 // for now, we use: { return popcount(~x & (x - 1)); }
9137 // unless the target has ctlz but not ctpop, in which case we use:
9138 // { return 32 - nlz(~x & (x-1)); }
9139 // Ref: "Hacker's Delight" by Henry Warren
9140 SDValue Tmp = DAG.getNode(
9141 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
9142 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
9143
9144 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
// NOTE(review): the guarding "if" (original line 9145, matching the closing
// brace below) was dropped by this extraction.
9146 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
9147 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
9148 }
9149
9150 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
9151}
9152
// Expand ISD::VP_CTTZ as VP_CTPOP(~x & (x - 1)): the mask ~x & (x-1) has a
// one in every trailing-zero position of x, so its popcount is the trailing
// zero count. ~x is formed as VP_XOR with -1; all nodes carry (Mask, VL).
9154 SDValue Op = Node->getOperand(0);
9155 SDValue Mask = Node->getOperand(1);
9156 SDValue VL = Node->getOperand(2);
9157 SDLoc dl(Node);
9158 EVT VT = Node->getValueType(0);
9159
9160 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9161 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9162 DAG.getConstant(-1, dl, VT), Mask, VL);
9163 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9164 DAG.getConstant(1, dl, VT), Mask, VL);
9165 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9166 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9167}
9168
// Expand VP_CTTZ_ELTS: build a boolean vector from the source (via VP_SETCC
// against zero if needed), select step_vector where true and splat(EVL)
// where false, then VP_REDUCE_UMIN — the minimum is the index of the first
// active element, or EVL if none is set.
9170 SelectionDAG &DAG) const {
9171 // %cond = to_bool_vec %source
9172 // %splat = splat /*val=*/VL
9173 // %tz = step_vector
9174 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
9175 // %r = vp.reduce.umin %v
9176 SDLoc DL(N);
9177 SDValue Source = N->getOperand(0);
9178 SDValue Mask = N->getOperand(1);
9179 SDValue EVL = N->getOperand(2);
9180 EVT SrcVT = Source.getValueType();
9181 EVT ResVT = N->getValueType(0);
9182 EVT ResVecVT =
9183 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
9184
9185 // Convert to boolean vector.
9186 if (SrcVT.getScalarType() != MVT::i1) {
9187 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
9188 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
9189 SrcVT.getVectorElementCount());
9190 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
9191 DAG.getCondCode(ISD::SETNE), Mask, EVL);
9192 }
9193
9194 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
9195 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
9196 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
9197 SDValue Select =
9198 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
9199 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
9200}
9201
// Expand ABS (IsNegative selects the negated form, 0 - abs(x)): try the
// smax / umin / smin formulations when the required ops are legal, falling
// back to the classic shift-xor-sub idiom. Op is frozen before duplication
// so both uses observe the same value.
9203 bool IsNegative) const {
9204 SDLoc dl(N);
9205 EVT VT = N->getValueType(0);
9206 SDValue Op = N->getOperand(0);
9207
9208 // abs(x) -> smax(x,sub(0,x))
9209 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): the second half of this condition (original line 9210,
// evidently an SMAX legality check) was dropped by this extraction.
9211 SDValue Zero = DAG.getConstant(0, dl, VT);
9212 Op = DAG.getFreeze(Op);
9213 return DAG.getNode(ISD::SMAX, dl, VT, Op,
9214 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9215 }
9216
9217 // abs(x) -> umin(x,sub(0,x))
9218 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): the second half of this condition (original line 9219,
// evidently a UMIN legality check) was dropped by this extraction.
9220 SDValue Zero = DAG.getConstant(0, dl, VT);
9221 Op = DAG.getFreeze(Op);
9222 return DAG.getNode(ISD::UMIN, dl, VT, Op,
9223 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9224 }
9225
9226 // 0 - abs(x) -> smin(x, sub(0,x))
9227 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): the second half of this condition (original line 9228,
// evidently an SMIN legality check) was dropped by this extraction.
9229 SDValue Zero = DAG.getConstant(0, dl, VT);
9230 Op = DAG.getFreeze(Op);
9231 return DAG.getNode(ISD::SMIN, dl, VT, Op,
9232 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9233 }
9234
9235 // Only expand vector types if we have the appropriate vector operations.
9236 if (VT.isVector() &&
// NOTE(review): original lines 9237/9240 (further legality checks in this
// condition) were dropped by this extraction.
9238 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9239 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9241 return SDValue();
9242
9243 Op = DAG.getFreeze(Op);
// Shift = sign-bit broadcast: all-ones for negative x, zero otherwise.
9244 SDValue Shift = DAG.getNode(
9245 ISD::SRA, dl, VT, Op,
9246 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9247 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9248
9249 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9250 if (!IsNegative)
9251 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9252
9253 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9254 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9255}
9256
// Expand ABDS/ABDU (signed/unsigned absolute difference): max-min when both
// are legal, the usubsat-pair identity for unsigned, a branchless
// sub/xor/sub form when setcc produces all-bits booleans of the same type,
// and finally a select of the two subtraction orders. Operands are frozen
// because each is used multiple times.
9258 SDLoc dl(N);
9259 EVT VT = N->getValueType(0);
9260 SDValue LHS = DAG.getFreeze(N->getOperand(0));
9261 SDValue RHS = DAG.getFreeze(N->getOperand(1));
9262 bool IsSigned = N->getOpcode() == ISD::ABDS;
9263
9264 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9265 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9266 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9267 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9268 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
9269 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
9270 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
9271 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9272 }
9273
9274 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9275 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
9276 return DAG.getNode(ISD::OR, dl, VT,
9277 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
9278 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
9279
9280 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// NOTE(review): the condition-code selection (original line 9281, defining
// the "CC" used below — per the comments, SETGT for signed / SETUGT for
// unsigned) was dropped by this extraction.
9282 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
9283
9284 // Branchless expansion iff cmp result is allbits:
9285 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
9286 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
9287 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
9288 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
9289 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
9290 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
9291 }
9292
9293 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9294 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9295 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9296 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9297}
9298
// Expand the four averaging nodes (AVGFLOORS/U, AVGCEILS/U): plain
// add(+1)+shift when the operands are known to have headroom, widen+shift+
// truncate for scalars with a cheap double-width type, otherwise the
// overflow-safe identity sign-op(lhs,rhs) +/- shift(xor(lhs,rhs), 1).
9300 SDLoc dl(N);
9301 EVT VT = N->getValueType(0);
9302 SDValue LHS = N->getOperand(0);
9303 SDValue RHS = N->getOperand(1);
9304
9305 unsigned Opc = N->getOpcode();
9306 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
9307 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
// Floor variants combine with ADD of AND; ceil variants SUB from OR.
9308 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9309 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
9310 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
9311 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9312 assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
9313 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
9314 "Unknown AVG node");
9315
9316 // If the operands are already extended, we can add+shift.
9317 bool IsExt =
9318 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
9319 DAG.ComputeNumSignBits(RHS) >= 2) ||
9320 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
// NOTE(review): the matching RHS leading-zeros check (original line 9321)
// was dropped by this extraction.
9322 if (IsExt) {
9323 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
9324 if (!IsFloor)
9325 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
9326 return DAG.getNode(ShiftOpc, dl, VT, Sum,
9327 DAG.getShiftAmountConstant(1, VT, dl));
9328 }
9329
9330 // For scalars, see if we can efficiently extend/truncate to use add+shift.
9331 if (VT.isScalarInteger()) {
9332 unsigned BW = VT.getScalarSizeInBits();
9333 EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
9334 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
9335 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
9336 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
9337 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
9338 if (!IsFloor)
9339 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
9340 DAG.getConstant(1, dl, ExtVT));
9341 // Just use SRL as we will be truncating away the extended sign bits.
9342 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
9343 DAG.getShiftAmountConstant(1, ExtVT, dl));
9344 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
9345 }
9346 }
9347
9348 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
9349 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
9350 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
9351 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
9352 LHS = DAG.getFreeze(LHS);
9353 RHS = DAG.getFreeze(RHS);
9354 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
9355 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
9356 SDValue Shift =
9357 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
9358 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
9359}
9360
// Expand ISD::BSWAP for simple i16/i32/i64 scalar-element types: i16 is a
// rotate by 8; i32/i64 are reassembled from individually shifted-and-masked
// byte lanes, OR'd together in a balanced tree. Other types return
// SDValue() (no expansion).
9362 SDLoc dl(N);
9363 EVT VT = N->getValueType(0);
9364 SDValue Op = N->getOperand(0);
9365
9366 if (!VT.isSimple())
9367 return SDValue();
9368
9369 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9370 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9371 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9372 default:
9373 return SDValue();
9374 case MVT::i16:
9375 // Use a rotate by 8. This can be further expanded if necessary.
9376 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9377 case MVT::i32:
9378 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9379 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
9380 DAG.getConstant(0xFF00, dl, VT));
9381 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
9382 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9383 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
9384 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9385 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9386 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9387 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9388 case MVT::i64:
9389 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9390 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
9391 DAG.getConstant(255ULL<<8, dl, VT));
9392 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
9393 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
9394 DAG.getConstant(255ULL<<16, dl, VT));
9395 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
9396 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
9397 DAG.getConstant(255ULL<<24, dl, VT));
9398 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
9399 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9400 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
9401 DAG.getConstant(255ULL<<24, dl, VT));
9402 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9403 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
9404 DAG.getConstant(255ULL<<16, dl, VT));
9405 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
9406 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
9407 DAG.getConstant(255ULL<<8, dl, VT));
9408 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
// Balanced OR tree combining the eight byte lanes.
9409 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
9410 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
9411 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9412 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9413 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
9414 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9415 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
9416 }
9417}
9418
// Expand VP_BSWAP for simple i16/i32/i64 scalar-element types: the same
// shifted-and-masked byte-lane reassembly as expandBSWAP, with every node
// carrying the (Mask, EVL) predication operands. i16 uses a VP_SHL/VP_SRL
// pair (no VP rotate is used here). Other types return SDValue().
9420 SDLoc dl(N);
9421 EVT VT = N->getValueType(0);
9422 SDValue Op = N->getOperand(0);
9423 SDValue Mask = N->getOperand(1);
9424 SDValue EVL = N->getOperand(2);
9425
9426 if (!VT.isSimple())
9427 return SDValue();
9428
9429 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9430 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9431 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9432 default:
9433 return SDValue();
9434 case MVT::i16:
9435 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9436 Mask, EVL);
9437 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9438 Mask, EVL);
9439 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
9440 case MVT::i32:
9441 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9442 Mask, EVL);
9443 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
9444 Mask, EVL);
9445 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
9446 Mask, EVL);
9447 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9448 Mask, EVL);
9449 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9450 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
9451 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9452 Mask, EVL);
9453 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9454 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9455 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9456 case MVT::i64:
9457 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9458 Mask, EVL);
9459 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9460 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9461 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
9462 Mask, EVL);
9463 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9464 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9465 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
9466 Mask, EVL);
9467 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9468 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9469 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
9470 Mask, EVL);
9471 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9472 Mask, EVL);
9473 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
9474 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9475 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9476 Mask, EVL);
9477 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
9478 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9479 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
9480 Mask, EVL);
9481 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9482 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9483 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9484 Mask, EVL);
// Balanced VP_OR tree combining the eight byte lanes.
9485 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
9486 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
9487 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9488 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9489 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
9490 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9491 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
9492 }
9493}
9494
// Expand ISD::BITREVERSE: for power-of-2 widths >= 8, BSWAP the bytes and
// then swap nibbles, bit-pairs, and adjacent bits with mask+shift+or steps
// (masks repeat 0x0F/0x33/0x55 per byte). Otherwise fall back to moving
// each bit individually — O(Sz) shift/and/or operations.
9496 SDLoc dl(N);
9497 EVT VT = N->getValueType(0);
9498 SDValue Op = N->getOperand(0);
9499 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9500 unsigned Sz = VT.getScalarSizeInBits();
9501
9502 SDValue Tmp, Tmp2, Tmp3;
9503
9504 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9505 // and finally the i1 pairs.
9506 // TODO: We can easily support i4/i2 legal types if any target ever does.
9507 if (Sz >= 8 && isPowerOf2_32(Sz)) {
9508 // Create the masks - repeating the pattern every byte.
9509 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9510 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9511 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9512
9513 // BSWAP if the type is wider than a single byte.
9514 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
9515
9516 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9517 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
9518 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
9519 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
9520 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
9521 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9522
9523 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9524 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
9525 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
9526 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
9527 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
9528 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9529
9530 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9531 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
9532 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
9533 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
9534 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
9535 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9536 return Tmp;
9537 }
9538
// Fallback for irregular widths: move bit J of the input to bit I of the
// result, one bit at a time.
9539 Tmp = DAG.getConstant(0, dl, VT);
9540 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
9541 if (I < J)
9542 Tmp2 =
9543 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
9544 else
9545 Tmp2 =
9546 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
9547
9548 APInt Shift = APInt::getOneBitSet(Sz, J);
9549 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
9550 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
9551 }
9552
9553 return Tmp;
9554}
9555
// Expand ISD::VP_BITREVERSE: the predicated analogue of expandBITREVERSE
// for power-of-2 widths >= 8 — VP_BSWAP the bytes, then swap nibbles,
// bit-pairs, and adjacent bits via VP_SRL/VP_AND/VP_SHL/VP_OR, threading
// (Mask, EVL) through every node. Unlike the non-VP version there is no
// per-bit fallback: other widths return SDValue().
9557 assert(N->getOpcode() == ISD::VP_BITREVERSE);
9558
9559 SDLoc dl(N);
9560 EVT VT = N->getValueType(0);
9561 SDValue Op = N->getOperand(0);
9562 SDValue Mask = N->getOperand(1);
9563 SDValue EVL = N->getOperand(2);
9564 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9565 unsigned Sz = VT.getScalarSizeInBits();
9566
9567 SDValue Tmp, Tmp2, Tmp3;
9568
9569 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9570 // and finally the i1 pairs.
9571 // TODO: We can easily support i4/i2 legal types if any target ever does.
9572 if (Sz >= 8 && isPowerOf2_32(Sz)) {
9573 // Create the masks - repeating the pattern every byte.
9574 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9575 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9576 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9577
9578 // BSWAP if the type is wider than a single byte.
9579 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
9580
9581 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9582 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
9583 Mask, EVL);
9584 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9585 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
9586 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
9587 Mask, EVL);
9588 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
9589 Mask, EVL);
9590 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9591
9592 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9593 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
9594 Mask, EVL);
9595 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9596 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
9597 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
9598 Mask, EVL);
9599 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
9600 Mask, EVL);
9601 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9602
9603 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9604 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
9605 Mask, EVL);
9606 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9607 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
9608 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
9609 Mask, EVL);
9610 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
9611 Mask, EVL);
9612 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9613 return Tmp;
9614 }
9615 return SDValue();
9616}
9617
// Scalarize a vector load (TargetLowering::scalarizeVectorLoad): lower LD into
// either per-element scalar loads, or -- for vectors whose elements are not
// byte-sized -- one wide integer load followed by shift/mask extraction of each
// element. The result vector is rebuilt with BUILD_VECTOR and returned together
// with the output chain as {value, chain}.
// NOTE(review): this listing is missing several hyperlinked lines dropped during
// extraction -- the line naming the function (expected:
// "TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,") and the declarations of
// the local "Vals" SmallVector (doxygen lines 9660 and 9686). Verify against
// upstream before compiling.
9618std::pair<SDValue, SDValue>
9620 SelectionDAG &DAG) const {
9621 SDLoc SL(LD);
9622 SDValue Chain = LD->getChain();
9623 SDValue BasePTR = LD->getBasePtr();
9624 EVT SrcVT = LD->getMemoryVT();
9625 EVT DstVT = LD->getValueType(0);
9626 ISD::LoadExtType ExtType = LD->getExtensionType();
9627
// Scalable vectors have no fixed element count to iterate over.
9628 if (SrcVT.isScalableVector())
9629 report_fatal_error("Cannot scalarize scalable vector loads");
9630
9631 unsigned NumElem = SrcVT.getVectorNumElements();
9632
9633 EVT SrcEltVT = SrcVT.getScalarType();
9634 EVT DstEltVT = DstVT.getScalarType();
9635
9636 // A vector must always be stored in memory as-is, i.e. without any padding
9637 // between the elements, since various code depend on it, e.g. in the
9638 // handling of a bitcast of a vector type to int, which may be done with a
9639 // vector store followed by an integer load. A vector that does not have
9640 // elements that are byte-sized must therefore be stored as an integer
9641 // built out of the extracted vector elements.
9642 if (!SrcEltVT.isByteSized()) {
9643 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
9644 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
9645
9646 unsigned NumSrcBits = SrcVT.getSizeInBits();
9647 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
9648
// Mask that isolates one element's bits after shifting it to bit 0.
9649 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
9650 SDValue SrcEltBitMask = DAG.getConstant(
9651 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
9652
9653 // Load the whole vector and avoid masking off the top bits as it makes
9654 // the codegen worse.
9655 SDValue Load =
9656 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
9657 LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
9658 LD->getMemOperand()->getFlags(), LD->getAAInfo());
9659
// Extract each element: shift to bit 0 (element order reversed for big
// endian), mask, truncate to the in-memory element type, then apply the
// load's extension kind to reach the destination element type.
9661 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9662 unsigned ShiftIntoIdx =
9663 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
9664 SDValue ShiftAmount = DAG.getShiftAmountConstant(
9665 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
9666 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
9667 SDValue Elt =
9668 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
9669 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
9670
9671 if (ExtType != ISD::NON_EXTLOAD) {
9672 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
9673 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
9674 }
9675
9676 Vals.push_back(Scalar);
9677 }
9678
9679 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
9680 return std::make_pair(Value, Load.getValue(1));
9681 }
9682
// Byte-sized elements: emit one scalar (possibly extending) load per element
// at increasing byte offsets, then join the chains with a TokenFactor.
9683 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
9684 assert(SrcEltVT.isByteSized());
9685
9687 SmallVector<SDValue, 8> LoadChains;
9688
9689 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9690 SDValue ScalarLoad =
9691 DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
9692 LD->getPointerInfo().getWithOffset(Idx * Stride),
9693 SrcEltVT, LD->getOriginalAlign(),
9694 LD->getMemOperand()->getFlags(), LD->getAAInfo());
9695
9696 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
9697
9698 Vals.push_back(ScalarLoad.getValue(0));
9699 LoadChains.push_back(ScalarLoad.getValue(1));
9700 }
9701
9702 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
9703 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
9704
9705 return std::make_pair(Value, NewChain);
9706}
9707
// Scalarize a vector store (TargetLowering::scalarizeVectorStore): lower ST
// into either per-element truncating stores, or -- for vectors whose elements
// are not byte-sized -- OR all elements into one integer of the vector's total
// bit size and emit a single integer store. Returns the new chain.
// NOTE(review): the extraction dropped the line naming this function (expected:
// "SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,") and the
// declaration of the local "Stores" SmallVector (doxygen line 9765). Verify
// against upstream before compiling.
9709 SelectionDAG &DAG) const {
9710 SDLoc SL(ST);
9711
9712 SDValue Chain = ST->getChain();
9713 SDValue BasePtr = ST->getBasePtr();
9714 SDValue Value = ST->getValue();
9715 EVT StVT = ST->getMemoryVT();
9716
// Scalable vectors have no fixed element count to iterate over.
9717 if (StVT.isScalableVector())
9718 report_fatal_error("Cannot scalarize scalable vector stores");
9719
9720 // The type of the data we want to save
9721 EVT RegVT = Value.getValueType();
9722 EVT RegSclVT = RegVT.getScalarType();
9723
9724 // The type of data as saved in memory.
9725 EVT MemSclVT = StVT.getScalarType();
9726
9727 unsigned NumElem = StVT.getVectorNumElements();
9728
9729 // A vector must always be stored in memory as-is, i.e. without any padding
9730 // between the elements, since various code depend on it, e.g. in the
9731 // handling of a bitcast of a vector type to int, which may be done with a
9732 // vector store followed by an integer load. A vector that does not have
9733 // elements that are byte-sized must therefore be stored as an integer
9734 // built out of the extracted vector elements.
9735 if (!MemSclVT.isByteSized()) {
9736 unsigned NumBits = StVT.getSizeInBits();
9737 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
9738
// Accumulate elements into CurrVal: extract, truncate to the memory element
// type, zero-extend to the wide integer, shift into position (element order
// reversed for big endian), and OR into the accumulator.
9739 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
9740
9741 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9742 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
9743 DAG.getVectorIdxConstant(Idx, SL));
9744 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
9745 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
9746 unsigned ShiftIntoIdx =
9747 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
9748 SDValue ShiftAmount =
9749 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
9750 SDValue ShiftedElt =
9751 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
9752 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
9753 }
9754
9755 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
9756 ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
9757 ST->getAAInfo());
9758 }
9759
9760 // Store Stride in bytes
9761 unsigned Stride = MemSclVT.getSizeInBits() / 8;
9762 assert(Stride && "Zero stride!");
9763 // Extract each of the elements from the original vector and save them into
9764 // memory individually.
9766 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9767 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
9768 DAG.getVectorIdxConstant(Idx, SL));
9769
9770 SDValue Ptr =
9771 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
9772
9773 // This scalar TruncStore may be illegal, but we legalize it later.
9774 SDValue Store = DAG.getTruncStore(
9775 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
9776 MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
9777 ST->getAAInfo());
9778
9779 Stores.push_back(Store);
9780 }
9781
// The element stores are independent; merge their chains.
9782 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
9783}
9784
// Expand an unaligned load (TargetLowering::expandUnalignedLoad) into target-
// legal operations. FP/vector loads become an integer load of the same size
// plus a bitcast (or are scalarized, or bounced through an aligned stack
// slot); integer loads are split into two half-width loads recombined with
// SHL+OR. Returns {value, chain}.
// NOTE(review): the extraction dropped the line naming this function (expected:
// "TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG)")
// and the declaration of the local "Stores" SmallVector (doxygen line 9827).
// Verify against upstream before compiling.
9785std::pair<SDValue, SDValue>
9787 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
9788 "unaligned indexed loads not implemented!");
9789 SDValue Chain = LD->getChain();
9790 SDValue Ptr = LD->getBasePtr();
9791 EVT VT = LD->getValueType(0);
9792 EVT LoadedVT = LD->getMemoryVT();
9793 SDLoc dl(LD);
9794 auto &MF = DAG.getMachineFunction();
9795
9796 if (VT.isFloatingPoint() || VT.isVector()) {
9797 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
9798 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
9799 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
9800 LoadedVT.isVector()) {
9801 // Scalarize the load and let the individual components be handled.
9802 return scalarizeVectorLoad(LD, DAG);
9803 }
9804
9805 // Expand to a (misaligned) integer load of the same size,
9806 // then bitconvert to floating point or vector.
9807 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
9808 LD->getMemOperand());
9809 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
9810 if (LoadedVT != VT)
9811 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
9812 ISD::ANY_EXTEND, dl, VT, Result);
9813
9814 return std::make_pair(Result, newLoad.getValue(1));
9815 }
9816
9817 // Copy the value to a (aligned) stack slot using (unaligned) integer
9818 // loads and stores, then do a (aligned) load from the stack slot.
9819 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
9820 unsigned LoadedBytes = LoadedVT.getStoreSize();
9821 unsigned RegBytes = RegVT.getSizeInBits() / 8;
// Round up: the final (possibly partial) register is handled after the loop.
9822 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
9823
9824 // Make sure the stack slot is also aligned for the register type.
9825 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
9826 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
9828 SDValue StackPtr = StackBase;
9829 unsigned Offset = 0;
9830
9831 EVT PtrVT = Ptr.getValueType();
9832 EVT StackPtrVT = StackPtr.getValueType();
9833
9834 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
9835 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
9836
9837 // Do all but one copies using the full register width.
9838 for (unsigned i = 1; i < NumRegs; i++) {
9839 // Load one integer register's worth from the original location.
9840 SDValue Load = DAG.getLoad(
9841 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
9842 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
9843 LD->getAAInfo());
9844 // Follow the load with a store to the stack slot. Remember the store.
9845 Stores.push_back(DAG.getStore(
9846 Load.getValue(1), dl, Load, StackPtr,
9847 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
9848 // Increment the pointers.
9849 Offset += RegBytes;
9850
9851 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
9852 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
9853 }
9854
9855 // The last copy may be partial. Do an extending load.
9856 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
9857 8 * (LoadedBytes - Offset));
9858 SDValue Load =
9859 DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
9860 LD->getPointerInfo().getWithOffset(Offset), MemVT,
9861 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
9862 LD->getAAInfo());
9863 // Follow the load with a store to the stack slot. Remember the store.
9864 // On big-endian machines this requires a truncating store to ensure
9865 // that the bits end up in the right place.
9866 Stores.push_back(DAG.getTruncStore(
9867 Load.getValue(1), dl, Load, StackPtr,
9868 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
9869
9870 // The order of the stores doesn't matter - say it with a TokenFactor.
9871 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9872
9873 // Finally, perform the original load only redirected to the stack slot.
9874 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
9875 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
9876 LoadedVT);
9877
9878 // Callers expect a MERGE_VALUES node.
9879 return std::make_pair(Load, TF);
9880 }
9881
9882 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
9883 "Unaligned load of unsupported type.");
9884
9885 // Compute the new VT that is half the size of the old one. This is an
9886 // integer MVT.
9887 unsigned NumBits = LoadedVT.getSizeInBits();
9888 EVT NewLoadedVT;
9889 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
9890 NumBits >>= 1;
9891
9892 Align Alignment = LD->getOriginalAlign();
9893 unsigned IncrementSize = NumBits / 8;
9894 ISD::LoadExtType HiExtType = LD->getExtensionType();
9895
9896 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
9897 if (HiExtType == ISD::NON_EXTLOAD)
9898 HiExtType = ISD::ZEXTLOAD;
9899
// The low half is always zero-extended so the final OR does not clobber the
// high half; the high half carries the original extension semantics.
9900 // Load the value in two parts
9901 SDValue Lo, Hi;
9902 if (DAG.getDataLayout().isLittleEndian()) {
9903 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
9904 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9905 LD->getAAInfo());
9906
9907 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
9908 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
9909 LD->getPointerInfo().getWithOffset(IncrementSize),
9910 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9911 LD->getAAInfo());
9912 } else {
9913 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
9914 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9915 LD->getAAInfo());
9916
9917 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
9918 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
9919 LD->getPointerInfo().getWithOffset(IncrementSize),
9920 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9921 LD->getAAInfo());
9922 }
9923
9924 // aggregate the two parts
9925 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
9926 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
9927 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
9928
9929 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
9930 Hi.getValue(1));
9931
9932 return std::make_pair(Result, TF);
9933}
9934
// Expand an unaligned store (TargetLowering::expandUnalignedStore) into
// target-legal operations: FP/vector stores become bitcast + integer store
// (or are scalarized, or bounced through an aligned stack slot); integer
// stores are split into two half-width truncating stores. Returns the new
// chain.
// NOTE(review): the extraction dropped the line naming this function (expected:
// "SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,") and the
// declaration of the local "Stores" SmallVector (doxygen line 9988). Verify
// against upstream before compiling.
9936 SelectionDAG &DAG) const {
9937 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
9938 "unaligned indexed stores not implemented!");
9939 SDValue Chain = ST->getChain();
9940 SDValue Ptr = ST->getBasePtr();
9941 SDValue Val = ST->getValue();
9942 EVT VT = Val.getValueType();
9943 Align Alignment = ST->getOriginalAlign();
9944 auto &MF = DAG.getMachineFunction();
9945 EVT StoreMemVT = ST->getMemoryVT();
9946
9947 SDLoc dl(ST);
9948 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
9949 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
9950 if (isTypeLegal(intVT)) {
9951 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
9952 StoreMemVT.isVector()) {
9953 // Scalarize the store and let the individual components be handled.
9954 SDValue Result = scalarizeVectorStore(ST, DAG);
9955 return Result;
9956 }
9957 // Expand to a bitconvert of the value to the integer type of the
9958 // same size, then a (misaligned) int store.
9959 // FIXME: Does not handle truncating floating point stores!
9960 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
9961 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
9962 Alignment, ST->getMemOperand()->getFlags());
9963 return Result;
9964 }
9965 // Do a (aligned) store to a stack slot, then copy from the stack slot
9966 // to the final destination using (unaligned) integer loads and stores.
9967 MVT RegVT = getRegisterType(
9968 *DAG.getContext(),
9969 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
9970 EVT PtrVT = Ptr.getValueType();
9971 unsigned StoredBytes = StoreMemVT.getStoreSize();
9972 unsigned RegBytes = RegVT.getSizeInBits() / 8;
// Round up: the final (possibly partial) register is handled after the loop.
9973 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
9974
9975 // Make sure the stack slot is also aligned for the register type.
9976 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
9977 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
9978
9979 // Perform the original store, only redirected to the stack slot.
9980 SDValue Store = DAG.getTruncStore(
9981 Chain, dl, Val, StackPtr,
9982 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
9983
9984 EVT StackPtrVT = StackPtr.getValueType();
9985
9986 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
9987 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
9989 unsigned Offset = 0;
9990
9991 // Do all but one copies using the full register width.
9992 for (unsigned i = 1; i < NumRegs; i++) {
9993 // Load one integer register's worth from the stack slot.
9994 SDValue Load = DAG.getLoad(
9995 RegVT, dl, Store, StackPtr,
9996 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
9997 // Store it to the final location. Remember the store.
9998 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
9999 ST->getPointerInfo().getWithOffset(Offset),
10000 ST->getOriginalAlign(),
10001 ST->getMemOperand()->getFlags()));
10002 // Increment the pointers.
10003 Offset += RegBytes;
10004 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10005 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10006 }
10007
10008 // The last store may be partial. Do a truncating store. On big-endian
10009 // machines this requires an extending load from the stack slot to ensure
10010 // that the bits are in the right place.
10011 EVT LoadMemVT =
10012 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
10013
10014 // Load from the stack slot.
10015 SDValue Load = DAG.getExtLoad(
10016 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
10017 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
10018
10019 Stores.push_back(
10020 DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
10021 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
10022 ST->getOriginalAlign(),
10023 ST->getMemOperand()->getFlags(), ST->getAAInfo()));
10024 // The order of the stores doesn't matter - say it with a TokenFactor.
10025 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10026 return Result;
10027 }
10028
10029 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
10030 "Unaligned store of unknown type.");
10031 // Get the half-size VT
10032 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
10033 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
10034 unsigned IncrementSize = NumBits / 8;
10035
10036 // Divide the stored value in two parts.
10037 SDValue ShiftAmount =
10038 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
10039 SDValue Lo = Val;
10040 // If Val is a constant, replace the upper bits with 0. The SRL will constant
10041 // fold and not use the upper bits. A smaller constant may be easier to
10042 // materialize.
10043 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
10044 Lo = DAG.getNode(
10045 ISD::AND, dl, VT, Lo,
10046 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
10047 VT));
10048 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
10049
// Which half goes at the lower address depends on the target's endianness.
10050 // Store the two parts
10051 SDValue Store1, Store2;
10052 Store1 = DAG.getTruncStore(Chain, dl,
10053 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10054 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
10055 ST->getMemOperand()->getFlags());
10056
10057 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10058 Store2 = DAG.getTruncStore(
10059 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10060 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
10061 ST->getMemOperand()->getFlags(), ST->getAAInfo());
10062
10063 SDValue Result =
10064 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
10065 return Result;
10066}
10067
// Advance a memory address past one vector's worth of data (used when
// expanding masked load/store). For compressed memory the increment is
// popcount(Mask) * element-size; for scalable vectors it is vscale-scaled;
// otherwise it is the fixed store size of DataVT. Returns Addr + Increment.
// NOTE(review): the extraction dropped the line naming this function (expected:
// "TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,") and
// the "report_fatal_error(" call line (doxygen line 10080) preceding its
// message string. Verify against upstream before compiling.
10068SDValue
10070 const SDLoc &DL, EVT DataVT,
10071 SelectionDAG &DAG,
10072 bool IsCompressedMemory) const {
10073 SDValue Increment;
10074 EVT AddrVT = Addr.getValueType();
10075 EVT MaskVT = Mask.getValueType();
10076 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10077 "Incompatible types of Data and Mask");
10078 if (IsCompressedMemory) {
10079 if (DataVT.isScalableVector())
10081 "Cannot currently handle compressed memory with scalable vectors");
10082 // Incrementing the pointer according to number of '1's in the mask.
10083 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10084 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
// CTPOP is typically only available for >= i32, so widen small masks first.
10085 if (MaskIntVT.getSizeInBits() < 32) {
10086 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10087 MaskIntVT = MVT::i32;
10088 }
10089
10090 // Count '1's with POPCNT.
10091 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10092 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10093 // Scale is an element size in bytes.
10094 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10095 AddrVT);
10096 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10097 } else if (DataVT.isScalableVector()) {
10098 Increment = DAG.getVScale(DL, AddrVT,
10099 APInt(AddrVT.getFixedSizeInBits(),
10100 DataVT.getStoreSize().getKnownMinValue()));
10101 } else
10102 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
10103
10104 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10105}
10106
// Clamp a dynamic subvector/element index Idx so that the NumSubElts-wide
// access starting at Idx stays within VecVT. Fixed-width power-of-two vectors
// with single-element accesses are clamped with a cheap AND mask; otherwise a
// UMIN against the maximum valid start index is used (vscale-based for
// scalable vectors).
// NOTE(review): the extraction dropped the line naming this static helper
// (expected: "static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
// SDValue Idx,"). Verify against upstream before compiling.
10108 EVT VecVT, const SDLoc &dl,
10109 ElementCount SubEC) {
10110 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10111 "Cannot index a scalable vector within a fixed-width vector");
10112
10113 unsigned NElts = VecVT.getVectorMinNumElements();
10114 unsigned NumSubElts = SubEC.getKnownMinValue();
10115 EVT IdxVT = Idx.getValueType();
10116
10117 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10118 // If this is a constant index and we know the value plus the number of the
10119 // elements in the subvector minus one is less than the minimum number of
10120 // elements then it's safe to return Idx.
10121 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
10122 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10123 return Idx;
// Max valid index = vscale * NElts - NumSubElts; USUBSAT guards the case
// where the subtraction could underflow.
10124 SDValue VS =
10125 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
10126 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10127 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
10128 DAG.getConstant(NumSubElts, dl, IdxVT));
10129 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
10130 }
// Power-of-two element count, single element: Idx & (NElts - 1) is exact.
10131 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
10132 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
10133 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
10134 DAG.getConstant(Imm, dl, IdxVT));
10135 }
10136 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10137 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
10138 DAG.getConstant(MaxIndex, dl, IdxVT));
10139}
10140
// Compute the address of a single dynamically-indexed element of the vector
// at VecPtr by delegating to getVectorSubVecPointer with a one-element
// subvector type.
// NOTE(review): the extraction dropped the line naming this function
// (expected: "SDValue TargetLowering::getVectorElementPointer(SelectionDAG
// &DAG,") and the argument line building the one-element vector type
// (doxygen line 10146, expected to construct
// EVT::getVectorVT(..., VecVT.getVectorElementType(), 1)). Verify against
// upstream before compiling.
10142 SDValue VecPtr, EVT VecVT,
10143 SDValue Index) const {
10144 return getVectorSubVecPointer(
10145 DAG, VecPtr, VecVT,
10147 Index);
10148}
10149
// Compute the address of a dynamically-indexed subvector of type SubVecVT
// inside the vector of type VecVT at VecPtr: clamp Index to stay in bounds,
// scale it by the element byte size (and by vscale for scalable subvectors),
// and add it to VecPtr.
// NOTE(review): the extraction dropped the line naming this function
// (expected: "SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG
// &DAG,"). Verify against upstream before compiling.
10151 SDValue VecPtr, EVT VecVT,
10152 EVT SubVecVT,
10153 SDValue Index) const {
10154 SDLoc dl(Index);
10155 // Make sure the index type is big enough to compute in.
10156 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
10157
10158 EVT EltVT = VecVT.getVectorElementType();
10159
10160 // Calculate the element offset and add it to the pointer.
10161 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10162 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10163 "Converting bits to bytes lost precision");
10164 assert(SubVecVT.getVectorElementType() == EltVT &&
10165 "Sub-vector must be a vector with matching element type");
// Clamp so the whole subvector access stays within VecVT's bounds.
10166 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
10167 SubVecVT.getVectorElementCount());
10168
// For scalable subvectors the element offset additionally scales by vscale.
10169 EVT IdxVT = Index.getValueType();
10170 if (SubVecVT.isScalableVector())
10171 Index =
10172 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10173 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
10174
10175 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10176 DAG.getConstant(EltSize, dl, IdxVT));
10177 return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
10178}
10179
10180//===----------------------------------------------------------------------===//
10181// Implementation of Emulated TLS Model
10182//===----------------------------------------------------------------------===//
10183
// Lower a TLS global address under the emulated-TLS model: emit a call to
// __emutls_get_address, passing the address of the compiler-generated
// "__emutls_v.<name>" control variable, and return the call's result (the
// address of the thread-local storage).
// NOTE(review): the extraction dropped the line naming this function
// (expected: "SDValue TargetLowering::LowerToTLSEmulatedModel(const
// GlobalAddressSDNode *GA,"), the declaration of the CallLoweringInfo "CLI"
// (doxygen line 10205), and the declaration of "MFI" (doxygen line 10212,
// expected: MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo()).
// Verify against upstream before compiling.
10185 SelectionDAG &DAG) const {
10186 // Access to address of TLS variable xyz is lowered to a function call:
10187 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10188 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10189 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
10190 SDLoc dl(GA);
10191
// Build the single argument: the address of the __emutls_v.<name> global,
// which the front end / IR lowering is expected to have created already.
10192 ArgListTy Args;
10193 ArgListEntry Entry;
10194 std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
10195 Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
10196 StringRef EmuTlsVarName(NameString);
10197 GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
10198 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
10199 Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
10200 Entry.Ty = VoidPtrType;
10201 Args.push_back(Entry);
10202
10203 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
10204
10206 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10207 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
10208 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10209
10210 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
10211 // At last for X86 targets, maybe good for other targets too?
10213 MFI.setAdjustsStack(true); // Is this only for X86 target?
10214 MFI.setHasCalls(true);
10215
10216 assert((GA->getOffset() == 0) &&
10217 "Emulated TLS must have zero offset in GlobalAddressSDNode");
10218 return CallResult.first;
10219}
10220
// Lower (setcc x, 0, eq) to (srl (ctlz x), log2(bitwidth)): when CTLZ is fast,
// x == 0 iff ctlz(x) equals the bit width, i.e. the log2(bitwidth) bit of the
// ctlz result is set. Returns SDValue() when the pattern does not apply.
// NOTE(review): the extraction dropped the line naming this function
// (expected: "SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,").
// Verify against upstream before compiling.
10222 SelectionDAG &DAG) const {
10223 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
// Only profitable when the target has a cheap CTLZ.
10224 if (!isCtlzFast())
10225 return SDValue();
10226 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10227 SDLoc dl(Op);
10228 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10229 EVT VT = Op.getOperand(0).getValueType();
10230 SDValue Zext = Op.getOperand(0);
// Widen sub-i32 operands so the CTLZ/SRL are done in a legal-ish type.
10231 if (VT.bitsLT(MVT::i32)) {
10232 VT = MVT::i32;
10233 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10234 }
10235 unsigned Log2b = Log2_32(VT.getSizeInBits());
10236 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10237 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10238 DAG.getConstant(Log2b, dl, MVT::i32));
10239 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10240 }
10241 return SDValue();
10242}
10243
// Expand an integer SMIN/SMAX/UMIN/UMAX node into legal operations: special
// umax/umin identities when profitable, vector unrolling when VSELECT is not
// available, and otherwise SETCC + SELECT -- preferring a condition code for
// which an identical SETCC node already exists in the DAG so it can be reused.
// NOTE(review): the extraction dropped the line naming this function
// (expected: "SDValue TargetLowering::expandIntMINMAX(SDNode *Node,
// SelectionDAG &DAG) const {") and several condition-continuation lines
// (doxygen 10254/10264/10272, each an "isOperationLegalOrCustom(...)" clause,
// and 10279, the VSELECT-legality guard before UnrollVectorOp). Verify
// against upstream before compiling.
10245 SDValue Op0 = Node->getOperand(0);
10246 SDValue Op1 = Node->getOperand(1);
10247 EVT VT = Op0.getValueType();
10248 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10249 unsigned Opcode = Node->getOpcode();
10250 SDLoc DL(Node);
10251
10252 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
10253 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
10255 Op0 = DAG.getFreeze(Op0);
10256 SDValue Zero = DAG.getConstant(0, DL, VT);
10257 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10258 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
10259 }
10260
10261 // umin(x,y) -> sub(x,usubsat(x,y))
10262 // TODO: Missing freeze(Op0)?
10263 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
10265 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10266 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
10267 }
10268
10269 // umax(x,y) -> add(x,usubsat(y,x))
10270 // TODO: Missing freeze(Op0)?
10271 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
10273 return DAG.getNode(ISD::ADD, DL, VT, Op0,
10274 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
10275 }
10276
10277 // FIXME: Should really try to split the vector in case it's legal on a
10278 // subvector.
10280 return DAG.UnrollVectorOp(Node);
10281
10282 // Attempt to find an existing SETCC node that we can reuse.
10283 // TODO: Do we need a generic doesSETCCNodeExist?
10284 // TODO: Missing freeze(Op0)/freeze(Op1)?
// buildMinMax: pick the first of {PrefCC, AltCC} for which a matching SETCC
// already exists and select (Op0, Op1); else try the commuted predicates and
// select (Op1, Op0); else fall back to a fresh SETCC with PrefCC.
10285 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10286 ISD::CondCode PrefCommuteCC,
10287 ISD::CondCode AltCommuteCC) {
10288 SDVTList BoolVTList = DAG.getVTList(BoolVT);
10289 for (ISD::CondCode CC : {PrefCC, AltCC}) {
10290 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10291 {Op0, Op1, DAG.getCondCode(CC)})) {
10292 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10293 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10294 }
10295 }
10296 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10297 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10298 {Op0, Op1, DAG.getCondCode(CC)})) {
10299 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10300 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
10301 }
10302 }
10303 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
10304 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10305 };
10306
10307 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10308 // -> Y = (A < B) ? B : A
10309 // -> Y = (A >= B) ? A : B
10310 // -> Y = (A <= B) ? B : A
10311 switch (Opcode) {
10312 case ISD::SMAX:
10313 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10314 case ISD::SMIN:
10315 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10316 case ISD::UMAX:
10317 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10318 case ISD::UMIN:
10319 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10320 }
10321
10322 llvm_unreachable("How did we get here?");
10323}
10324
// Expand a saturating add/sub node ([SU]ADDSAT / [SU]SUBSAT) into legal
// operations: cheap umax/umin identities for the unsigned cases when
// available, otherwise the corresponding overflow op ([SU]ADDO/[SU]SUBO)
// followed by a select (or boolean-mask arithmetic) that clamps on overflow.
// NOTE(review): the extraction dropped the line naming this function
// (expected: "SDValue TargetLowering::expandAddSubSat(SDNode *Node,
// SelectionDAG &DAG) const {") and several interior lines: the VSELECT
// legality guard before UnrollVectorOp (doxygen 10369), the
// ZeroOrNegativeOneBooleanContent guards (10381, 10391), and the
// MinVal/MaxVal APInt declarations (10402-10403, 10433). Verify against
// upstream before compiling.
10326 unsigned Opcode = Node->getOpcode();
10327 SDValue LHS = Node->getOperand(0);
10328 SDValue RHS = Node->getOperand(1);
10329 EVT VT = LHS.getValueType();
10330 SDLoc dl(Node);
10331
10332 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10333 assert(VT.isInteger() && "Expected operands to be integers");
10334
10335 // usub.sat(a, b) -> umax(a, b) - b
10336 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
10337 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
10338 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
10339 }
10340
10341 // uadd.sat(a, b) -> umin(a, ~b) + b
10342 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
10343 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
10344 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
10345 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
10346 }
10347
// Map the saturating opcode to its overflow-reporting counterpart.
10348 unsigned OverflowOp;
10349 switch (Opcode) {
10350 case ISD::SADDSAT:
10351 OverflowOp = ISD::SADDO;
10352 break;
10353 case ISD::UADDSAT:
10354 OverflowOp = ISD::UADDO;
10355 break;
10356 case ISD::SSUBSAT:
10357 OverflowOp = ISD::SSUBO;
10358 break;
10359 case ISD::USUBSAT:
10360 OverflowOp = ISD::USUBO;
10361 break;
10362 default:
10363 llvm_unreachable("Expected method to receive signed or unsigned saturation "
10364 "addition or subtraction node.");
10365 }
10366
10367 // FIXME: Should really try to split the vector in case it's legal on a
10368 // subvector.
10370 return DAG.UnrollVectorOp(Node);
10371
// Perform the arithmetic with an overflow flag, then clamp on overflow.
10372 unsigned BitWidth = LHS.getScalarValueSizeInBits();
10373 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10374 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10375 SDValue SumDiff = Result.getValue(0);
10376 SDValue Overflow = Result.getValue(1);
10377 SDValue Zero = DAG.getConstant(0, dl, VT);
10378 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
10379
10380 if (Opcode == ISD::UADDSAT) {
10382 // (LHS + RHS) | OverflowMask
10383 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10384 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
10385 }
10386 // Overflow ? 0xffff.... : (LHS + RHS)
10387 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
10388 }
10389
10390 if (Opcode == ISD::USUBSAT) {
10392 // (LHS - RHS) & ~OverflowMask
10393 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10394 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
10395 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
10396 }
10397 // Overflow ? 0 : (LHS - RHS)
10398 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
10399 }
10400
10401 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
10404
10405 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
10406 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
10407
10408 // If either of the operand signs are known, then they are guaranteed to
10409 // only saturate in one direction. If non-negative they will saturate
10410 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10411 //
10412 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10413 // sign of 'y' has to be flipped.
10414
10415 bool LHSIsNonNegative = KnownLHS.isNonNegative();
10416 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10417 : KnownRHS.isNegative();
10418 if (LHSIsNonNegative || RHSIsNonNegative) {
10419 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10420 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
10421 }
10422
10423 bool LHSIsNegative = KnownLHS.isNegative();
10424 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10425 : KnownRHS.isNonNegative();
10426 if (LHSIsNegative || RHSIsNegative) {
10427 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10428 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
10429 }
10430 }
10431
// Generic signed clamp: the sign of SumDiff picks between MIN and MAX via
// an arithmetic shift and XOR with SIGNED_MIN.
10432 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
10434 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10435 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
10436 DAG.getConstant(BitWidth - 1, dl, VT));
10437 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
10438 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
10439}
10440
10442 unsigned Opcode = Node->getOpcode();
10443 SDValue LHS = Node->getOperand(0);
10444 SDValue RHS = Node->getOperand(1);
10445 EVT VT = LHS.getValueType();
10446 EVT ResVT = Node->getValueType(0);
10447 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10448 SDLoc dl(Node);
10449
10450 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10451 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10452 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
10453 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
10454
10455 // We can't perform arithmetic on i1 values. Extending them would
10456 // probably result in worse codegen, so let's just use two selects instead.
10457 // Some targets are also just better off using selects rather than subtraction
10458 // because one of the conditions can be merged with one of the selects.
10459 // And finally, if we don't know the contents of high bits of a boolean value
10460 // we can't perform any arithmetic either.
10461 if (shouldExpandCmpUsingSelects() || BoolVT.getScalarSizeInBits() == 1 ||
10463 SDValue SelectZeroOrOne =
10464 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10465 DAG.getConstant(0, dl, ResVT));
10466 return DAG.getSelect(dl, ResVT, IsLT, DAG.getConstant(-1, dl, ResVT),
10467 SelectZeroOrOne);
10468 }
10469
10471 std::swap(IsGT, IsLT);
10472 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
10473 ResVT);
10474}
10475
10477 unsigned Opcode = Node->getOpcode();
10478 bool IsSigned = Opcode == ISD::SSHLSAT;
10479 SDValue LHS = Node->getOperand(0);
10480 SDValue RHS = Node->getOperand(1);
10481 EVT VT = LHS.getValueType();
10482 SDLoc dl(Node);
10483
10484 assert((Node->getOpcode() == ISD::SSHLSAT ||
10485 Node->getOpcode() == ISD::USHLSAT) &&
10486 "Expected a SHLSAT opcode");
10487 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10488 assert(VT.isInteger() && "Expected operands to be integers");
10489
10491 return DAG.UnrollVectorOp(Node);
10492
10493 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10494
10495 unsigned BW = VT.getScalarSizeInBits();
10496 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10497 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
10498 SDValue Orig =
10499 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
10500
10501 SDValue SatVal;
10502 if (IsSigned) {
10503 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
10504 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
10505 SDValue Cond =
10506 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
10507 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
10508 } else {
10509 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
10510 }
10511 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
10512 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
10513}
10514
10516 bool Signed, EVT WideVT,
10517 const SDValue LL, const SDValue LH,
10518 const SDValue RL, const SDValue RH,
10519 SDValue &Lo, SDValue &Hi) const {
10520 // We can fall back to a libcall with an illegal type for the MUL if we
10521 // have a libcall big enough.
10522 // Also, we can fall back to a division in some cases, but that's a big
10523 // performance hit in the general case.
10524 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
10525 if (WideVT == MVT::i16)
10526 LC = RTLIB::MUL_I16;
10527 else if (WideVT == MVT::i32)
10528 LC = RTLIB::MUL_I32;
10529 else if (WideVT == MVT::i64)
10530 LC = RTLIB::MUL_I64;
10531 else if (WideVT == MVT::i128)
10532 LC = RTLIB::MUL_I128;
10533
10534 if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
10535 // We'll expand the multiplication by brute force because we have no other
10536 // options. This is a trivially-generalized version of the code from
10537 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
10538 // 4.3.1).
10539 EVT VT = LL.getValueType();
10540 unsigned Bits = VT.getSizeInBits();
10541 unsigned HalfBits = Bits >> 1;
10542 SDValue Mask =
10543 DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
10544 SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
10545 SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);
10546
10547 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
10548 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
10549
10550 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
10551 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
10552 SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
10553 SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);
10554
10555 SDValue U = DAG.getNode(ISD::ADD, dl, VT,
10556 DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
10557 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
10558 SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);
10559
10560 SDValue V = DAG.getNode(ISD::ADD, dl, VT,
10561 DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
10562 SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);
10563
10564 SDValue W =
10565 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
10566 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
10567 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
10568 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
10569
10570 Hi = DAG.getNode(ISD::ADD, dl, VT, W,
10571 DAG.getNode(ISD::ADD, dl, VT,
10572 DAG.getNode(ISD::MUL, dl, VT, RH, LL),
10573 DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
10574 } else {
10575 // Attempt a libcall.
10576 SDValue Ret;
10578 CallOptions.setSExt(Signed);
10579 CallOptions.setIsPostTypeLegalization(true);
10580 if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
10581 // Halves of WideVT are packed into registers in different order
10582 // depending on platform endianness. This is usually handled by
10583 // the C calling convention, but we can't defer to it in
10584 // the legalizer.
10585 SDValue Args[] = {LL, LH, RL, RH};
10586 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10587 } else {
10588 SDValue Args[] = {LH, LL, RH, RL};
10589 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10590 }
10591 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
10592 "Ret value is a collection of constituent nodes holding result.");
10593 if (DAG.getDataLayout().isLittleEndian()) {
10594 // Same as above.
10595 Lo = Ret.getOperand(0);
10596 Hi = Ret.getOperand(1);
10597 } else {
10598 Lo = Ret.getOperand(1);
10599 Hi = Ret.getOperand(0);
10600 }
10601 }
10602}
10603
10605 bool Signed, const SDValue LHS,
10606 const SDValue RHS, SDValue &Lo,
10607 SDValue &Hi) const {
10608 EVT VT = LHS.getValueType();
10609 assert(RHS.getValueType() == VT && "Mismatching operand types");
10610
10611 SDValue HiLHS;
10612 SDValue HiRHS;
10613 if (Signed) {
10614 // The high part is obtained by SRA'ing all but one of the bits of low
10615 // part.
10616 unsigned LoSize = VT.getFixedSizeInBits();
10617 HiLHS = DAG.getNode(
10618 ISD::SRA, dl, VT, LHS,
10619 DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10620 HiRHS = DAG.getNode(
10621 ISD::SRA, dl, VT, RHS,
10622 DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10623 } else {
10624 HiLHS = DAG.getConstant(0, dl, VT);
10625 HiRHS = DAG.getConstant(0, dl, VT);
10626 }
10627 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
10628 forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
10629}
10630
10631SDValue
10633 assert((Node->getOpcode() == ISD::SMULFIX ||
10634 Node->getOpcode() == ISD::UMULFIX ||
10635 Node->getOpcode() == ISD::SMULFIXSAT ||
10636 Node->getOpcode() == ISD::UMULFIXSAT) &&
10637 "Expected a fixed point multiplication opcode");
10638
10639 SDLoc dl(Node);
10640 SDValue LHS = Node->getOperand(0);
10641 SDValue RHS = Node->getOperand(1);
10642 EVT VT = LHS.getValueType();
10643 unsigned Scale = Node->getConstantOperandVal(2);
10644 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
10645 Node->getOpcode() == ISD::UMULFIXSAT);
10646 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
10647 Node->getOpcode() == ISD::SMULFIXSAT);
10648 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10649 unsigned VTSize = VT.getScalarSizeInBits();
10650
10651 if (!Scale) {
10652 // [us]mul.fix(a, b, 0) -> mul(a, b)
10653 if (!Saturating) {
10655 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10656 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
10657 SDValue Result =
10658 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10659 SDValue Product = Result.getValue(0);
10660 SDValue Overflow = Result.getValue(1);
10661 SDValue Zero = DAG.getConstant(0, dl, VT);
10662
10663 APInt MinVal = APInt::getSignedMinValue(VTSize);
10664 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
10665 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10666 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10667 // Xor the inputs, if resulting sign bit is 0 the product will be
10668 // positive, else negative.
10669 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
10670 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
10671 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
10672 return DAG.getSelect(dl, VT, Overflow, Result, Product);
10673 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
10674 SDValue Result =
10675 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10676 SDValue Product = Result.getValue(0);
10677 SDValue Overflow = Result.getValue(1);
10678
10679 APInt MaxVal = APInt::getMaxValue(VTSize);
10680 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10681 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
10682 }
10683 }
10684
10685 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
10686 "Expected scale to be less than the number of bits if signed or at "
10687 "most the number of bits if unsigned.");
10688 assert(LHS.getValueType() == RHS.getValueType() &&
10689 "Expected both operands to be the same type");
10690
10691 // Get the upper and lower bits of the result.
10692 SDValue Lo, Hi;
10693 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10694 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
10695 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
10696 if (isOperationLegalOrCustom(LoHiOp, VT)) {
10697 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
10698 Lo = Result.getValue(0);
10699 Hi = Result.getValue(1);
10700 } else if (isOperationLegalOrCustom(HiOp, VT)) {
10701 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10702 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
10703 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
10704 // Try for a multiplication using a wider type.
10705 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10706 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
10707 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
10708 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
10709 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
10710 SDValue Shifted =
10711 DAG.getNode(ISD::SRA, dl, WideVT, Res,
10712 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
10713 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
10714 } else if (VT.isVector()) {
10715 return SDValue();
10716 } else {
10717 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
10718 }
10719
10720 if (Scale == VTSize)
10721 // Result is just the top half since we'd be shifting by the width of the
10722 // operand. Overflow impossible so this works for both UMULFIX and
10723 // UMULFIXSAT.
10724 return Hi;
10725
10726 // The result will need to be shifted right by the scale since both operands
10727 // are scaled. The result is given to us in 2 halves, so we only want part of
10728 // both in the result.
10729 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
10730 DAG.getShiftAmountConstant(Scale, VT, dl));
10731 if (!Saturating)
10732 return Result;
10733
10734 if (!Signed) {
10735 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
10736 // widened multiplication) aren't all zeroes.
10737
10738 // Saturate to max if ((Hi >> Scale) != 0),
10739 // which is the same as if (Hi > ((1 << Scale) - 1))
10740 APInt MaxVal = APInt::getMaxValue(VTSize);
10741 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
10742 dl, VT);
10743 Result = DAG.getSelectCC(dl, Hi, LowMask,
10744 DAG.getConstant(MaxVal, dl, VT), Result,
10745 ISD::SETUGT);
10746
10747 return Result;
10748 }
10749
10750 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
10751 // widened multiplication) aren't all ones or all zeroes.
10752
10753 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
10754 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
10755
10756 if (Scale == 0) {
10757 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
10758 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
10759 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
10760 // Saturated to SatMin if wide product is negative, and SatMax if wide
10761 // product is positive ...
10762 SDValue Zero = DAG.getConstant(0, dl, VT);
10763 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
10764 ISD::SETLT);
10765 // ... but only if we overflowed.
10766 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
10767 }
10768
10769 // We handled Scale==0 above so all the bits to examine is in Hi.
10770
10771 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
10772 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
10773 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
10774 dl, VT);
10775 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
10776 // Saturate to min if (Hi >> (Scale - 1)) < -1),
10777 // which is the same as if (HI < (-1 << (Scale - 1))
10778 SDValue HighMask =
10779 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
10780 dl, VT);
10781 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
10782 return Result;
10783}
10784
10785SDValue
10787 SDValue LHS, SDValue RHS,
10788 unsigned Scale, SelectionDAG &DAG) const {
10789 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
10790 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
10791 "Expected a fixed point division opcode");
10792
10793 EVT VT = LHS.getValueType();
10794 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
10795 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
10796 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10797
10798 // If there is enough room in the type to upscale the LHS or downscale the
10799 // RHS before the division, we can perform it in this type without having to
10800 // resize. For signed operations, the LHS headroom is the number of
10801 // redundant sign bits, and for unsigned ones it is the number of zeroes.
10802 // The headroom for the RHS is the number of trailing zeroes.
10803 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
10805 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
10806
10807 // For signed saturating operations, we need to be able to detect true integer
10808 // division overflow; that is, when you have MIN / -EPS. However, this
10809 // is undefined behavior and if we emit divisions that could take such
10810 // values it may cause undesired behavior (arithmetic exceptions on x86, for
10811 // example).
10812 // Avoid this by requiring an extra bit so that we never get this case.
10813 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
10814 // signed saturating division, we need to emit a whopping 32-bit division.
10815 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
10816 return SDValue();
10817
10818 unsigned LHSShift = std::min(LHSLead, Scale);
10819 unsigned RHSShift = Scale - LHSShift;
10820
10821 // At this point, we know that if we shift the LHS up by LHSShift and the
10822 // RHS down by RHSShift, we can emit a regular division with a final scaling
10823 // factor of Scale.
10824
10825 if (LHSShift)
10826 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
10827 DAG.getShiftAmountConstant(LHSShift, VT, dl));
10828 if (RHSShift)
10829 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
10830 DAG.getShiftAmountConstant(RHSShift, VT, dl));
10831
10832 SDValue Quot;
10833 if (Signed) {
10834 // For signed operations, if the resulting quotient is negative and the
10835 // remainder is nonzero, subtract 1 from the quotient to round towards
10836 // negative infinity.
10837 SDValue Rem;
10838 // FIXME: Ideally we would always produce an SDIVREM here, but if the
10839 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
10840 // we couldn't just form a libcall, but the type legalizer doesn't do it.
10841 if (isTypeLegal(VT) &&
10843 Quot = DAG.getNode(ISD::SDIVREM, dl,
10844 DAG.getVTList(VT, VT),
10845 LHS, RHS);
10846 Rem = Quot.getValue(1);
10847 Quot = Quot.getValue(0);
10848 } else {
10849 Quot = DAG.getNode(ISD::SDIV, dl, VT,
10850 LHS, RHS);
10851 Rem = DAG.getNode(ISD::SREM, dl, VT,
10852 LHS, RHS);
10853 }
10854 SDValue Zero = DAG.getConstant(0, dl, VT);
10855 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
10856 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
10857 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
10858 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
10859 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
10860 DAG.getConstant(1, dl, VT));
10861 Quot = DAG.getSelect(dl, VT,
10862 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
10863 Sub1, Quot);
10864 } else
10865 Quot = DAG.getNode(ISD::UDIV, dl, VT,
10866 LHS, RHS);
10867
10868 return Quot;
10869}
10870
10872 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10873 SDLoc dl(Node);
10874 SDValue LHS = Node->getOperand(0);
10875 SDValue RHS = Node->getOperand(1);
10876 bool IsAdd = Node->getOpcode() == ISD::UADDO;
10877
10878 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
10879 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
10880 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
10881 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
10882 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
10883 { LHS, RHS, CarryIn });
10884 Result = SDValue(NodeCarry.getNode(), 0);
10885 Overflow = SDValue(NodeCarry.getNode(), 1);
10886 return;
10887 }
10888
10889 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
10890 LHS.getValueType(), LHS, RHS);
10891
10892 EVT ResultType = Node->getValueType(1);
10893 EVT SetCCType = getSetCCResultType(
10894 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
10895 SDValue SetCC;
10896 if (IsAdd && isOneConstant(RHS)) {
10897 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
10898 // the live range of X. We assume comparing with 0 is cheap.
10899 // The general case (X + C) < C is not necessarily beneficial. Although we
10900 // reduce the live range of X, we may introduce the materialization of
10901 // constant C.
10902 SetCC =
10903 DAG.getSetCC(dl, SetCCType, Result,
10904 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
10905 } else if (IsAdd && isAllOnesConstant(RHS)) {
10906 // Special case: uaddo X, -1 overflows if X != 0.
10907 SetCC =
10908 DAG.getSetCC(dl, SetCCType, LHS,
10909 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
10910 } else {
10912 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
10913 }
10914 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
10915}
10916
10918 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10919 SDLoc dl(Node);
10920 SDValue LHS = Node->getOperand(0);
10921 SDValue RHS = Node->getOperand(1);
10922 bool IsAdd = Node->getOpcode() == ISD::SADDO;
10923
10924 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
10925 LHS.getValueType(), LHS, RHS);
10926
10927 EVT ResultType = Node->getValueType(1);
10928 EVT OType = getSetCCResultType(
10929 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
10930
10931 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
10932 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
10933 if (isOperationLegal(OpcSat, LHS.getValueType())) {
10934 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
10935 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
10936 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
10937 return;
10938 }
10939
10940 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
10941
10942 // For an addition, the result should be less than one of the operands (LHS)
10943 // if and only if the other operand (RHS) is negative, otherwise there will
10944 // be overflow.
10945 // For a subtraction, the result should be less than one of the operands
10946 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
10947 // otherwise there will be overflow.
10948 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
10949 SDValue ConditionRHS =
10950 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
10951
10952 Overflow = DAG.getBoolExtOrTrunc(
10953 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
10954 ResultType, ResultType);
10955}
10956
10958 SDValue &Overflow, SelectionDAG &DAG) const {
10959 SDLoc dl(Node);
10960 EVT VT = Node->getValueType(0);
10961 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10962 SDValue LHS = Node->getOperand(0);
10963 SDValue RHS = Node->getOperand(1);
10964 bool isSigned = Node->getOpcode() == ISD::SMULO;
10965
10966 // For power-of-two multiplications we can use a simpler shift expansion.
10967 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
10968 const APInt &C = RHSC->getAPIntValue();
10969 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
10970 if (C.isPowerOf2()) {
10971 // smulo(x, signed_min) is same as umulo(x, signed_min).
10972 bool UseArithShift = isSigned && !C.isMinSignedValue();
10973 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
10974 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
10975 Overflow = DAG.getSetCC(dl, SetCCVT,
10976 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
10977 dl, VT, Result, ShiftAmt),
10978 LHS, ISD::SETNE);
10979 return true;
10980 }
10981 }
10982
10983 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
10984 if (VT.isVector())
10985 WideVT =
10987
10988 SDValue BottomHalf;
10989 SDValue TopHalf;
10990 static const unsigned Ops[2][3] =
10993 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
10994 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10995 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
10996 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
10997 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
10998 RHS);
10999 TopHalf = BottomHalf.getValue(1);
11000 } else if (isTypeLegal(WideVT)) {
11001 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
11002 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
11003 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
11004 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
11005 SDValue ShiftAmt =
11006 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
11007 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
11008 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
11009 } else {
11010 if (VT.isVector())
11011 return false;
11012
11013 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
11014 }
11015
11016 Result = BottomHalf;
11017 if (isSigned) {
11018 SDValue ShiftAmt = DAG.getShiftAmountConstant(
11019 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
11020 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
11021 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
11022 } else {
11023 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
11024 DAG.getConstant(0, dl, VT), ISD::SETNE);
11025 }
11026
11027 // Truncate the result if SetCC returns a larger type than needed.
11028 EVT RType = Node->getValueType(1);
11029 if (RType.bitsLT(Overflow.getValueType()))
11030 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
11031
11032 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
11033 "Unexpected result type for S/UMULO legalization");
11034 return true;
11035}
11036
11038 SDLoc dl(Node);
11039 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11040 SDValue Op = Node->getOperand(0);
11041 EVT VT = Op.getValueType();
11042
11043 if (VT.isScalableVector())
11045 "Expanding reductions for scalable vectors is undefined.");
11046
11047 // Try to use a shuffle reduction for power of two vectors.
11048 if (VT.isPow2VectorType()) {
11049 while (VT.getVectorNumElements() > 1) {
11050 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11051 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11052 break;
11053
11054 SDValue Lo, Hi;
11055 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11056 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11057 VT = HalfVT;
11058 }
11059 }
11060
11061 EVT EltVT = VT.getVectorElementType();
11062 unsigned NumElts = VT.getVectorNumElements();
11063
11065 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11066
11067 SDValue Res = Ops[0];
11068 for (unsigned i = 1; i < NumElts; i++)
11069 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11070
11071 // Result type may be wider than element type.
11072 if (EltVT != Node->getValueType(0))
11073 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11074 return Res;
11075}
11076
11078 SDLoc dl(Node);
11079 SDValue AccOp = Node->getOperand(0);
11080 SDValue VecOp = Node->getOperand(1);
11081 SDNodeFlags Flags = Node->getFlags();
11082
11083 EVT VT = VecOp.getValueType();
11084 EVT EltVT = VT.getVectorElementType();
11085
11086 if (VT.isScalableVector())
11088 "Expanding reductions for scalable vectors is undefined.");
11089
11090 unsigned NumElts = VT.getVectorNumElements();
11091
11093 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11094
11095 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11096
11097 SDValue Res = AccOp;
11098 for (unsigned i = 0; i < NumElts; i++)
11099 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11100
11101 return Res;
11102}
11103
11105 SelectionDAG &DAG) const {
11106 EVT VT = Node->getValueType(0);
11107 SDLoc dl(Node);
11108 bool isSigned = Node->getOpcode() == ISD::SREM;
11109 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11110 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11111 SDValue Dividend = Node->getOperand(0);
11112 SDValue Divisor = Node->getOperand(1);
11113 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11114 SDVTList VTs = DAG.getVTList(VT, VT);
11115 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11116 return true;
11117 }
11118 if (isOperationLegalOrCustom(DivOpc, VT)) {
11119 // X % Y -> X-X/Y*Y
11120 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11121 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11122 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11123 return true;
11124 }
11125 return false;
11126}
11127
11129 SelectionDAG &DAG) const {
11130 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
11131 SDLoc dl(SDValue(Node, 0));
11132 SDValue Src = Node->getOperand(0);
11133
11134 // DstVT is the result type, while SatVT is the size to which we saturate
11135 EVT SrcVT = Src.getValueType();
11136 EVT DstVT = Node->getValueType(0);
11137
11138 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
11139 unsigned SatWidth = SatVT.getScalarSizeInBits();
11140 unsigned DstWidth = DstVT.getScalarSizeInBits();
11141 assert(SatWidth <= DstWidth &&
11142 "Expected saturation width smaller than result width");
11143
11144 // Determine minimum and maximum integer values and their corresponding
11145 // floating-point values.
11146 APInt MinInt, MaxInt;
11147 if (IsSigned) {
11148 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
11149 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
11150 } else {
11151 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
11152 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
11153 }
11154
11155 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
11156 // libcall emission cannot handle this. Large result types will fail.
11157 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
11158 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
11159 SrcVT = Src.getValueType();
11160 }
11161
11162 APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
11163 APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));
11164
11165 APFloat::opStatus MinStatus =
11166 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
11167 APFloat::opStatus MaxStatus =
11168 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
11169 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
11170 !(MaxStatus & APFloat::opStatus::opInexact);
11171
11172 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
11173 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
11174
11175 // If the integer bounds are exactly representable as floats and min/max are
11176 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
11177 // of comparisons and selects.
11178 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
11180 if (AreExactFloatBounds && MinMaxLegal) {
11181 SDValue Clamped = Src;
11182
11183 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11184 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
11185 // Clamp by MaxFloat from above. NaN cannot occur.
11186 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
11187 // Convert clamped value to integer.
11188 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
11189 dl, DstVT, Clamped);
11190
11191 // In the unsigned case we're done, because we mapped NaN to MinFloat,
11192 // which will cast to zero.
11193 if (!IsSigned)
11194 return FpToInt;
11195
11196 // Otherwise, select 0 if Src is NaN.
11197 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11198 EVT SetCCVT =
11199 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11200 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11201 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
11202 }
11203
11204 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
11205 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
11206
11207 // Result of direct conversion. The assumption here is that the operation is
11208 // non-trapping and it's fine to apply it to an out-of-range value if we
11209 // select it away later.
11210 SDValue FpToInt =
11211 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
11212
11213 SDValue Select = FpToInt;
11214
11215 EVT SetCCVT =
11216 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11217
11218 // If Src ULT MinFloat, select MinInt. In particular, this also selects
11219 // MinInt if Src is NaN.
11220 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
11221 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
11222 // If Src OGT MaxFloat, select MaxInt.
11223 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
11224 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
11225
11226 // In the unsigned case we are done, because we mapped NaN to MinInt, which
11227 // is already zero.
11228 if (!IsSigned)
11229 return Select;
11230
11231 // Otherwise, select 0 if Src is NaN.
11232 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11233 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11234 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
11235}
11236
// Narrows a wide FP value to ResultVT using round-to-odd, so that a later
// second rounding (e.g. f32 -> bf16 in expandFP_ROUND below) does not suffer
// from double-rounding error.
// NOTE(review): the opening signature line (doxygen line 11237) is missing
// from this extraction; judging from the call at doxygen line 11327 this is
// TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
// const SDLoc &dl, SelectionDAG &DAG) -- confirm against the original source.
11238 const SDLoc &dl,
11239 SelectionDAG &DAG) const {
11240 EVT OperandVT = Op.getValueType();
// Same scalar type already: nothing to round.
11241 if (OperandVT.getScalarType() == ResultVT.getScalarType())
11242 return Op;
11243 EVT ResultIntVT = ResultVT.changeTypeToInteger();
11244 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11245 // can induce double-rounding which may alter the results. We can
11246 // correct for this using a trick explained in: Boldo, Sylvie, and
11247 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11248 // World Congress. 2005.
11249 unsigned BitSize = OperandVT.getScalarSizeInBits();
11250 EVT WideIntVT = OperandVT.changeTypeToInteger();
11251 SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op);
// Save the sign bit; the rounding below works on the absolute value and the
// sign is OR'ed back into the result at the end.
11252 SDValue SignBit =
11253 DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
11254 DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT));
11255 SDValue AbsWide;
// Prefer a native FABS when available; otherwise clear the sign bit with an
// integer mask on the bitcast value.
11256 if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
11257 AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
11258 } else {
11259 SDValue ClearedSign = DAG.getNode(
11260 ISD::AND, dl, WideIntVT, OpAsInt,
11261 DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
11262 AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
11263 }
// Round |Op| to the narrow type, then widen it back so it can be compared
// against the original wide value to detect an inexact narrowing.
11264 SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
11265 SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);
11266
11267 // We can keep the narrow value as-is if narrowing was exact (no
11268 // rounding error), the wide value was NaN (the narrow value is also
11269 // NaN and should be preserved) or if we rounded to the odd value.
11270 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
11271 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
11272 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
11273 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
11274 EVT ResultIntVTCCVT = getSetCCResultType(
11275 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
11276 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
11277 // The result is already odd so we don't need to do anything.
11278 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
11279
11280 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11281 AbsWide.getValueType());
11282 // We keep results which are exact, odd or NaN.
// SETUEQ is true for "equal or unordered", so one compare covers both the
// exact case and the NaN case.
11283 SDValue KeepNarrow =
11284 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ);
11285 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11286 // We morally performed a round-down if AbsNarrow is smaller than
11287 // AbsWide.
11288 SDValue NarrowIsRd =
11289 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
11290 // If the narrow value is odd or exact, pick it.
11291 // Otherwise, narrow is even and corresponds to either the rounded-up
11292 // or rounded-down value. If narrow is the rounded-down value, we want
11293 // the rounded-up value as it will be odd.
11294 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
11295 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
11296 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
// Shift the saved sign bit down into the narrow type's sign position and OR
// it back in before bitcasting to the FP result type.
11297 int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
11298 SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
11299 SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
11300 SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
11301 Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit);
11302 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
11303}
11304
// Expands an FP_ROUND node. Only the bf16 destination case is handled here;
// for any other destination scalar type it returns SDValue() to signal that
// no expansion was produced.
// NOTE(review): the opening signature line (doxygen line 11305) is missing
// from this extraction; presumably
// SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG)
// const -- confirm against the original source.
11306 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11307 SDValue Op = Node->getOperand(0);
11308 EVT VT = Node->getValueType(0);
11309 SDLoc dl(Node);
11310 if (VT.getScalarType() == MVT::bf16) {
// Operand 1 of FP_ROUND is the "trunc" flag; when it is 1 the excess
// precision is known to be unused and a plain FP_TO_BF16 suffices.
11311 if (Node->getConstantOperandVal(1) == 1) {
11312 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
11313 }
11314 EVT OperandVT = Op.getValueType();
// Detect NaN inputs up front (SETUO: unordered self-compare) so the final
// select can bypass the rounding-bias addition for them.
11315 SDValue IsNaN = DAG.getSetCC(
11316 dl,
11317 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
11318 Op, Op, ISD::SETUO);
11319
11320 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11321 // can induce double-rounding which may alter the results. We can
11322 // correct for this using a trick explained in: Boldo, Sylvie, and
11323 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11324 // World Congress. 2005.
11325 EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
11326 EVT I32 = F32.changeTypeToInteger();
11327 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
11328 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11329
11330 // Conversions should set NaN's quiet bit. This also prevents NaNs from
11331 // turning into infinities.
11332 SDValue NaN =
11333 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
11334
11335 // Factor in the contribution of the low 16 bits.
// Round-to-nearest-even: bias is 0x7fff plus the lsb of the kept portion,
// so exact ties round toward the even (lsb==0) result.
11336 SDValue One = DAG.getConstant(1, dl, I32);
11337 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
11338 DAG.getShiftAmountConstant(16, I32, dl));
11339 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
11340 SDValue RoundingBias =
11341 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
11342 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
11343
11344 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
11345 // 0x80000000.
11346 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
11347
11348 // Now that we have rounded, shift the bits into position.
11349 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
11350 DAG.getShiftAmountConstant(16, I32, dl));
11351 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
// Keep the high 16 bits as the bf16 payload, then bitcast to the result.
11352 EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11353 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
11354 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
11355 }
// Non-bf16 destinations are not expanded here.
11356 return SDValue();
11357}
11358
// Expands ISD::VECTOR_SPLICE on scalable vectors by spilling both operands
// contiguously to a stack slot and reloading the result vector from the
// spliced start offset.
// NOTE(review): the opening signature line (doxygen line 11359) is missing
// from this extraction; presumably
// SDValue TargetLowering::expandVECTOR_SPLICE(SDNode *Node,
// SelectionDAG &DAG) const -- confirm against the original source.
11360 SelectionDAG &DAG) const {
11361 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11362 assert(Node->getValueType(0).isScalableVector() &&
11363 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11364
11365 EVT VT = Node->getValueType(0);
11366 SDValue V1 = Node->getOperand(0);
11367 SDValue V2 = Node->getOperand(1);
// Operand 2 is the splice immediate: non-negative = leading index into V1,
// negative = count of trailing elements taken from the end of V1.
11368 int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
11369 SDLoc DL(Node);
11370
11371 // Expand through memory thusly:
11372 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11373 // Store V1, Ptr
11374 // Store V2, Ptr + sizeof(V1)
11375 // If (Imm < 0)
11376 // TrailingElts = -Imm
11377 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11378 // else
11379 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
11380 // Res = Load Ptr
11381
11382 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
11383
// NOTE(review): doxygen line 11384 (the start of the MemVT declaration,
// a double-length vector type for the V1:V2 concatenation) is missing from
// this extraction.
11385 VT.getVectorElementCount() * 2);
11386 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11387 EVT PtrVT = StackPtr.getValueType();
11388 auto &MF = DAG.getMachineFunction();
11389 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11390 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11391
11392 // Store the lo part of CONCAT_VECTORS(V1, V2)
11393 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
11394 // Store the hi part of CONCAT_VECTORS(V1, V2)
// The byte offset of V2 is vscale-scaled because VT is scalable.
11395 SDValue OffsetToV2 = DAG.getVScale(
11396 DL, PtrVT,
// NOTE(review): doxygen line 11397 (the APInt byte-offset argument of
// getVScale) is missing from this extraction.
11398 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
// Chain the V2 store after the V1 store so both are ordered before the load.
11399 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
11400
11401 if (Imm >= 0) {
11402 // Load back the required element. getVectorElementPointer takes care of
11403 // clamping the index if it's out-of-bounds.
11404 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
11405 // Load the spliced result
11406 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
// NOTE(review): doxygen line 11407 (the MachinePointerInfo argument of this
// getLoad) is missing from this extraction.
11408 }
11409
11410 uint64_t TrailingElts = -Imm;
11411
11412 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11413 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11414 SDValue TrailingBytes =
11415 DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
11416
// Clamp the trailing-byte offset to the runtime vector length so the load
// never starts before the beginning of the V1:V2 buffer.
11417 if (TrailingElts > VT.getVectorMinNumElements()) {
11418 SDValue VLBytes =
11419 DAG.getVScale(DL, PtrVT,
11420 APInt(PtrVT.getFixedSizeInBits(),
// NOTE(review): doxygen line 11421 (the remainder of this APInt expression)
// is missing from this extraction.
11422 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
11423 }
11424
11425 // Calculate the start address of the spliced result.
11426 StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
11427
11428 // Load the spliced result
11429 return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
11431}
11432
// Expands a masked-compress operation (operands: Vec, Mask, Passthru) for
// fixed-length vectors by scattering selected elements to consecutive stack
// slots and reloading the whole vector.
// NOTE(review): the opening signature line (doxygen line 11433) is missing
// from this extraction; the fatal-error text below suggests this expands a
// masked_compress node -- confirm the exact function name against the
// original source.
11434 SelectionDAG &DAG) const {
11435 SDLoc DL(Node);
11436 SDValue Vec = Node->getOperand(0);
11437 SDValue Mask = Node->getOperand(1);
11438 SDValue Passthru = Node->getOperand(2);
11439
11440 EVT VecVT = Vec.getValueType();
11441 EVT ScalarVT = VecVT.getScalarType();
11442 EVT MaskVT = Mask.getValueType();
11443 EVT MaskScalarVT = MaskVT.getScalarType();
11444
11445 // Needs to be handled by targets that have scalable vector types.
11446 if (VecVT.isScalableVector())
11447 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11448
// Stack slot big enough for one whole vector; the result is built in memory.
11449 SDValue StackPtr = DAG.CreateStackTemporary(
11450 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
11451 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11452 MachinePointerInfo PtrInfo =
// NOTE(review): doxygen line 11453 (the initializer of PtrInfo, a fixed-stack
// MachinePointerInfo for FI) is missing from this extraction.
11454
11455 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
11456 SDValue Chain = DAG.getEntryNode();
// OutPos tracks the next output slot; it only advances on true mask lanes.
11457 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
11458
11459 bool HasPassthru = !Passthru.isUndef();
11460
11461 // If we have a passthru vector, store it on the stack, overwrite the matching
11462 // positions and then re-write the last element that was potentially
11463 // overwritten even though mask[i] = false.
11464 if (HasPassthru)
11465 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
11466
11467 SDValue LastWriteVal;
11468 APInt PassthruSplatVal;
11469 bool IsSplatPassthru =
11470 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
11471
11472 if (IsSplatPassthru) {
11473 // As we do not know which position we wrote to last, we cannot simply
11474 // access that index from the passthru vector. So we first check if passthru
11475 // is a splat vector, to use any element ...
11476 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
11477 } else if (HasPassthru) {
11478 // ... if it is not a splat vector, we need to get the passthru value at
11479 // position = popcount(mask) and re-load it from the stack before it is
11480 // overwritten in the loop below.
11481 SDValue Popcount = DAG.getNode(
11482 ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
11483 Popcount = DAG.getNode(ISD::ZERO_EXTEND, DL,
11484 MaskVT.changeVectorElementType(ScalarVT), Popcount);
11485 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, ScalarVT, Popcount);
11486 SDValue LastElmtPtr =
11487 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
11488 LastWriteVal = DAG.getLoad(
11489 ScalarVT, DL, Chain, LastElmtPtr,
// NOTE(review): doxygen line 11490 (the MachinePointerInfo argument of this
// getLoad) is missing from this extraction.
11491 Chain = LastWriteVal.getValue(1);
11492 }
11493
11494 unsigned NumElms = VecVT.getVectorNumElements();
// Scalar loop over every lane: unconditionally store lane I at OutPos, then
// advance OutPos only if mask[I] is set, so unselected stores are overwritten
// by the next selected lane.
11495 for (unsigned I = 0; I < NumElms; I++) {
// NOTE(review): doxygen line 11496 (presumably the per-iteration Idx constant
// used below) is missing from this extraction.
11497
11498 SDValue ValI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec, Idx);
11499 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11500 Chain = DAG.getStore(
11501 Chain, DL, ValI, OutPtr,
// NOTE(review): doxygen line 11502 (the MachinePointerInfo argument of this
// getStore) is missing from this extraction.
11503
11504 // Get the mask value and add it to the current output position. This
11505 // either increments by 1 if MaskI is true or adds 0 otherwise.
11506 // Freeze in case we have poison/undef mask entries.
11507 SDValue MaskI = DAG.getFreeze(
11508 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskScalarVT, Mask, Idx));
// NOTE(review): this second getFreeze is redundant -- MaskI was already
// frozen on the line above. Harmless, but a candidate for cleanup.
11509 MaskI = DAG.getFreeze(MaskI);
11510 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
11511 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
11512 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
11513
// After the last lane, patch up the one slot that a trailing false-mask
// store may have clobbered: keep ValI if every lane was selected, otherwise
// restore the saved passthru value.
11514 if (HasPassthru && I == NumElms - 1) {
11515 SDValue EndOfVector =
11516 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
11517 SDValue AllLanesSelected =
11518 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
// Clamp OutPos so the final store stays inside the vector.
11519 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
11520 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11521
11522 // Re-write the last ValI if all lanes were selected. Otherwise,
11523 // overwrite the last write with the passthru value.
11524 LastWriteVal =
11525 DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI, LastWriteVal);
11526 Chain = DAG.getStore(
11527 Chain, DL, LastWriteVal, OutPtr,
// NOTE(review): doxygen line 11528 (the MachinePointerInfo argument of this
// getStore) is missing from this extraction.
11529 }
11530 }
11531
// Reload the compressed vector that the loop assembled in the stack slot.
11532 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
11533}
11534
// Legalizes a SETCC condition code that the target does not support, by
// swapping operands, inverting the condition (reported via NeedInvert), or
// expanding one compare into two legal compares combined with AND/OR.
// Returns true if LHS/RHS/CC were rewritten, false if nothing was needed.
// Supports both plain SETCC and VP (vector-predicated) SETCC via Mask/EVL.
// NOTE(review): the opening signature line (doxygen line 11535) is missing
// from this extraction; presumably
// bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, ...)
// -- confirm against the original source.
11536 SDValue &LHS, SDValue &RHS,
11537 SDValue &CC, SDValue Mask,
11538 SDValue EVL, bool &NeedInvert,
11539 const SDLoc &dl, SDValue &Chain,
11540 bool IsSignaling) const {
11541 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11542 MVT OpVT = LHS.getSimpleValueType();
11543 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
11544 NeedInvert = false;
// Mask and EVL must be provided together: both present (VP path) or both
// absent (plain SETCC path).
11545 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
11546 bool IsNonVP = !EVL;
11547 switch (TLI.getCondCodeAction(CCCode, OpVT)) {
11548 default:
11549 llvm_unreachable("Unknown condition code action!");
// NOTE(review): doxygen line 11550 (the case label for the "Legal" action)
// is missing from this extraction.
11551 // Nothing to do.
11552 break;
// NOTE(review): doxygen lines 11553-11554 (the "Expand" case label and the
// declaration of InvCC as the operand-swapped condition) are missing from
// this extraction.
// First attempt: the swapped-operand form of the condition may be legal.
11555 if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
11556 std::swap(LHS, RHS);
11557 CC = DAG.getCondCode(InvCC);
11558 return true;
11559 }
11560 // Swapping operands didn't work. Try inverting the condition.
11561 bool NeedSwap = false;
11562 InvCC = getSetCCInverse(CCCode, OpVT);
11563 if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
11564 // If inverting the condition is not enough, try swapping operands
11565 // on top of it.
11566 InvCC = ISD::getSetCCSwappedOperands(InvCC);
11567 NeedSwap = true;
11568 }
11569 if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
11570 CC = DAG.getCondCode(InvCC);
// Caller must invert the result of the emitted compare.
11571 NeedInvert = true;
11572 if (NeedSwap)
11573 std::swap(LHS, RHS);
11574 return true;
11575 }
11576
// Last resort: expand into two compares (CC1, CC2) joined by Opc (AND/OR).
// NOTE(review): doxygen line 11577 (the declarations of CC1 and CC2) is
// missing from this extraction.
11578 unsigned Opc = 0;
11579 switch (CCCode) {
11580 default:
11581 llvm_unreachable("Don't know how to expand this condition!");
11582 case ISD::SETUO:
11583 if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
// unordered(L,R) == (L une L) || (R une R): NaN compares une-equal to itself.
11584 CC1 = ISD::SETUNE;
11585 CC2 = ISD::SETUNE;
11586 Opc = ISD::OR;
11587 break;
11588 }
11589 assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11590 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
// SETUO == !SETO, so reuse the SETO expansion below with inversion.
11591 NeedInvert = true;
11592 [[fallthrough]];
11593 case ISD::SETO:
11594 assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11595 "If SETO is expanded, SETOEQ must be legal!");
// ordered(L,R) == (L oeq L) && (R oeq R).
11596 CC1 = ISD::SETOEQ;
11597 CC2 = ISD::SETOEQ;
11598 Opc = ISD::AND;
11599 break;
11600 case ISD::SETONE:
11601 case ISD::SETUEQ:
11602 // If the SETUO or SETO CC isn't legal, we might be able to use
11603 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
11604 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
11605 // the operands.
// Bit 0x8 of the CondCode value distinguishes unordered from ordered forms.
11606 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11607 if (!TLI.isCondCodeLegal(CC2, OpVT) &&
11608 (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
11609 TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
11610 CC1 = ISD::SETOGT;
11611 CC2 = ISD::SETOLT;
11612 Opc = ISD::OR;
11613 NeedInvert = ((unsigned)CCCode & 0x8U);
11614 break;
11615 }
11616 [[fallthrough]];
11617 case ISD::SETOEQ:
11618 case ISD::SETOGT:
11619 case ISD::SETOGE:
11620 case ISD::SETOLT:
11621 case ISD::SETOLE:
11622 case ISD::SETUNE:
11623 case ISD::SETUGT:
11624 case ISD::SETUGE:
11625 case ISD::SETULT:
11626 case ISD::SETULE:
11627 // If we are floating point, assign and break, otherwise fall through.
11628 if (!OpVT.isInteger()) {
11629 // We can use the 4th bit to tell if we are the unordered
11630 // or ordered version of the opcode.
11631 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11632 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
// Strip the ordering bits and set 0x10 to form the "don't care about
// ordering" variant of the same relation for CC1.
11633 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
11634 break;
11635 }
11636 // Fallthrough if we are unsigned integer.
11637 [[fallthrough]];
11638 case ISD::SETLE:
11639 case ISD::SETGT:
11640 case ISD::SETGE:
11641 case ISD::SETLT:
11642 case ISD::SETNE:
11643 case ISD::SETEQ:
11644 // If all combinations of inverting the condition and swapping operands
11645 // didn't work then we have no means to expand the condition.
11646 llvm_unreachable("Don't know how to expand this condition!");
11647 }
11648
11649 SDValue SetCC1, SetCC2;
11650 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
11651 // If we aren't the ordered or unorder operation,
11652 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
11653 if (IsNonVP) {
11654 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
11655 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
11656 } else {
11657 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
11658 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
11659 }
11660 } else {
11661 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
11662 if (IsNonVP) {
11663 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
11664 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
11665 } else {
11666 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
11667 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
11668 }
11669 }
// For strict (chained) compares, merge the two output chains.
11670 if (Chain)
11671 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
11672 SetCC2.getValue(1));
11673 if (IsNonVP)
11674 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
11675 else {
11676 // Transform the binary opcode to the VP equivalent.
11677 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
11678 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
11679 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
11680 }
// Result is returned in LHS; RHS/CC are cleared to signal the expansion.
11681 RHS = SDValue();
11682 CC = SDValue();
11683 return true;
11684 }
11685 }
11686 return false;
11687}
unsigned const MachineRegisterInfo * MRI
static const LLT F32
amdgpu AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
basic Basic Alias true
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
return RetTy
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:512
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
#define T1
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const char LLVMTargetMachineRef TM
const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if this FPClassTest can be performed with an ordered fcmp to 0,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1243
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.h:1064
APInt bitcastToAPInt() const
Definition: APFloat.h:1260
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1044
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:1004
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1015
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition: APInt.cpp:1543
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1728
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1387
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:429
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:209
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:403
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1500
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition: APInt.h:1372
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1366
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1472
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:186
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1310
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:351
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1162
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:238
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:360
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
void setSignBit()
Set the sign bit to 1.
Definition: APInt.h:1320
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1448
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:189
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:196
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:309
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1229
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1377
APInt reverseBits() const
Definition: APInt.cpp:737
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:814
void negate()
Negate this APInt in place.
Definition: APInt.h:1430
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1598
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1557
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:199
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1491
unsigned countLeadingZeros() const
Definition: APInt.h:1565
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:336
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
unsigned logBase2() const
Definition: APInt.h:1719
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:455
void setAllBits()
Set every bit to 1.
Definition: APInt.h:1299
APInt multiplicativeInverse() const
Definition: APInt.cpp:1244
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition: APInt.h:385
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:314
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1130
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition: APInt.h:1347
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:853
APInt byteSwap() const
Definition: APInt.cpp:715
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1237
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:420
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:286
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:276
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:180
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1369
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:369
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:266
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:219
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition: APInt.h:1404
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1522
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:838
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:831
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1615
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1201
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition: APInt.h:1323
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1092
bool contains(Attribute::AttrKind A) const
Return true if the builder has the specified attribute.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to calling a function.
Definition: InstrTypes.h:1236
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:706
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
bool isBigEndian() const
Definition: DataLayout.h:239
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:350
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
std::vector< std::string > ConstraintCodeVector
Definition: InlineAsm.h:102
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:393
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_GPRel32BlockAddress
EK_GPRel32BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative, 32-bit value.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
@ EK_GPRel64BlockAddress
EK_GPRel64BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative, 64-bit value.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition: Module.h:461
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
Class to represent pointers.
Definition: DerivedTypes.h:646
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static SDNodeIterator end(const SDNode *N)
static SDNodeIterator begin(const SDNode *N)
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:227
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:737
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending, sign-extending, zero-extending, or truncating it.
Definition: SelectionDAG.h:969
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:494
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:452
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:843
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:488
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:676
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:877
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:489
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:691
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:783
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:483
std::optional< uint64_t > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
std::optional< uint64_t > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:501
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:571
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:893
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:556
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:250
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
iterator end() const
Definition: StringRef.h:113
Class to represent struct types.
Definition: DerivedTypes.h:216
void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
virtual EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be truncating or not?
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool shouldExpandCmpUsingSelects() const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
MVT getSimpleValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the MVT corresponding to this LLVM type. See getValueType.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in types wider than i1.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const
Get the CondCode that's to be used to test the result of the comparison libcall against zero.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom on this target.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false, ...
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, EVT WideVT, const SDValue LL, const SDValue LH, const SDValue RL, const SDValue RH, SDValue &Lo, SDValue &Hi) const
forceExpandWideMUL - Unconditionally expand a MUL into either a libcall or brute force via a wide mul...
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:726
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:287
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
Definition: APInt.cpp:2978
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:779
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:752
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:511
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:573
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:743
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:374
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:501
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1074
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:380
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:557
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:850
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:933
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition: ISDOpcodes.h:387
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1455
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:684
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:751
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:960
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:1095
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:514
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:521
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:756
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:641
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1409
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1021
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:438
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:439
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:771
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1008
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:366
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:839
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:828
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:696
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:393
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:918
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:765
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:456
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1027
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:866
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:164
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:679
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:223
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:626
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:899
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:861
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:885
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:793
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:691
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
Definition: ISDOpcodes.h:1636
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
Definition: ISDOpcodes.h:1641
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1611
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1578
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1558
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:547
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition: STLExtras.h:1754
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isConstFalseVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Definition: Utils.cpp:1606
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition: APFloat.h:1446
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:382
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:250
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:254
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:266
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition: ValueTypes.h:34
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:120
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:233
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:415
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:455
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:397
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:438
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber/label.
Definition: InlineAsm.h:126
int MatchingInput
MatchingInput - If this is not -1, this is an output constraint where an input constraint is required...
Definition: InlineAsm.h:136
ConstraintCodeVector Codes
Code - The constraint code, either the register name (in braces) or the constraint letter/number.
Definition: InlineAsm.h:154
SubConstraintInfoVector multipleAlternatives
multipleAlternatives - If there are multiple alternative constraints, this array will contain them.
Definition: InlineAsm.h:161
bool isIndirect
isIndirect - True if this operand is an indirect operand.
Definition: InlineAsm.h:150
bool hasMatchingInput() const
hasMatchingInput - Return true if this is an output constraint that has a matching input constraint.
Definition: InlineAsm.h:140
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:290
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:175
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition: KnownBits.h:244
static KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
Definition: KnownBits.cpp:202
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:97
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:231
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:62
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:150
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:536
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition: KnownBits.h:278
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition: KnownBits.h:222
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
static KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
Definition: KnownBits.cpp:178
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:161
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition: KnownBits.h:310
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:300
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:169
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:237
static KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
Definition: KnownBits.cpp:215
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:502
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:542
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition: KnownBits.cpp:51
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:518
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:522
bool isNegative() const
Returns true if this value is known to be negative.
Definition: KnownBits.h:94
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:797
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
Definition: KnownBits.h:156
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:546
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:526
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition: KnownBits.h:275
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:512
static KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Definition: KnownBits.cpp:196
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This contains information for each constraint that we are lowering.
MVT ConstraintVT
The ValueType for the operand value.
TargetLowering::ConstraintType ConstraintType
Information about the constraint code, e.g.
std::string ConstraintCode
This contains the actual string for the code, like "m".
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setSExt(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...