// Doxygen page header (extraction artifact): LLVM 22.0.0git — TargetLowering.cpp.
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
27#include "llvm/IR/DataLayout.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/MC/MCAsmInfo.h"
32#include "llvm/MC/MCExpr.h"
38#include <cctype>
39#include <deque>
40using namespace llvm;
41using namespace llvm::SDPatternMatch;
42
43/// NOTE: The TargetMachine owns TLOF.
47
48// Define the virtual destructor out-of-line for build efficiency.
50
51const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
52 return nullptr;
53}
54
58
/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
// NOTE(extraction): the signature line(s) of this function were dropped by
// this dump; presumably TargetLowering::isInTailCallPosition taking the node
// (Node) and DAG — confirm against upstream.
                                          SDValue &Chain) const {
  // NOTE(extraction): a line was dropped here (presumably obtaining the
  // caller Function &F, which is used below).

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef,
                           Attribute::Range, Attribute::NoFPClass})
    CallerAttrs.removeAttribute(Attr);

  // Any return attribute remaining after the removals above blocks tail-call
  // formation.
  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  // NOTE(review): as written this check is unreachable — hasAttributes()
  // above already returned false whenever any attribute (including
  // ZExt/SExt) remains. Kept for documentation of intent.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}
90
// Returns true iff every register-located outgoing argument is the caller's
// own unmodified live-in value for that same physical register (i.e. the
// value is a CopyFromReg of the corresponding function live-in vreg).
// NOTE(extraction): the opening signature line was dropped by this dump;
// presumably bool TargetLowering::parametersInCSRMatch(
//     const MachineRegisterInfo &MRI, ...) — MRI is used below.
                               const uint32_t *CallerPreservedMask,
                               const SmallVectorImpl<CCValAssign> &ArgLocs,
                               const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    // Stack-passed arguments are irrelevant here.
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    // for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    // Look through an AssertZext wrapper; it does not change the value.
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}
117
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
// NOTE(extraction): the opening signature line was dropped by this dump;
// presumably ...ArgListEntry::setAttributes(const CallBase *Call, — Call is
// used throughout below.
                                              unsigned ArgIdx) {
  // Mirror each relevant parameter attribute of call-site argument ArgIdx
  // into the corresponding boolean flag.
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  // NOTE(extraction): the assert's condition line was dropped here; only the
  // message survives. It presumably checks that at most one of the
  // pointee-carrying ABI attributes (byval/preallocated/inalloca/sret) is
  // present on this argument — confirm against upstream.
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    // An explicit stack alignment wins; otherwise fall back to the
    // parameter alignment.
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}
151
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl,
                            // NOTE(extraction): a parameter line was dropped
                            // here by this dump (presumably "EVT RetVT,
                            // ArrayRef<SDValue> Ops," — both names are used
                            // below).
                            MakeLibCallOptions CallOptions, const SDLoc &dl,
                            SDValue InChain) const {
  if (LibcallImpl == RTLIB::Unsupported)
    reportFatalInternalError("unsupported library call operation");

  // With no explicit incoming chain, start the call at the DAG entry node.
  if (!InChain)
    InChain = DAG.getEntryNode();

  // NOTE(extraction): the declaration of Args was dropped here (presumably
  // TargetLowering::ArgListTy Args;).
  Args.reserve(Ops.size());

  // Optional per-operand IR type overrides supplied by the caller.
  ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    // Use the override type when present, else the operand's own EVT type.
    Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
                   ? OpsTypeOverrides[i]
                   : NewOp.getValueType().getTypeForEVT(*DAG.getContext());
    TargetLowering::ArgListEntry Entry(NewOp, Ty);
    if (CallOptions.IsSoften)
      Entry.OrigTy =
          CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext());

    Entry.IsSExt =
        shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
    Entry.IsZExt = !Entry.IsSExt;

    if (CallOptions.IsSoften &&
        // NOTE(extraction): the remainder of this condition was dropped by
        // this dump.
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  SDValue Callee =
      DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  Type *OrigRetTy = RetTy;
  // NOTE(extraction): a line was dropped here (presumably declaring the
  // CallLoweringInfo CLI used below).
  bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften) {
    OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext());
    // NOTE(extraction): a condition line was dropped here guarding the
    // statement below.
      signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallImplCallingConv(LibcallImpl), RetTy, OrigRetTy,
                    Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      // NOTE(extraction): two setter lines were dropped here by this dump.
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}
216
// Chooses the sequence of value types (appended to MemOps) with which to
// lower a memcpy/memset-like operation of Op.size() bytes, honoring
// alignment constraints and the Limit on the number of operations.
// NOTE(extraction): the signature's first line was dropped by this dump;
// presumably bool TargetLowering::findOptimalMemOpLowering( — confirm
// against upstream.
    LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
    const MemOp &Op, unsigned DstAS, unsigned SrcAS,
    const AttributeList &FuncAttributes) const {
  // If a fixed destination alignment is required and the source is less
  // aligned, bail out (unless the limit is "unlimited").
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Ask the target which type it prefers for this operation.
  EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        // NOTE(extraction): the loop-body line was dropped here (it shrinks
        // VT to the next smaller integer type — confirm against upstream).
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        // NOTE(extraction): a condition line was dropped here (the 'if'
        // matching the 'else if' below — confirm against upstream).
          Found = true;
        else if (NewVT == MVT::i64 &&
                 // NOTE(extraction): part of this condition was dropped here.
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Walk down through the integer types until a safe one is found;
        // i8 is the unconditional floor.
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          // NOTE(extraction): the call wrapping these arguments was
          // partially dropped here (presumably
          // allowsMisalignedMemoryAccesses(...)).
          VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
307
/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
// NOTE(extraction): the signature's first line was dropped by this dump;
// presumably void TargetLowering::softenSetCCOperands(SelectionDAG &DAG,
// EVT VT, — confirm against upstream.
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  // Delegate to the chained variant with an empty incoming chain.
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}
319
// Chained variant: expands an FP comparison on a soft-float type into one or
// two integer-returning comparison libcalls, rewriting NewLHS/NewRHS/CCCode
// into an equivalent integer setcc.
// NOTE(extraction): the signature's first line was dropped by this dump;
// presumably void TargetLowering::softenSetCCOperands(SelectionDAG &DAG,
// EVT VT, — confirm against upstream.
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.

  // Only the libcall-backed soft-float types are handled here.
  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  // LC1 is the primary comparison; LC2, when set, is a second call combined
  // with LC1 for predicates that need two libcalls (SETONE/SETUEQ).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    // SETO is the inverse of SETUO; call the UO libcall and invert.
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  // NOTE(extraction): a line was dropped here (presumably declaring RetVT,
  // the libcall comparison return type used throughout below).
  SDValue Ops[2] = {NewLHS, NewRHS};
  // NOTE(extraction): a line was dropped here (presumably declaring the
  // MakeLibCallOptions CallOptions used below).
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  RTLIB::LibcallImpl LC1Impl = getLibcallImpl(LC1);
  if (LC1Impl == RTLIB::Unsupported) {
    // NOTE(extraction): the reporting call's opening line was dropped here;
    // only the message string survives.
        "no libcall available to soften floating-point compare");
  }

  // Translate the libcall's boolean convention into a setcc predicate.
  CCCode = getSoftFloatCmpLibcallPredicate(LC1Impl);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    RTLIB::LibcallImpl LC2Impl = getLibcallImpl(LC2);
    if (LC2Impl == RTLIB::Unsupported) {
      // NOTE(extraction): the reporting call's opening line was dropped here
      // as well.
          "no libcall available to soften floating-point compare");
    }

    assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
           "unordered call should be simple boolean");

    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    // NOTE(extraction): a line was dropped here (likely the opening of a
    // conditional whose closing brace appears below — confirm upstream).
    NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
                         DAG.getValueType(MVT::i1));
    }

    // Combine the two libcall results: (UO cmp) AND/OR (ordered cmp).
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getSoftFloatCmpLibcallPredicate(LC2Impl);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    // Merge the chains of both calls when a chain is being tracked.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    // Signal the caller that the comparison is already materialized.
    NewRHS = SDValue();
  }
}
479
/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
// NOTE(extraction): the signature line and both return statements of this
// function were dropped by this dump; only its explanatory comments and
// closing brace survive. Confirm the body against upstream.
  // In non-pic modes, just use the address of a block.

  // Otherwise, use a label difference.
}
490
// NOTE(extraction): the signature's first line was dropped by this dump;
// presumably SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
// — the default relocation base is simply the table itself.
                                                 SelectionDAG &DAG) const {
  return Table;
}
495
/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
// NOTE(extraction): the line naming this function and its first parameter
// was dropped by this dump; presumably
// TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, —
// MF is used below.
                                             unsigned JTI,MCContext &Ctx) const{
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}
504
// Builds the BRIND node for an indirect jump-table branch, optionally
// threading jump-table debug info into the chain first.
// NOTE(extraction): the signature's first line was dropped by this dump;
// presumably SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl,
// SDValue Value, — Value becomes the incoming chain below.
                                              SDValue Addr, int JTI,
                                              SelectionDAG &DAG) const {
  SDValue Chain = Value;
  // Jump table debug info is only needed if CodeView is enabled.
  // NOTE(extraction): the guarding condition line was dropped here.
    Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
  }
  return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
}
515
bool
// NOTE(extraction): the line naming this function was dropped by this dump;
// presumably TargetLowering::isOffsetFoldingLegal(
//     const GlobalAddressSDNode *GA) const { — GA is used below.
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a got and then add the offset.
  if (!TM.shouldAssumeDSOLocal(GV))
    return false;

  // If the code is position independent we will have to add a base register.
  // NOTE(extraction): the condition line was dropped here (presumably a
  // position-independence check guarding this return).
    return false;

  // Otherwise we can do it.
  return true;
}
533
534//===----------------------------------------------------------------------===//
535// Optimization Methods
536//===----------------------------------------------------------------------===//
537
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
// NOTE(extraction): the signature's first line was dropped by this dump;
// presumably bool TargetLowering::ShrinkDemandedConstant(SDValue Op, —
// confirm against upstream.
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node, leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    // Only a non-opaque constant RHS can be shrunk.
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    // Clear the undemanded constant bits and rebuild the node.
    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
                                      Op->getFlags());
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}
587
// Convenience overload: demand all elements of a vector (or a single scalar
// lane) and forward to the element-aware variant above.
// NOTE(extraction): the signature's first line was dropped by this dump;
// presumably bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           // NOTE(extraction): the true-branch line was
                           // dropped here (presumably
                           // APInt::getAllOnes(VT.getVectorNumElements())).
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}
597
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
// NOTE(extraction): the signature's first line was dropped by this dump;
// presumably bool TargetLowering::ShrinkDemandedOp(SDValue Op,
// unsigned BitWidth, — BitWidth is used below.
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (isTruncateFree(Op, SmallVT) && isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.

      // If the operation has the 'disjoint' flag, then the
      // operands on the new node are also disjoint.
      SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
      // NOTE(extraction): the else-operand of this conditional was dropped
      // here (presumably SDNodeFlags::None).
      unsigned Opcode = Op.getOpcode();
      if (Opcode == ISD::PTRADD) {
        // It isn't a ptradd anymore if it doesn't operate on the entire
        // pointer.
        Opcode = ISD::ADD;
      }
      // Narrow both operands, perform the op in the small type, then widen.
      SDValue X = DAG.getNode(
          Opcode, dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
657
// DAG-combiner entry point: runs SimplifyDemandedBits with a
// TargetLoweringOpt built from the combiner's legalization state.
// NOTE(extraction): the signature's first line was dropped by this dump;
// presumably bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// const APInt &DemandedBits,
                                            DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    // NOTE(extraction): a line was dropped here (presumably
    // DCI.CommitTargetLoweringOpt(TLO);).
  }
  return Simplified;
}
672
// As above, but with an explicit demanded-elements mask.
// NOTE(extraction): the signature's first line was dropped by this dump;
// presumably bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    // NOTE(extraction): a line was dropped here (presumably
    // DCI.CommitTargetLoweringOpt(TLO);).
  }
  return Simplified;
}
689
// Overload that synthesizes the demanded-elements mask before delegating.
// NOTE(extraction): the signature's first line was dropped by this dump;
// presumably bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// const APInt &DemandedBits,
                                          KnownBits &Known,
                                          // NOTE(extraction): a parameter
                                          // line was dropped here (the
                                          // TargetLoweringOpt &TLO that is
                                          // forwarded below).
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           // NOTE(extraction): the true-branch line was
                           // dropped here (presumably
                           // APInt::getAllOnes(VT.getVectorNumElements())).
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}
706
// TODO: Under what circumstances can we create nodes? Constant folding?
// Attempts to return an existing SDValue equivalent to Op with respect to
// the demanded bits/elements, without creating new nodes.
// NOTE(extraction): the signature's first line was dropped by this dump;
// presumably SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Limit search depth.
  // NOTE(extraction): the recursion-depth condition line was dropped here.
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(VT);

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    // Scalable vectors are not handled in this case.
    if (VT.isScalableVector())
      return SDValue();

    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    // A bitcast to the identical type is a no-op.
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      // NOTE(extraction): the recursive-call line was dropped here
      // (presumably "if (SDValue V = SimplifyMultipleUseDemandedBits(").
          Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      // Each dst element spans Scale src elements; map the demanded dst
      // bits/elements back onto the source.
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        // On big-endian targets the sub-element order within a dst element
        // is reversed.
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      // NOTE(extraction): the recursive-call line was dropped here.
          Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      // Each src element spans Scale dst elements.
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      // NOTE(extraction): the recursive-call line was dropped here.
          Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::ADD: {
    // x + 0 --> x when the other operand is known zero.
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    if (RHSKnown.isZero())
      return Op.getOperand(0);

    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (LHSKnown.isZero())
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<unsigned> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SRL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<unsigned> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      // Must already be signbits in DemandedBits bounds, and can't demand any
      // shifted in zeroes.
      if (DemandedBits.countl_zero() >= ShAmt) {
        unsigned NumSignBits =
            DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
        if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
          return Op0;
      }
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        // NOTE(extraction): additional condition lines were dropped here
        // (presumably the width and boolean-contents checks described by the
        // comment above).
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          // NOTE(extraction): the remainder of this condition was dropped
          // here (presumably a check that Op1 is zero / negative-zero).
        return Op0;
    }
    break;
  }
  // NOTE(extraction): the case label line was dropped here (a
  // sign-extend-in-reg node, judging by the VTSDNode operand and comments).
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits &&
        // NOTE(extraction): the remainder of this condition was dropped
        // here.
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  // NOTE(extraction): the case label lines were dropped here (extend-vector
  // in-reg nodes, judging by the lowest-element bitcast fold below).
    if (VT.isScalableVector())
      return SDValue();

    // If we only want the lowest element and none of extended bits, then we can
    // return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  // NOTE(extraction): the case label line was dropped here (an
  // insert-element node, judging by the operand pattern below).
    if (VT.isScalableVector())
      return SDValue();

    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  // NOTE(extraction): the case label line was dropped here (an
  // insert-subvector node, judging by the operand pattern below).
    if (VT.isScalableVector())
      return SDValue();

    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    // NOTE(extraction): a line was dropped here.
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // TODO: Probably okay to remove after audit; here to reduce change size
    // in initial enablement patch for scalable vectors
    if (VT.isScalableVector())
      return SDValue();

    // Give targets a chance at their own (post-ISD) opcodes.
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      // NOTE(extraction): the call line was dropped here (presumably
      // "if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(").
          Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}
988
// Convenience overload: synthesize the demanded-elements mask and forward.
// NOTE(extraction): the signature's first lines were dropped by this dump;
// presumably SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
//     SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
                                       unsigned Depth) const {
  EVT VT = Op.getValueType();
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           // NOTE(extraction): the true-branch line was
                           // dropped here (presumably
                           // APInt::getAllOnes(VT.getVectorNumElements())).
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}
1002
// Convenience overload: demand every bit of the scalar value and restrict
// only the elements, then forward.
// NOTE(extraction): the line naming this overload was dropped by this dump;
// presumably SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}
1010
1011// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
1012// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
1015 const TargetLowering &TLI,
1016 const APInt &DemandedBits,
1017 const APInt &DemandedElts, unsigned Depth) {
1018 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
1019 "SRL or SRA node is required here!");
1020 // Is the right shift using an immediate value of 1?
1021 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
1022 if (!N1C || !N1C->isOne())
1023 return SDValue();
1024
1025 // We are looking for an avgfloor
1026 // add(ext, ext)
1027 // or one of these as a avgceil
1028 // add(add(ext, ext), 1)
1029 // add(add(ext, 1), ext)
1030 // add(ext, add(ext, 1))
1031 SDValue Add = Op.getOperand(0);
1032 if (Add.getOpcode() != ISD::ADD)
1033 return SDValue();
1034
1035 SDValue ExtOpA = Add.getOperand(0);
1036 SDValue ExtOpB = Add.getOperand(1);
1037 SDValue Add2;
1038 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
1039 ConstantSDNode *ConstOp;
1040 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
1041 ConstOp->isOne()) {
1042 ExtOpA = Op1;
1043 ExtOpB = Op3;
1044 Add2 = A;
1045 return true;
1046 }
1047 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
1048 ConstOp->isOne()) {
1049 ExtOpA = Op1;
1050 ExtOpB = Op2;
1051 Add2 = A;
1052 return true;
1053 }
1054 return false;
1055 };
1056 bool IsCeil =
1057 (ExtOpA.getOpcode() == ISD::ADD &&
1058 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
1059 (ExtOpB.getOpcode() == ISD::ADD &&
1060 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
1061
1062 // If the shift is signed (sra):
1063 // - Needs >= 2 sign bit for both operands.
1064 // - Needs >= 2 zero bits.
1065 // If the shift is unsigned (srl):
1066 // - Needs >= 1 zero bit for both operands.
1067 // - Needs 1 demanded bit zero and >= 2 sign bits.
1068 SelectionDAG &DAG = TLO.DAG;
1069 unsigned ShiftOpc = Op.getOpcode();
1070 bool IsSigned = false;
1071 unsigned KnownBits;
1072 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1073 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
1074 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1075 unsigned NumZeroA =
1076 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1077 unsigned NumZeroB =
1078 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1079 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1080
1081 switch (ShiftOpc) {
1082 default:
1083 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1084 case ISD::SRA: {
1085 if (NumZero >= 2 && NumSigned < NumZero) {
1086 IsSigned = false;
1087 KnownBits = NumZero;
1088 break;
1089 }
1090 if (NumSigned >= 1) {
1091 IsSigned = true;
1092 KnownBits = NumSigned;
1093 break;
1094 }
1095 return SDValue();
1096 }
1097 case ISD::SRL: {
1098 if (NumZero >= 1 && NumSigned < NumZero) {
1099 IsSigned = false;
1100 KnownBits = NumZero;
1101 break;
1102 }
1103 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1104 IsSigned = true;
1105 KnownBits = NumSigned;
1106 break;
1107 }
1108 return SDValue();
1109 }
1110 }
1111
1112 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1113 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1114
1115 // Find the smallest power-2 type that is legal for this vector size and
1116 // operation, given the original type size and the number of known sign/zero
1117 // bits.
1118 EVT VT = Op.getValueType();
1119 unsigned MinWidth =
1120 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1121 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
1123 return SDValue();
1124 if (VT.isVector())
1125 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1126 if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
1127 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1128 // larger type size to do the transform.
1129 if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
1130 return SDValue();
1131 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1132 Add.getOperand(1)) &&
1133 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1134 Add2.getOperand(1))))
1135 NVT = VT;
1136 else
1137 return SDValue();
1138 }
1139
1140 // Don't create a AVGFLOOR node with a scalar constant unless its legal as
1141 // this is likely to stop other folds (reassociation, value tracking etc.)
1142 if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
1143 (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
1144 return SDValue();
1145
1146 SDLoc DL(Op);
1147 SDValue ResultAVG =
1148 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1149 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1150 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1151}
1152
1153/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1154/// result of Op are ever used downstream. If we can use this information to
1155/// simplify Op, create a new simplified DAG node and return true, returning the
1156/// original and new nodes in Old and New. Otherwise, analyze the expression and
1157/// return a mask of Known bits for the expression (used to simplify the
1158/// caller). The Known bits may only be accurate for those bits in the
1159/// OriginalDemandedBits and OriginalDemandedElts.
1161 SDValue Op, const APInt &OriginalDemandedBits,
1162 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1163 unsigned Depth, bool AssumeSingleUse) const {
1164 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1165 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1166 "Mask size mismatches value type size!");
1167
1168 // Don't know anything.
1169 Known = KnownBits(BitWidth);
1170
1171 EVT VT = Op.getValueType();
1172 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1173 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1174 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1175 "Unexpected vector size");
1176
1177 APInt DemandedBits = OriginalDemandedBits;
1178 APInt DemandedElts = OriginalDemandedElts;
1179 SDLoc dl(Op);
1180
1181 // Undef operand.
1182 if (Op.isUndef())
1183 return false;
1184
1185 // We can't simplify target constants.
1186 if (Op.getOpcode() == ISD::TargetConstant)
1187 return false;
1188
1189 if (Op.getOpcode() == ISD::Constant) {
1190 // We know all of the bits for a constant!
1191 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1192 return false;
1193 }
1194
1195 if (Op.getOpcode() == ISD::ConstantFP) {
1196 // We know all of the bits for a floating point constant!
1198 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1199 return false;
1200 }
1201
1202 // Other users may use these bits.
1203 bool HasMultiUse = false;
1204 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1206 // Limit search depth.
1207 return false;
1208 }
1209 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1211 DemandedElts = APInt::getAllOnes(NumElts);
1212 HasMultiUse = true;
1213 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1214 // Not demanding any bits/elts from Op.
1215 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1216 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1217 // Limit search depth.
1218 return false;
1219 }
1220
1221 KnownBits Known2;
1222 switch (Op.getOpcode()) {
1223 case ISD::SCALAR_TO_VECTOR: {
1224 if (VT.isScalableVector())
1225 return false;
1226 if (!DemandedElts[0])
1227 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1228
1229 KnownBits SrcKnown;
1230 SDValue Src = Op.getOperand(0);
1231 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1232 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1233 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1234 return true;
1235
1236 // Upper elements are undef, so only get the knownbits if we just demand
1237 // the bottom element.
1238 if (DemandedElts == 1)
1239 Known = SrcKnown.anyextOrTrunc(BitWidth);
1240 break;
1241 }
1242 case ISD::BUILD_VECTOR:
1243 // Collect the known bits that are shared by every demanded element.
1244 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1245 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1246 return false; // Don't fall through, will infinitely loop.
1247 case ISD::SPLAT_VECTOR: {
1248 SDValue Scl = Op.getOperand(0);
1249 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1250 KnownBits KnownScl;
1251 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1252 return true;
1253
1254 // Implicitly truncate the bits to match the official semantics of
1255 // SPLAT_VECTOR.
1256 Known = KnownScl.trunc(BitWidth);
1257 break;
1258 }
1259 case ISD::LOAD: {
1260 auto *LD = cast<LoadSDNode>(Op);
1261 if (getTargetConstantFromLoad(LD)) {
1262 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1263 return false; // Don't fall through, will infinitely loop.
1264 }
1265 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1266 // If this is a ZEXTLoad and we are looking at the loaded value.
1267 EVT MemVT = LD->getMemoryVT();
1268 unsigned MemBits = MemVT.getScalarSizeInBits();
1269 Known.Zero.setBitsFrom(MemBits);
1270 return false; // Don't fall through, will infinitely loop.
1271 }
1272 break;
1273 }
1275 if (VT.isScalableVector())
1276 return false;
1277 SDValue Vec = Op.getOperand(0);
1278 SDValue Scl = Op.getOperand(1);
1279 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1280 EVT VecVT = Vec.getValueType();
1281
1282 // If index isn't constant, assume we need all vector elements AND the
1283 // inserted element.
1284 APInt DemandedVecElts(DemandedElts);
1285 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1286 unsigned Idx = CIdx->getZExtValue();
1287 DemandedVecElts.clearBit(Idx);
1288
1289 // Inserted element is not required.
1290 if (!DemandedElts[Idx])
1291 return TLO.CombineTo(Op, Vec);
1292 }
1293
1294 KnownBits KnownScl;
1295 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1296 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1297 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1298 return true;
1299
1300 Known = KnownScl.anyextOrTrunc(BitWidth);
1301
1302 KnownBits KnownVec;
1303 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1304 Depth + 1))
1305 return true;
1306
1307 if (!!DemandedVecElts)
1308 Known = Known.intersectWith(KnownVec);
1309
1310 return false;
1311 }
1312 case ISD::INSERT_SUBVECTOR: {
1313 if (VT.isScalableVector())
1314 return false;
1315 // Demand any elements from the subvector and the remainder from the src its
1316 // inserted into.
1317 SDValue Src = Op.getOperand(0);
1318 SDValue Sub = Op.getOperand(1);
1319 uint64_t Idx = Op.getConstantOperandVal(2);
1320 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1321 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1322 APInt DemandedSrcElts = DemandedElts;
1323 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
1324
1325 KnownBits KnownSub, KnownSrc;
1326 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1327 Depth + 1))
1328 return true;
1329 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1330 Depth + 1))
1331 return true;
1332
1333 Known.setAllConflict();
1334 if (!!DemandedSubElts)
1335 Known = Known.intersectWith(KnownSub);
1336 if (!!DemandedSrcElts)
1337 Known = Known.intersectWith(KnownSrc);
1338
1339 // Attempt to avoid multi-use src if we don't need anything from it.
1340 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1341 !DemandedSrcElts.isAllOnes()) {
1343 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1345 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1346 if (NewSub || NewSrc) {
1347 NewSub = NewSub ? NewSub : Sub;
1348 NewSrc = NewSrc ? NewSrc : Src;
1349 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1350 Op.getOperand(2));
1351 return TLO.CombineTo(Op, NewOp);
1352 }
1353 }
1354 break;
1355 }
1357 if (VT.isScalableVector())
1358 return false;
1359 // Offset the demanded elts by the subvector index.
1360 SDValue Src = Op.getOperand(0);
1361 if (Src.getValueType().isScalableVector())
1362 break;
1363 uint64_t Idx = Op.getConstantOperandVal(1);
1364 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1365 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1366
1367 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1368 Depth + 1))
1369 return true;
1370
1371 // Attempt to avoid multi-use src if we don't need anything from it.
1372 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1374 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1375 if (DemandedSrc) {
1376 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1377 Op.getOperand(1));
1378 return TLO.CombineTo(Op, NewOp);
1379 }
1380 }
1381 break;
1382 }
1383 case ISD::CONCAT_VECTORS: {
1384 if (VT.isScalableVector())
1385 return false;
1386 Known.setAllConflict();
1387 EVT SubVT = Op.getOperand(0).getValueType();
1388 unsigned NumSubVecs = Op.getNumOperands();
1389 unsigned NumSubElts = SubVT.getVectorNumElements();
1390 for (unsigned i = 0; i != NumSubVecs; ++i) {
1391 APInt DemandedSubElts =
1392 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1393 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1394 Known2, TLO, Depth + 1))
1395 return true;
1396 // Known bits are shared by every demanded subvector element.
1397 if (!!DemandedSubElts)
1398 Known = Known.intersectWith(Known2);
1399 }
1400 break;
1401 }
1402 case ISD::VECTOR_SHUFFLE: {
1403 assert(!VT.isScalableVector());
1404 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1405
1406 // Collect demanded elements from shuffle operands..
1407 APInt DemandedLHS, DemandedRHS;
1408 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1409 DemandedRHS))
1410 break;
1411
1412 if (!!DemandedLHS || !!DemandedRHS) {
1413 SDValue Op0 = Op.getOperand(0);
1414 SDValue Op1 = Op.getOperand(1);
1415
1416 Known.setAllConflict();
1417 if (!!DemandedLHS) {
1418 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1419 Depth + 1))
1420 return true;
1421 Known = Known.intersectWith(Known2);
1422 }
1423 if (!!DemandedRHS) {
1424 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1425 Depth + 1))
1426 return true;
1427 Known = Known.intersectWith(Known2);
1428 }
1429
1430 // Attempt to avoid multi-use ops if we don't need anything from them.
1432 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1434 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1435 if (DemandedOp0 || DemandedOp1) {
1436 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1437 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1438 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1439 return TLO.CombineTo(Op, NewOp);
1440 }
1441 }
1442 break;
1443 }
1444 case ISD::AND: {
1445 SDValue Op0 = Op.getOperand(0);
1446 SDValue Op1 = Op.getOperand(1);
1447
1448 // If the RHS is a constant, check to see if the LHS would be zero without
1449 // using the bits from the RHS. Below, we use knowledge about the RHS to
1450 // simplify the LHS, here we're using information from the LHS to simplify
1451 // the RHS.
1452 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1453 // Do not increment Depth here; that can cause an infinite loop.
1454 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1455 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1456 if ((LHSKnown.Zero & DemandedBits) ==
1457 (~RHSC->getAPIntValue() & DemandedBits))
1458 return TLO.CombineTo(Op, Op0);
1459
1460 // If any of the set bits in the RHS are known zero on the LHS, shrink
1461 // the constant.
1462 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1463 DemandedElts, TLO))
1464 return true;
1465
1466 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1467 // constant, but if this 'and' is only clearing bits that were just set by
1468 // the xor, then this 'and' can be eliminated by shrinking the mask of
1469 // the xor. For example, for a 32-bit X:
1470 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1471 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1472 LHSKnown.One == ~RHSC->getAPIntValue()) {
1473 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1474 return TLO.CombineTo(Op, Xor);
1475 }
1476 }
1477
1478 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1479 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1480 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1481 (Op0.getOperand(0).isUndef() ||
1483 Op0->hasOneUse()) {
1484 unsigned NumSubElts =
1486 unsigned SubIdx = Op0.getConstantOperandVal(2);
1487 APInt DemandedSub =
1488 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1489 KnownBits KnownSubMask =
1490 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1491 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1492 SDValue NewAnd =
1493 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1494 SDValue NewInsert =
1495 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1496 Op0.getOperand(1), Op0.getOperand(2));
1497 return TLO.CombineTo(Op, NewInsert);
1498 }
1499 }
1500
1501 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1502 Depth + 1))
1503 return true;
1504 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1505 Known2, TLO, Depth + 1))
1506 return true;
1507
1508 // If all of the demanded bits are known one on one side, return the other.
1509 // These bits cannot contribute to the result of the 'and'.
1510 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1511 return TLO.CombineTo(Op, Op0);
1512 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1513 return TLO.CombineTo(Op, Op1);
1514 // If all of the demanded bits in the inputs are known zeros, return zero.
1515 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1516 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1517 // If the RHS is a constant, see if we can simplify it.
1518 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1519 TLO))
1520 return true;
1521 // If the operation can be done in a smaller type, do so.
1523 return true;
1524
1525 // Attempt to avoid multi-use ops if we don't need anything from them.
1526 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1528 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1530 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1531 if (DemandedOp0 || DemandedOp1) {
1532 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1533 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1534 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1535 return TLO.CombineTo(Op, NewOp);
1536 }
1537 }
1538
1539 Known &= Known2;
1540 break;
1541 }
1542 case ISD::OR: {
1543 SDValue Op0 = Op.getOperand(0);
1544 SDValue Op1 = Op.getOperand(1);
1545 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1546 Depth + 1)) {
1547 Op->dropFlags(SDNodeFlags::Disjoint);
1548 return true;
1549 }
1550
1551 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1552 Known2, TLO, Depth + 1)) {
1553 Op->dropFlags(SDNodeFlags::Disjoint);
1554 return true;
1555 }
1556
1557 // If all of the demanded bits are known zero on one side, return the other.
1558 // These bits cannot contribute to the result of the 'or'.
1559 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1560 return TLO.CombineTo(Op, Op0);
1561 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1562 return TLO.CombineTo(Op, Op1);
1563 // If the RHS is a constant, see if we can simplify it.
1564 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1565 return true;
1566 // If the operation can be done in a smaller type, do so.
1568 return true;
1569
1570 // Attempt to avoid multi-use ops if we don't need anything from them.
1571 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1573 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1575 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1576 if (DemandedOp0 || DemandedOp1) {
1577 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1578 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1579 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1580 return TLO.CombineTo(Op, NewOp);
1581 }
1582 }
1583
1584 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1585 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1586 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1587 Op0->hasOneUse() && Op1->hasOneUse()) {
1588 // Attempt to match all commutations - m_c_Or would've been useful!
1589 for (int I = 0; I != 2; ++I) {
1590 SDValue X = Op.getOperand(I).getOperand(0);
1591 SDValue C1 = Op.getOperand(I).getOperand(1);
1592 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1593 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1594 if (Alt.getOpcode() == ISD::OR) {
1595 for (int J = 0; J != 2; ++J) {
1596 if (X == Alt.getOperand(J)) {
1597 SDValue Y = Alt.getOperand(1 - J);
1598 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1599 {C1, C2})) {
1600 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1601 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1602 return TLO.CombineTo(
1603 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1604 }
1605 }
1606 }
1607 }
1608 }
1609 }
1610
1611 Known |= Known2;
1612 break;
1613 }
1614 case ISD::XOR: {
1615 SDValue Op0 = Op.getOperand(0);
1616 SDValue Op1 = Op.getOperand(1);
1617
1618 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1619 Depth + 1))
1620 return true;
1621 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1622 Depth + 1))
1623 return true;
1624
1625 // If all of the demanded bits are known zero on one side, return the other.
1626 // These bits cannot contribute to the result of the 'xor'.
1627 if (DemandedBits.isSubsetOf(Known.Zero))
1628 return TLO.CombineTo(Op, Op0);
1629 if (DemandedBits.isSubsetOf(Known2.Zero))
1630 return TLO.CombineTo(Op, Op1);
1631 // If the operation can be done in a smaller type, do so.
1633 return true;
1634
1635 // If all of the unknown bits are known to be zero on one side or the other
1636 // turn this into an *inclusive* or.
1637 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1638 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1639 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1640
1641 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1642 if (C) {
1643 // If one side is a constant, and all of the set bits in the constant are
1644 // also known set on the other side, turn this into an AND, as we know
1645 // the bits will be cleared.
1646 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1647 // NB: it is okay if more bits are known than are requested
1648 if (C->getAPIntValue() == Known2.One) {
1649 SDValue ANDC =
1650 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1651 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1652 }
1653
1654 // If the RHS is a constant, see if we can change it. Don't alter a -1
1655 // constant because that's a 'not' op, and that is better for combining
1656 // and codegen.
1657 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1658 // We're flipping all demanded bits. Flip the undemanded bits too.
1659 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1660 return TLO.CombineTo(Op, New);
1661 }
1662
1663 unsigned Op0Opcode = Op0.getOpcode();
1664 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1665 if (ConstantSDNode *ShiftC =
1666 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1667 // Don't crash on an oversized shift. We can not guarantee that a
1668 // bogus shift has been simplified to undef.
1669 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1670 uint64_t ShiftAmt = ShiftC->getZExtValue();
1672 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1673 : Ones.lshr(ShiftAmt);
1674 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1676 // If the xor constant is a demanded mask, do a 'not' before the
1677 // shift:
1678 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1679 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1680 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1681 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1682 Op0.getOperand(1)));
1683 }
1684 }
1685 }
1686 }
1687 }
1688
1689 // If we can't turn this into a 'not', try to shrink the constant.
1690 if (!C || !C->isAllOnes())
1691 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1692 return true;
1693
1694 // Attempt to avoid multi-use ops if we don't need anything from them.
1695 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1697 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1699 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1700 if (DemandedOp0 || DemandedOp1) {
1701 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1702 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1703 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1704 return TLO.CombineTo(Op, NewOp);
1705 }
1706 }
1707
1708 Known ^= Known2;
1709 break;
1710 }
1711 case ISD::SELECT:
1712 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1713 Known, TLO, Depth + 1))
1714 return true;
1715 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1716 Known2, TLO, Depth + 1))
1717 return true;
1718
1719 // If the operands are constants, see if we can simplify them.
1720 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1721 return true;
1722
1723 // Only known if known in both the LHS and RHS.
1724 Known = Known.intersectWith(Known2);
1725 break;
1726 case ISD::VSELECT:
1727 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1728 Known, TLO, Depth + 1))
1729 return true;
1730 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1731 Known2, TLO, Depth + 1))
1732 return true;
1733
1734 // Only known if known in both the LHS and RHS.
1735 Known = Known.intersectWith(Known2);
1736 break;
1737 case ISD::SELECT_CC:
1738 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1739 Known, TLO, Depth + 1))
1740 return true;
1741 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1742 Known2, TLO, Depth + 1))
1743 return true;
1744
1745 // If the operands are constants, see if we can simplify them.
1746 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1747 return true;
1748
1749 // Only known if known in both the LHS and RHS.
1750 Known = Known.intersectWith(Known2);
1751 break;
1752 case ISD::SETCC: {
1753 SDValue Op0 = Op.getOperand(0);
1754 SDValue Op1 = Op.getOperand(1);
1755 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1756 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1757 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1758 // -1, we may be able to bypass the setcc.
1759 if (DemandedBits.isSignMask() &&
1763 // If we're testing X < 0, then this compare isn't needed - just use X!
1764 // FIXME: We're limiting to integer types here, but this should also work
1765 // if we don't care about FP signed-zero. The use of SETLT with FP means
1766 // that we don't care about NaNs.
1767 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1769 return TLO.CombineTo(Op, Op0);
1770
1771 // TODO: Should we check for other forms of sign-bit comparisons?
1772 // Examples: X <= -1, X >= 0
1773 }
1774 if (getBooleanContents(Op0.getValueType()) ==
1776 BitWidth > 1)
1777 Known.Zero.setBitsFrom(1);
1778 break;
1779 }
1780 case ISD::SHL: {
1781 SDValue Op0 = Op.getOperand(0);
1782 SDValue Op1 = Op.getOperand(1);
1783 EVT ShiftVT = Op1.getValueType();
1784
1785 if (std::optional<unsigned> KnownSA =
1786 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1787 unsigned ShAmt = *KnownSA;
1788 if (ShAmt == 0)
1789 return TLO.CombineTo(Op, Op0);
1790
1791 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1792 // single shift. We can do this if the bottom bits (which are shifted
1793 // out) are never demanded.
1794 // TODO - support non-uniform vector amounts.
1795 if (Op0.getOpcode() == ISD::SRL) {
1796 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1797 if (std::optional<unsigned> InnerSA =
1798 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1799 unsigned C1 = *InnerSA;
1800 unsigned Opc = ISD::SHL;
1801 int Diff = ShAmt - C1;
1802 if (Diff < 0) {
1803 Diff = -Diff;
1804 Opc = ISD::SRL;
1805 }
1806 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1807 return TLO.CombineTo(
1808 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1809 }
1810 }
1811 }
1812
1813 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1814 // are not demanded. This will likely allow the anyext to be folded away.
1815 // TODO - support non-uniform vector amounts.
1816 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1817 SDValue InnerOp = Op0.getOperand(0);
1818 EVT InnerVT = InnerOp.getValueType();
1819 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1820 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1821 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1822 SDValue NarrowShl = TLO.DAG.getNode(
1823 ISD::SHL, dl, InnerVT, InnerOp,
1824 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1825 return TLO.CombineTo(
1826 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1827 }
1828
1829 // Repeat the SHL optimization above in cases where an extension
1830 // intervenes: (shl (anyext (shr x, c1)), c2) to
1831 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1832 // aren't demanded (as above) and that the shifted upper c1 bits of
1833 // x aren't demanded.
1834 // TODO - support non-uniform vector amounts.
1835 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1836 InnerOp.hasOneUse()) {
1837 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1838 InnerOp, DemandedElts, Depth + 2)) {
1839 unsigned InnerShAmt = *SA2;
1840 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1841 DemandedBits.getActiveBits() <=
1842 (InnerBits - InnerShAmt + ShAmt) &&
1843 DemandedBits.countr_zero() >= ShAmt) {
1844 SDValue NewSA =
1845 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1846 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1847 InnerOp.getOperand(0));
1848 return TLO.CombineTo(
1849 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1850 }
1851 }
1852 }
1853 }
1854
1855 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1856 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1857 Depth + 1)) {
1858 // Disable the nsw and nuw flags. We can no longer guarantee that we
1859 // won't wrap after simplification.
1860 Op->dropFlags(SDNodeFlags::NoWrap);
1861 return true;
1862 }
1863 Known <<= ShAmt;
1864 // low bits known zero.
1865 Known.Zero.setLowBits(ShAmt);
1866
1867 // Attempt to avoid multi-use ops if we don't need anything from them.
1868 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1870 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1871 if (DemandedOp0) {
1872 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1873 return TLO.CombineTo(Op, NewOp);
1874 }
1875 }
1876
1877 // TODO: Can we merge this fold with the one below?
1878 // Try shrinking the operation as long as the shift amount will still be
1879 // in range.
1880 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1881 Op.getNode()->hasOneUse()) {
1882 // Search for the smallest integer type with free casts to and from
1883 // Op's type. For expedience, just check power-of-2 integer types.
1884 unsigned DemandedSize = DemandedBits.getActiveBits();
1885 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1886 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1887 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1888 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1889 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1890 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1891 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1892 assert(DemandedSize <= SmallVTBits &&
1893 "Narrowed below demanded bits?");
1894 // We found a type with free casts.
1895 SDValue NarrowShl = TLO.DAG.getNode(
1896 ISD::SHL, dl, SmallVT,
1897 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1898 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1899 return TLO.CombineTo(
1900 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1901 }
1902 }
1903 }
1904
1905 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1906 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1907 // Only do this if we demand the upper half so the knownbits are correct.
1908 unsigned HalfWidth = BitWidth / 2;
1909 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1910 DemandedBits.countLeadingOnes() >= HalfWidth) {
1911 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1912 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1913 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1914 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1915 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1916 // If we're demanding the upper bits at all, we must ensure
1917 // that the upper bits of the shift result are known to be zero,
1918 // which is equivalent to the narrow shift being NUW.
1919 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1920 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1921 SDNodeFlags Flags;
1922 Flags.setNoSignedWrap(IsNSW);
1923 Flags.setNoUnsignedWrap(IsNUW);
1924 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1925 SDValue NewShiftAmt =
1926 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1927 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1928 NewShiftAmt, Flags);
1929 SDValue NewExt =
1930 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1931 return TLO.CombineTo(Op, NewExt);
1932 }
1933 }
1934 }
1935 } else {
1936 // This is a variable shift, so we can't shift the demand mask by a known
1937 // amount. But if we are not demanding high bits, then we are not
1938 // demanding those bits from the pre-shifted operand either.
1939 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1940 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1941 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1942 Depth + 1)) {
1943 // Disable the nsw and nuw flags. We can no longer guarantee that we
1944 // won't wrap after simplification.
1945 Op->dropFlags(SDNodeFlags::NoWrap);
1946 return true;
1947 }
1948 Known.resetAll();
1949 }
1950 }
1951
1952 // If we are only demanding sign bits then we can use the shift source
1953 // directly.
1954 if (std::optional<unsigned> MaxSA =
1955 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1956 unsigned ShAmt = *MaxSA;
1957 unsigned NumSignBits =
1958 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1959 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1960 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1961 return TLO.CombineTo(Op, Op0);
1962 }
1963 break;
1964 }
1965 case ISD::SRL: {
1966 SDValue Op0 = Op.getOperand(0);
1967 SDValue Op1 = Op.getOperand(1);
1968 EVT ShiftVT = Op1.getValueType();
1969
1970 if (std::optional<unsigned> KnownSA =
1971 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1972 unsigned ShAmt = *KnownSA;
1973 if (ShAmt == 0)
1974 return TLO.CombineTo(Op, Op0);
1975
1976 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1977 // single shift. We can do this if the top bits (which are shifted out)
1978 // are never demanded.
1979 // TODO - support non-uniform vector amounts.
1980 if (Op0.getOpcode() == ISD::SHL) {
1981 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1982 if (std::optional<unsigned> InnerSA =
1983 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1984 unsigned C1 = *InnerSA;
1985 unsigned Opc = ISD::SRL;
1986 int Diff = ShAmt - C1;
1987 if (Diff < 0) {
1988 Diff = -Diff;
1989 Opc = ISD::SHL;
1990 }
1991 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1992 return TLO.CombineTo(
1993 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1994 }
1995 }
1996 }
1997
1998 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
1999 // single sra. We can do this if the top bits are never demanded.
2000 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
2001 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2002 if (std::optional<unsigned> InnerSA =
2003 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2004 unsigned C1 = *InnerSA;
2005 // Clamp the combined shift amount if it exceeds the bit width.
2006 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
2007 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
2008 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
2009 Op0.getOperand(0), NewSA));
2010 }
2011 }
2012 }
2013
2014 APInt InDemandedMask = (DemandedBits << ShAmt);
2015
2016 // If the shift is exact, then it does demand the low bits (and knows that
2017 // they are zero).
2018 if (Op->getFlags().hasExact())
2019 InDemandedMask.setLowBits(ShAmt);
2020
2021 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2022 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2023 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2025 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2026 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2027 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2028 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2029 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2030 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2031 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2032 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2033 SDValue NewShiftAmt =
2034 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2035 SDValue NewShift =
2036 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2037 return TLO.CombineTo(
2038 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2039 }
2040 }
2041
2042 // Compute the new bits that are at the top now.
2043 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2044 Depth + 1))
2045 return true;
2046 Known >>= ShAmt;
2047 // High bits known zero.
2048 Known.Zero.setHighBits(ShAmt);
2049
2050 // Attempt to avoid multi-use ops if we don't need anything from them.
2051 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2053 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2054 if (DemandedOp0) {
2055 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2056 return TLO.CombineTo(Op, NewOp);
2057 }
2058 }
2059 } else {
2060 // Use generic knownbits computation as it has support for non-uniform
2061 // shift amounts.
2062 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2063 }
2064
2065 // If we are only demanding sign bits then we can use the shift source
2066 // directly.
2067 if (std::optional<unsigned> MaxSA =
2068 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2069 unsigned ShAmt = *MaxSA;
2070 // Must already be signbits in DemandedBits bounds, and can't demand any
2071 // shifted in zeroes.
2072 if (DemandedBits.countl_zero() >= ShAmt) {
2073 unsigned NumSignBits =
2074 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2075 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2076 return TLO.CombineTo(Op, Op0);
2077 }
2078 }
2079
2080 // Try to match AVG patterns (after shift simplification).
2081 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2082 DemandedElts, Depth + 1))
2083 return TLO.CombineTo(Op, AVG);
2084
2085 break;
2086 }
2087 case ISD::SRA: {
2088 SDValue Op0 = Op.getOperand(0);
2089 SDValue Op1 = Op.getOperand(1);
2090 EVT ShiftVT = Op1.getValueType();
2091
2092 // If we only want bits that already match the signbit then we don't need
2093 // to shift.
2094 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2095 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2096 NumHiDemandedBits)
2097 return TLO.CombineTo(Op, Op0);
2098
2099 // If this is an arithmetic shift right and only the low-bit is set, we can
2100 // always convert this into a logical shr, even if the shift amount is
2101 // variable. The low bit of the shift cannot be an input sign bit unless
2102 // the shift amount is >= the size of the datatype, which is undefined.
2103 if (DemandedBits.isOne())
2104 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2105
2106 if (std::optional<unsigned> KnownSA =
2107 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2108 unsigned ShAmt = *KnownSA;
2109 if (ShAmt == 0)
2110 return TLO.CombineTo(Op, Op0);
2111
2112 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2113 // supports sext_inreg.
2114 if (Op0.getOpcode() == ISD::SHL) {
2115 if (std::optional<unsigned> InnerSA =
2116 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2117 unsigned LowBits = BitWidth - ShAmt;
2118 EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2119 if (VT.isVector())
2120 ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2122
2123 if (*InnerSA == ShAmt) {
2124 if (!TLO.LegalOperations() ||
2126 return TLO.CombineTo(
2127 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2128 Op0.getOperand(0),
2129 TLO.DAG.getValueType(ExtVT)));
2130
2131 // Even if we can't convert to sext_inreg, we might be able to
2132 // remove this shift pair if the input is already sign extended.
2133 unsigned NumSignBits =
2134 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2135 if (NumSignBits > ShAmt)
2136 return TLO.CombineTo(Op, Op0.getOperand(0));
2137 }
2138 }
2139 }
2140
2141 APInt InDemandedMask = (DemandedBits << ShAmt);
2142
2143 // If the shift is exact, then it does demand the low bits (and knows that
2144 // they are zero).
2145 if (Op->getFlags().hasExact())
2146 InDemandedMask.setLowBits(ShAmt);
2147
2148 // If any of the demanded bits are produced by the sign extension, we also
2149 // demand the input sign bit.
2150 if (DemandedBits.countl_zero() < ShAmt)
2151 InDemandedMask.setSignBit();
2152
2153 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2154 Depth + 1))
2155 return true;
2156 Known >>= ShAmt;
2157
2158 // If the input sign bit is known to be zero, or if none of the top bits
2159 // are demanded, turn this into an unsigned shift right.
2160 if (Known.Zero[BitWidth - ShAmt - 1] ||
2161 DemandedBits.countl_zero() >= ShAmt) {
2162 SDNodeFlags Flags;
2163 Flags.setExact(Op->getFlags().hasExact());
2164 return TLO.CombineTo(
2165 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2166 }
2167
2168 int Log2 = DemandedBits.exactLogBase2();
2169 if (Log2 >= 0) {
2170 // The bit must come from the sign.
2171 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2172 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2173 }
2174
2175 if (Known.One[BitWidth - ShAmt - 1])
2176 // New bits are known one.
2177 Known.One.setHighBits(ShAmt);
2178
2179 // Attempt to avoid multi-use ops if we don't need anything from them.
2180 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2182 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2183 if (DemandedOp0) {
2184 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2185 return TLO.CombineTo(Op, NewOp);
2186 }
2187 }
2188 }
2189
2190 // Try to match AVG patterns (after shift simplification).
2191 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2192 DemandedElts, Depth + 1))
2193 return TLO.CombineTo(Op, AVG);
2194
2195 break;
2196 }
2197 case ISD::FSHL:
2198 case ISD::FSHR: {
2199 SDValue Op0 = Op.getOperand(0);
2200 SDValue Op1 = Op.getOperand(1);
2201 SDValue Op2 = Op.getOperand(2);
2202 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2203
2204 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2205 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2206
2207 // For fshl, 0-shift returns the 1st arg.
2208 // For fshr, 0-shift returns the 2nd arg.
2209 if (Amt == 0) {
2210 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2211 Known, TLO, Depth + 1))
2212 return true;
2213 break;
2214 }
2215
2216 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2217 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2218 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2219 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2220 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2221 Depth + 1))
2222 return true;
2223 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2224 Depth + 1))
2225 return true;
2226
2227 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2228 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2229 Known = Known.unionWith(Known2);
2230
2231 // Attempt to avoid multi-use ops if we don't need anything from them.
2232 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2233 !DemandedElts.isAllOnes()) {
2235 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2237 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2238 if (DemandedOp0 || DemandedOp1) {
2239 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2240 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2241 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2242 DemandedOp1, Op2);
2243 return TLO.CombineTo(Op, NewOp);
2244 }
2245 }
2246 }
2247
2248 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2249 if (isPowerOf2_32(BitWidth)) {
2250 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2251 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2252 Known2, TLO, Depth + 1))
2253 return true;
2254 }
2255 break;
2256 }
2257 case ISD::ROTL:
2258 case ISD::ROTR: {
2259 SDValue Op0 = Op.getOperand(0);
2260 SDValue Op1 = Op.getOperand(1);
2261 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2262
2263 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2264 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2265 return TLO.CombineTo(Op, Op0);
2266
2267 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2268 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2269 unsigned RevAmt = BitWidth - Amt;
2270
2271 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2272 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2273 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2274 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2275 Depth + 1))
2276 return true;
2277
2278 // rot*(x, 0) --> x
2279 if (Amt == 0)
2280 return TLO.CombineTo(Op, Op0);
2281
2282 // See if we don't demand either half of the rotated bits.
2283 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2284 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2285 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2286 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2287 }
2288 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2289 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2290 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2291 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2292 }
2293 }
2294
2295 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2296 if (isPowerOf2_32(BitWidth)) {
2297 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2298 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2299 Depth + 1))
2300 return true;
2301 }
2302 break;
2303 }
2304 case ISD::SMIN:
2305 case ISD::SMAX:
2306 case ISD::UMIN:
2307 case ISD::UMAX: {
2308 unsigned Opc = Op.getOpcode();
2309 SDValue Op0 = Op.getOperand(0);
2310 SDValue Op1 = Op.getOperand(1);
2311
2312 // If we're only demanding signbits, then we can simplify to OR/AND node.
2313 unsigned BitOp =
2314 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2315 unsigned NumSignBits =
2316 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2317 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2318 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2319 if (NumSignBits >= NumDemandedUpperBits)
2320 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2321
2322 // Check if one arg is always less/greater than (or equal) to the other arg.
2323 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2324 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2325 switch (Opc) {
2326 case ISD::SMIN:
2327 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2328 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2329 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2330 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2331 Known = KnownBits::smin(Known0, Known1);
2332 break;
2333 case ISD::SMAX:
2334 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2335 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2336 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2337 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2338 Known = KnownBits::smax(Known0, Known1);
2339 break;
2340 case ISD::UMIN:
2341 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2342 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2343 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2344 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2345 Known = KnownBits::umin(Known0, Known1);
2346 break;
2347 case ISD::UMAX:
2348 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2349 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2350 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2351 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2352 Known = KnownBits::umax(Known0, Known1);
2353 break;
2354 }
2355 break;
2356 }
2357 case ISD::BITREVERSE: {
2358 SDValue Src = Op.getOperand(0);
2359 APInt DemandedSrcBits = DemandedBits.reverseBits();
2360 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2361 Depth + 1))
2362 return true;
2363 Known = Known2.reverseBits();
2364 break;
2365 }
2366 case ISD::BSWAP: {
2367 SDValue Src = Op.getOperand(0);
2368
2369 // If the only bits demanded come from one byte of the bswap result,
2370 // just shift the input byte into position to eliminate the bswap.
2371 unsigned NLZ = DemandedBits.countl_zero();
2372 unsigned NTZ = DemandedBits.countr_zero();
2373
2374 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2375 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2376 // have 14 leading zeros, round to 8.
2377 NLZ = alignDown(NLZ, 8);
2378 NTZ = alignDown(NTZ, 8);
2379 // If we need exactly one byte, we can do this transformation.
2380 if (BitWidth - NLZ - NTZ == 8) {
2381 // Replace this with either a left or right shift to get the byte into
2382 // the right place.
2383 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2384 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2385 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2386 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2387 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2388 return TLO.CombineTo(Op, NewOp);
2389 }
2390 }
2391
2392 APInt DemandedSrcBits = DemandedBits.byteSwap();
2393 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2394 Depth + 1))
2395 return true;
2396 Known = Known2.byteSwap();
2397 break;
2398 }
2399 case ISD::CTPOP: {
2400 // If only 1 bit is demanded, replace with PARITY as long as we're before
2401 // op legalization.
2402 // FIXME: Limit to scalars for now.
2403 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2404 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2405 Op.getOperand(0)));
2406
2407 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2408 break;
2409 }
2411 SDValue Op0 = Op.getOperand(0);
2412 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2413 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2414
2415 // If we only care about the highest bit, don't bother shifting right.
2416 if (DemandedBits.isSignMask()) {
2417 unsigned MinSignedBits =
2418 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2419 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2420 // However if the input is already sign extended we expect the sign
2421 // extension to be dropped altogether later and do not simplify.
2422 if (!AlreadySignExtended) {
2423 // Compute the correct shift amount type, which must be getShiftAmountTy
2424 // for scalar types after legalization.
2425 SDValue ShiftAmt =
2426 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2427 return TLO.CombineTo(Op,
2428 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2429 }
2430 }
2431
2432 // If none of the extended bits are demanded, eliminate the sextinreg.
2433 if (DemandedBits.getActiveBits() <= ExVTBits)
2434 return TLO.CombineTo(Op, Op0);
2435
2436 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2437
2438 // Since the sign extended bits are demanded, we know that the sign
2439 // bit is demanded.
2440 InputDemandedBits.setBit(ExVTBits - 1);
2441
2442 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2443 Depth + 1))
2444 return true;
2445
2446 // If the sign bit of the input is known set or clear, then we know the
2447 // top bits of the result.
2448
2449 // If the input sign bit is known zero, convert this into a zero extension.
2450 if (Known.Zero[ExVTBits - 1])
2451 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2452
2453 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2454 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2455 Known.One.setBitsFrom(ExVTBits);
2456 Known.Zero &= Mask;
2457 } else { // Input sign bit unknown
2458 Known.Zero &= Mask;
2459 Known.One &= Mask;
2460 }
2461 break;
2462 }
2463 case ISD::BUILD_PAIR: {
2464 EVT HalfVT = Op.getOperand(0).getValueType();
2465 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2466
2467 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2468 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2469
2470 KnownBits KnownLo, KnownHi;
2471
2472 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2473 return true;
2474
2475 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2476 return true;
2477
2478 Known = KnownHi.concat(KnownLo);
2479 break;
2480 }
2482 if (VT.isScalableVector())
2483 return false;
2484 [[fallthrough]];
2485 case ISD::ZERO_EXTEND: {
2486 SDValue Src = Op.getOperand(0);
2487 EVT SrcVT = Src.getValueType();
2488 unsigned InBits = SrcVT.getScalarSizeInBits();
2489 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2490 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2491
2492 // If none of the top bits are demanded, convert this into an any_extend.
2493 if (DemandedBits.getActiveBits() <= InBits) {
2494 // If we only need the non-extended bits of the bottom element
2495 // then we can just bitcast to the result.
2496 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2497 VT.getSizeInBits() == SrcVT.getSizeInBits())
2498 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2499
2500 unsigned Opc =
2502 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2503 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2504 }
2505
2506 APInt InDemandedBits = DemandedBits.trunc(InBits);
2507 APInt InDemandedElts = DemandedElts.zext(InElts);
2508 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2509 Depth + 1)) {
2510 Op->dropFlags(SDNodeFlags::NonNeg);
2511 return true;
2512 }
2513 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2514 Known = Known.zext(BitWidth);
2515
2516 // Attempt to avoid multi-use ops if we don't need anything from them.
2518 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2519 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2520 break;
2521 }
2523 if (VT.isScalableVector())
2524 return false;
2525 [[fallthrough]];
2526 case ISD::SIGN_EXTEND: {
2527 SDValue Src = Op.getOperand(0);
2528 EVT SrcVT = Src.getValueType();
2529 unsigned InBits = SrcVT.getScalarSizeInBits();
2530 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2531 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2532
2533 APInt InDemandedElts = DemandedElts.zext(InElts);
2534 APInt InDemandedBits = DemandedBits.trunc(InBits);
2535
2536 // Since some of the sign extended bits are demanded, we know that the sign
2537 // bit is demanded.
2538 InDemandedBits.setBit(InBits - 1);
2539
2540 // If none of the top bits are demanded, convert this into an any_extend.
2541 if (DemandedBits.getActiveBits() <= InBits) {
2542 // If we only need the non-extended bits of the bottom element
2543 // then we can just bitcast to the result.
2544 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2545 VT.getSizeInBits() == SrcVT.getSizeInBits())
2546 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2547
2548 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2550 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2551 InBits) {
2552 unsigned Opc =
2554 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2555 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2556 }
2557 }
2558
2559 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2560 Depth + 1))
2561 return true;
2562 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2563
2564 // If the sign bit is known one, the top bits match.
2565 Known = Known.sext(BitWidth);
2566
2567 // If the sign bit is known zero, convert this to a zero extend.
2568 if (Known.isNonNegative()) {
2569 unsigned Opc =
2571 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2572 SDNodeFlags Flags;
2573 if (!IsVecInReg)
2574 Flags |= SDNodeFlags::NonNeg;
2575 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2576 }
2577 }
2578
2579 // Attempt to avoid multi-use ops if we don't need anything from them.
2581 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2582 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2583 break;
2584 }
2586 if (VT.isScalableVector())
2587 return false;
2588 [[fallthrough]];
2589 case ISD::ANY_EXTEND: {
2590 SDValue Src = Op.getOperand(0);
2591 EVT SrcVT = Src.getValueType();
2592 unsigned InBits = SrcVT.getScalarSizeInBits();
2593 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2594 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2595
2596 // If we only need the bottom element then we can just bitcast.
2597 // TODO: Handle ANY_EXTEND?
2598 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2599 VT.getSizeInBits() == SrcVT.getSizeInBits())
2600 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2601
2602 APInt InDemandedBits = DemandedBits.trunc(InBits);
2603 APInt InDemandedElts = DemandedElts.zext(InElts);
2604 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2605 Depth + 1))
2606 return true;
2607 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2608 Known = Known.anyext(BitWidth);
2609
2610 // Attempt to avoid multi-use ops if we don't need anything from them.
2612 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2613 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2614 break;
2615 }
2616 case ISD::TRUNCATE: {
2617 SDValue Src = Op.getOperand(0);
2618
2619 // Simplify the input, using demanded bit information, and compute the known
2620 // zero/one bits live out.
2621 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2622 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2623 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2624 Depth + 1)) {
2625 // Disable the nsw and nuw flags. We can no longer guarantee that we
2626 // won't wrap after simplification.
2627 Op->dropFlags(SDNodeFlags::NoWrap);
2628 return true;
2629 }
2630 Known = Known.trunc(BitWidth);
2631
2632 // Attempt to avoid multi-use ops if we don't need anything from them.
2634 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2635 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2636
2637 // If the input is only used by this truncate, see if we can shrink it based
2638 // on the known demanded bits.
2639 switch (Src.getOpcode()) {
2640 default:
2641 break;
2642 case ISD::SRL:
2643 // Shrink SRL by a constant if none of the high bits shifted in are
2644 // demanded.
2645 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2646 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2647 // undesirable.
2648 break;
2649
2650 if (Src.getNode()->hasOneUse()) {
2651 if (isTruncateFree(Src, VT) &&
2652 !isTruncateFree(Src.getValueType(), VT)) {
2653 // If truncate is only free at trunc(srl), do not turn it into
2654 // srl(trunc). The check is done by first check the truncate is free
2655 // at Src's opcode(srl), then check the truncate is not done by
2656 // referencing sub-register. In test, if both trunc(srl) and
2657 // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2658 // trunc(srl)'s trunc is free, trunc(srl) is better.
2659 break;
2660 }
2661
2662 std::optional<unsigned> ShAmtC =
2663 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2664 if (!ShAmtC || *ShAmtC >= BitWidth)
2665 break;
2666 unsigned ShVal = *ShAmtC;
2667
2668 APInt HighBits =
2669 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2670 HighBits.lshrInPlace(ShVal);
2671 HighBits = HighBits.trunc(BitWidth);
2672 if (!(HighBits & DemandedBits)) {
2673 // None of the shifted in bits are needed. Add a truncate of the
2674 // shift input, then shift it.
2675 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2676 SDValue NewTrunc =
2677 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2678 return TLO.CombineTo(
2679 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2680 }
2681 }
2682 break;
2683 }
2684
2685 break;
2686 }
2687 case ISD::AssertZext: {
2688 // AssertZext demands all of the high bits, plus any of the low bits
2689 // demanded by its users.
2690 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2692 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2693 TLO, Depth + 1))
2694 return true;
2695
2696 Known.Zero |= ~InMask;
2697 Known.One &= (~Known.Zero);
2698 break;
2699 }
2701 SDValue Src = Op.getOperand(0);
2702 SDValue Idx = Op.getOperand(1);
2703 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2704 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2705
2706 if (SrcEltCnt.isScalable())
2707 return false;
2708
2709 // Demand the bits from every vector element without a constant index.
2710 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2711 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2712 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2713 if (CIdx->getAPIntValue().ult(NumSrcElts))
2714 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2715
2716 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2717 // anything about the extended bits.
2718 APInt DemandedSrcBits = DemandedBits;
2719 if (BitWidth > EltBitWidth)
2720 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2721
2722 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2723 Depth + 1))
2724 return true;
2725
2726 // Attempt to avoid multi-use ops if we don't need anything from them.
2727 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2728 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2729 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2730 SDValue NewOp =
2731 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2732 return TLO.CombineTo(Op, NewOp);
2733 }
2734 }
2735
2736 Known = Known2;
2737 if (BitWidth > EltBitWidth)
2738 Known = Known.anyext(BitWidth);
2739 break;
2740 }
2741 case ISD::BITCAST: {
2742 if (VT.isScalableVector())
2743 return false;
2744 SDValue Src = Op.getOperand(0);
2745 EVT SrcVT = Src.getValueType();
2746 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2747
2748 // If this is an FP->Int bitcast and if the sign bit is the only
2749 // thing demanded, turn this into a FGETSIGN.
2750 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2751 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2752 SrcVT.isFloatingPoint()) {
2753 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2754 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2755 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2756 SrcVT != MVT::f128) {
2757 // Cannot eliminate/lower SHL for f128 yet.
2758 EVT Ty = OpVTLegal ? VT : MVT::i32;
2759 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2760 // place. We expect the SHL to be eliminated by other optimizations.
2761 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2762 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2763 if (!OpVTLegal && OpVTSizeInBits > 32)
2764 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2765 unsigned ShVal = Op.getValueSizeInBits() - 1;
2766 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2767 return TLO.CombineTo(Op,
2768 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2769 }
2770 }
2771
2772 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2773 // Demand the elt/bit if any of the original elts/bits are demanded.
2774 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2775 unsigned Scale = BitWidth / NumSrcEltBits;
2776 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2777 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2778 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2779 for (unsigned i = 0; i != Scale; ++i) {
2780 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2781 unsigned BitOffset = EltOffset * NumSrcEltBits;
2782 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2783 if (!Sub.isZero()) {
2784 DemandedSrcBits |= Sub;
2785 for (unsigned j = 0; j != NumElts; ++j)
2786 if (DemandedElts[j])
2787 DemandedSrcElts.setBit((j * Scale) + i);
2788 }
2789 }
2790
2791 APInt KnownSrcUndef, KnownSrcZero;
2792 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2793 KnownSrcZero, TLO, Depth + 1))
2794 return true;
2795
2796 KnownBits KnownSrcBits;
2797 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2798 KnownSrcBits, TLO, Depth + 1))
2799 return true;
2800 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2801 // TODO - bigendian once we have test coverage.
2802 unsigned Scale = NumSrcEltBits / BitWidth;
2803 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2804 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2805 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2806 for (unsigned i = 0; i != NumElts; ++i)
2807 if (DemandedElts[i]) {
2808 unsigned Offset = (i % Scale) * BitWidth;
2809 DemandedSrcBits.insertBits(DemandedBits, Offset);
2810 DemandedSrcElts.setBit(i / Scale);
2811 }
2812
2813 if (SrcVT.isVector()) {
2814 APInt KnownSrcUndef, KnownSrcZero;
2815 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2816 KnownSrcZero, TLO, Depth + 1))
2817 return true;
2818 }
2819
2820 KnownBits KnownSrcBits;
2821 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2822 KnownSrcBits, TLO, Depth + 1))
2823 return true;
2824
2825 // Attempt to avoid multi-use ops if we don't need anything from them.
2826 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2827 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2828 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2829 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2830 return TLO.CombineTo(Op, NewOp);
2831 }
2832 }
2833 }
2834
2835 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2836 // recursive call where Known may be useful to the caller.
2837 if (Depth > 0) {
2838 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2839 return false;
2840 }
2841 break;
2842 }
2843 case ISD::MUL:
2844 if (DemandedBits.isPowerOf2()) {
2845 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2846 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2847 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2848 unsigned CTZ = DemandedBits.countr_zero();
2849 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2850 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2851 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2852 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2853 return TLO.CombineTo(Op, Shl);
2854 }
2855 }
2856 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2857 // X * X is odd iff X is odd.
2858 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2859 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2860 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2861 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2862 return TLO.CombineTo(Op, And1);
2863 }
2864 [[fallthrough]];
2865 case ISD::PTRADD:
2866 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
2867 break;
2868 // PTRADD behaves like ADD if pointers are represented as integers.
2869 [[fallthrough]];
2870 case ISD::ADD:
2871 case ISD::SUB: {
2872 // Add, Sub, and Mul don't demand any bits in positions beyond that
2873 // of the highest bit demanded of them.
2874 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2875 SDNodeFlags Flags = Op.getNode()->getFlags();
2876 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2877 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2878 KnownBits KnownOp0, KnownOp1;
2879 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2880 const KnownBits &KnownRHS) {
2881 if (Op.getOpcode() == ISD::MUL)
2882 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2883 return Demanded;
2884 };
2885 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2886 Depth + 1) ||
2887 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2888 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2889 // See if the operation should be performed at a smaller bit width.
2891 // Disable the nsw and nuw flags. We can no longer guarantee that we
2892 // won't wrap after simplification.
2893 Op->dropFlags(SDNodeFlags::NoWrap);
2894 return true;
2895 }
2896
2897 // neg x with only low bit demanded is simply x.
2898 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2899 isNullConstant(Op0))
2900 return TLO.CombineTo(Op, Op1);
2901
2902 // Attempt to avoid multi-use ops if we don't need anything from them.
2903 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2905 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2907 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2908 if (DemandedOp0 || DemandedOp1) {
2909 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2910 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2911 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2912 Flags & ~SDNodeFlags::NoWrap);
2913 return TLO.CombineTo(Op, NewOp);
2914 }
2915 }
2916
2917 // If we have a constant operand, we may be able to turn it into -1 if we
2918 // do not demand the high bits. This can make the constant smaller to
2919 // encode, allow more general folding, or match specialized instruction
2920 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2921 // is probably not useful (and could be detrimental).
2923 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2924 if (C && !C->isAllOnes() && !C->isOne() &&
2925 (C->getAPIntValue() | HighMask).isAllOnes()) {
2926 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2927 // Disable the nsw and nuw flags. We can no longer guarantee that we
2928 // won't wrap after simplification.
2929 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2930 Flags & ~SDNodeFlags::NoWrap);
2931 return TLO.CombineTo(Op, NewOp);
2932 }
2933
2934 // Match a multiply with a disguised negated-power-of-2 and convert to a
2935 // an equivalent shift-left amount.
2936 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2937 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2938 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2939 return 0;
2940
2941 // Don't touch opaque constants. Also, ignore zero and power-of-2
2942 // multiplies. Those will get folded later.
2943 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2944 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2945 !MulC->getAPIntValue().isPowerOf2()) {
2946 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2947 if (UnmaskedC.isNegatedPowerOf2())
2948 return (-UnmaskedC).logBase2();
2949 }
2950 return 0;
2951 };
2952
2953 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2954 unsigned ShlAmt) {
2955 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2956 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2957 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2958 return TLO.CombineTo(Op, Res);
2959 };
2960
2962 if (Op.getOpcode() == ISD::ADD) {
2963 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2964 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2965 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2966 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2967 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2968 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2969 }
2970 if (Op.getOpcode() == ISD::SUB) {
2971 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2972 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2973 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2974 }
2975 }
2976
2977 if (Op.getOpcode() == ISD::MUL) {
2978 Known = KnownBits::mul(KnownOp0, KnownOp1);
2979 } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
2981 Op.getOpcode() != ISD::SUB, Flags.hasNoSignedWrap(),
2982 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2983 }
2984 break;
2985 }
2986 case ISD::FABS: {
2987 SDValue Op0 = Op.getOperand(0);
2988 APInt SignMask = APInt::getSignMask(BitWidth);
2989
2990 if (!DemandedBits.intersects(SignMask))
2991 return TLO.CombineTo(Op, Op0);
2992
2993 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
2994 Depth + 1))
2995 return true;
2996
2997 if (Known.isNonNegative())
2998 return TLO.CombineTo(Op, Op0);
2999 if (Known.isNegative())
3000 return TLO.CombineTo(
3001 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0, Op->getFlags()));
3002
3003 Known.Zero |= SignMask;
3004 Known.One &= ~SignMask;
3005
3006 break;
3007 }
3008 case ISD::FCOPYSIGN: {
3009 SDValue Op0 = Op.getOperand(0);
3010 SDValue Op1 = Op.getOperand(1);
3011
3012 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3013 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3014 APInt SignMask0 = APInt::getSignMask(BitWidth0);
3015 APInt SignMask1 = APInt::getSignMask(BitWidth1);
3016
3017 if (!DemandedBits.intersects(SignMask0))
3018 return TLO.CombineTo(Op, Op0);
3019
3020 if (SimplifyDemandedBits(Op0, ~SignMask0 & DemandedBits, DemandedElts,
3021 Known, TLO, Depth + 1) ||
3022 SimplifyDemandedBits(Op1, SignMask1, DemandedElts, Known2, TLO,
3023 Depth + 1))
3024 return true;
3025
3026 if (Known2.isNonNegative())
3027 return TLO.CombineTo(
3028 Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
3029
3030 if (Known2.isNegative())
3031 return TLO.CombineTo(
3032 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT,
3033 TLO.DAG.getNode(ISD::FABS, SDLoc(Op0), VT, Op0)));
3034
3035 Known.Zero &= ~SignMask0;
3036 Known.One &= ~SignMask0;
3037 break;
3038 }
3039 case ISD::FNEG: {
3040 SDValue Op0 = Op.getOperand(0);
3041 APInt SignMask = APInt::getSignMask(BitWidth);
3042
3043 if (!DemandedBits.intersects(SignMask))
3044 return TLO.CombineTo(Op, Op0);
3045
3046 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3047 Depth + 1))
3048 return true;
3049
3050 if (!Known.isSignUnknown()) {
3051 Known.Zero ^= SignMask;
3052 Known.One ^= SignMask;
3053 }
3054
3055 break;
3056 }
3057 default:
3058 // We also ask the target about intrinsics (which could be specific to it).
3059 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3060 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3061 // TODO: Probably okay to remove after audit; here to reduce change size
3062 // in initial enablement patch for scalable vectors
3063 if (Op.getValueType().isScalableVector())
3064 break;
3066 Known, TLO, Depth))
3067 return true;
3068 break;
3069 }
3070
3071 // Just use computeKnownBits to compute output bits.
3072 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3073 break;
3074 }
3075
3076 // If we know the value of all of the demanded bits, return this as a
3077 // constant.
3079 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
3080 // Avoid folding to a constant if any OpaqueConstant is involved.
3081 if (llvm::any_of(Op->ops(), [](SDValue V) {
3082 auto *C = dyn_cast<ConstantSDNode>(V);
3083 return C && C->isOpaque();
3084 }))
3085 return false;
3086 if (VT.isInteger())
3087 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
3088 if (VT.isFloatingPoint())
3089 return TLO.CombineTo(
3090 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
3091 dl, VT));
3092 }
3093
3094 // A multi use 'all demanded elts' simplify failed to find any knownbits.
3095 // Try again just for the original demanded elts.
3096 // Ensure we do this AFTER constant folding above.
3097 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3098 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3099
3100 return false;
3101}
3102
// Wrapper used by the DAGCombiner: builds a TargetLoweringOpt that reflects
// the current legalization phase, runs the demanded-vector-elements analysis
// on Op, and on success commits the replacement nodes and requeues Op so
// follow-on combines can run.
// NOTE(review): the first line of this signature (presumably the 'SDValue Op'
// parameter, original line 3103) is not visible in this rendered listing.
3104 const APInt &DemandedElts,
3105 DAGCombinerInfo &DCI) const {
3106 SelectionDAG &DAG = DCI.DAG;
// The two flags track whether types/operations have already been legalized;
// TargetLoweringOpt uses them to decide which transforms are still allowed.
3107 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3108 !DCI.isBeforeLegalizeOps());
3109
// Known undef/zero element masks are computed by the callee but are not
// surfaced to this caller; only the "did anything change" result matters here.
3110 APInt KnownUndef, KnownZero;
3111 bool Simplified =
3112 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3113 if (Simplified) {
// Revisit Op on the combiner worklist, then apply TLO's recorded CombineTo
// replacements to the DAG.
3114 DCI.AddToWorklist(Op.getNode());
3115 DCI.CommitTargetLoweringOpt(TLO);
3116 }
3117
3118 return Simplified;
3119}
3120
3121/// Given a vector binary operation and known undefined elements for each input
3122/// operand, compute whether each element of the output is undefined.
// NOTE(review): the declaration line of this static helper (original line
// 3123 — it takes the binop SDValue and a SelectionDAG reference) and the
// first line of the assert at original line 3127 are not visible in this
// rendered listing.
3124 const APInt &UndefOp0,
3125 const APInt &UndefOp1) {
3126 EVT VT = BO.getValueType();
3128 "Vector binop only");
3129
3130 EVT EltVT = VT.getVectorElementType();
// Scalable vectors are treated as a single lane for this per-element
// analysis; only fixed-length vectors get a real element count.
3131 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
// Both undef masks must be one bit per element of the result type.
3132 assert(UndefOp0.getBitWidth() == NumElts &&
3133 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3134
// Classify element Index of operand V: return an explicit UNDEF when the
// caller's undef mask says so, a foldable scalar pulled out of a
// BUILD_VECTOR (FP constant, undef, or non-opaque integer constant), or a
// null SDValue when the element cannot be classified.
3135 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3136 const APInt &UndefVals) {
3137 if (UndefVals[Index])
3138 return DAG.getUNDEF(EltVT);
3139
3140 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3141 // Try hard to make sure that the getNode() call is not creating temporary
3142 // nodes. Ignore opaque integers because they do not constant fold.
3143 SDValue Elt = BV->getOperand(Index);
3144 auto *C = dyn_cast<ConstantSDNode>(Elt);
3145 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3146 return Elt;
3147 }
3148
3149 return SDValue();
3150 };
3151
3152 APInt KnownUndef = APInt::getZero(NumElts);
3153 for (unsigned i = 0; i != NumElts; ++i) {
3154 // If both inputs for this element are either constant or undef and match
3155 // the element type, compute the constant/undef result for this element of
3156 // the vector.
3157 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3158 // not handle FP constants. The code within getNode() should be refactored
3159 // to avoid the danger of creating a bogus temporary node here.
3160 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3161 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
// Let getNode() constant-fold the scalar operation; if the fold yields
// UNDEF, the corresponding result element is known to be undef.
3162 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3163 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3164 KnownUndef.setBit(i);
3165 }
3166 return KnownUndef;
3167}
3168
3170 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3171 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3172 bool AssumeSingleUse) const {
3173 EVT VT = Op.getValueType();
3174 unsigned Opcode = Op.getOpcode();
3175 APInt DemandedElts = OriginalDemandedElts;
3176 unsigned NumElts = DemandedElts.getBitWidth();
3177 assert(VT.isVector() && "Expected vector op");
3178
3179 KnownUndef = KnownZero = APInt::getZero(NumElts);
3180
3182 return false;
3183
3184 // TODO: For now we assume we know nothing about scalable vectors.
3185 if (VT.isScalableVector())
3186 return false;
3187
3188 assert(VT.getVectorNumElements() == NumElts &&
3189 "Mask size mismatches value type element count!");
3190
3191 // Undef operand.
3192 if (Op.isUndef()) {
3193 KnownUndef.setAllBits();
3194 return false;
3195 }
3196
3197 // If Op has other users, assume that all elements are needed.
3198 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3199 DemandedElts.setAllBits();
3200
3201 // Not demanding any elements from Op.
3202 if (DemandedElts == 0) {
3203 KnownUndef.setAllBits();
3204 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3205 }
3206
3207 // Limit search depth.
3209 return false;
3210
3211 SDLoc DL(Op);
3212 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3213 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3214
3215 // Helper for demanding the specified elements and all the bits of both binary
3216 // operands.
3217 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3218 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3219 TLO.DAG, Depth + 1);
3220 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3221 TLO.DAG, Depth + 1);
3222 if (NewOp0 || NewOp1) {
3223 SDValue NewOp =
3224 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3225 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3226 return TLO.CombineTo(Op, NewOp);
3227 }
3228 return false;
3229 };
3230
3231 switch (Opcode) {
3232 case ISD::SCALAR_TO_VECTOR: {
3233 if (!DemandedElts[0]) {
3234 KnownUndef.setAllBits();
3235 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3236 }
3237 KnownUndef.setHighBits(NumElts - 1);
3238 break;
3239 }
3240 case ISD::BITCAST: {
3241 SDValue Src = Op.getOperand(0);
3242 EVT SrcVT = Src.getValueType();
3243
3244 if (!SrcVT.isVector()) {
3245 // TODO - bigendian once we have test coverage.
3246 if (IsLE) {
3247 APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
3248 unsigned EltSize = VT.getScalarSizeInBits();
3249 for (unsigned I = 0; I != NumElts; ++I) {
3250 if (DemandedElts[I]) {
3251 unsigned Offset = I * EltSize;
3252 DemandedSrcBits.setBits(Offset, Offset + EltSize);
3253 }
3254 }
3255 KnownBits Known;
3256 if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
3257 return true;
3258 }
3259 break;
3260 }
3261
3262 // Fast handling of 'identity' bitcasts.
3263 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3264 if (NumSrcElts == NumElts)
3265 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3266 KnownZero, TLO, Depth + 1);
3267
3268 APInt SrcDemandedElts, SrcZero, SrcUndef;
3269
3270 // Bitcast from 'large element' src vector to 'small element' vector, we
3271 // must demand a source element if any DemandedElt maps to it.
3272 if ((NumElts % NumSrcElts) == 0) {
3273 unsigned Scale = NumElts / NumSrcElts;
3274 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3275 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3276 TLO, Depth + 1))
3277 return true;
3278
3279 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3280 // of the large element.
3281 // TODO - bigendian once we have test coverage.
3282 if (IsLE) {
3283 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3284 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3285 for (unsigned i = 0; i != NumElts; ++i)
3286 if (DemandedElts[i]) {
3287 unsigned Ofs = (i % Scale) * EltSizeInBits;
3288 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3289 }
3290
3291 KnownBits Known;
3292 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3293 TLO, Depth + 1))
3294 return true;
3295
3296 // The bitcast has split each wide element into a number of
3297 // narrow subelements. We have just computed the Known bits
3298 // for wide elements. See if element splitting results in
3299 // some subelements being zero. Only for demanded elements!
3300 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3301 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3302 .isAllOnes())
3303 continue;
3304 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3305 unsigned Elt = Scale * SrcElt + SubElt;
3306 if (DemandedElts[Elt])
3307 KnownZero.setBit(Elt);
3308 }
3309 }
3310 }
3311
3312 // If the src element is zero/undef then all the output elements will be -
3313 // only demanded elements are guaranteed to be correct.
3314 for (unsigned i = 0; i != NumSrcElts; ++i) {
3315 if (SrcDemandedElts[i]) {
3316 if (SrcZero[i])
3317 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3318 if (SrcUndef[i])
3319 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3320 }
3321 }
3322 }
3323
3324 // Bitcast from 'small element' src vector to 'large element' vector, we
3325 // demand all smaller source elements covered by the larger demanded element
3326 // of this vector.
3327 if ((NumSrcElts % NumElts) == 0) {
3328 unsigned Scale = NumSrcElts / NumElts;
3329 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3330 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3331 TLO, Depth + 1))
3332 return true;
3333
3334 // If all the src elements covering an output element are zero/undef, then
3335 // the output element will be as well, assuming it was demanded.
3336 for (unsigned i = 0; i != NumElts; ++i) {
3337 if (DemandedElts[i]) {
3338 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3339 KnownZero.setBit(i);
3340 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3341 KnownUndef.setBit(i);
3342 }
3343 }
3344 }
3345 break;
3346 }
3347 case ISD::FREEZE: {
3348 SDValue N0 = Op.getOperand(0);
3349 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3350 /*PoisonOnly=*/false,
3351 Depth + 1))
3352 return TLO.CombineTo(Op, N0);
3353
3354 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3355 // freeze(op(x, ...)) -> op(freeze(x), ...).
3356 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3357 return TLO.CombineTo(
3359 TLO.DAG.getFreeze(N0.getOperand(0))));
3360 break;
3361 }
3362 case ISD::BUILD_VECTOR: {
3363 // Check all elements and simplify any unused elements with UNDEF.
3364 if (!DemandedElts.isAllOnes()) {
3365 // Don't simplify BROADCASTS.
3366 if (llvm::any_of(Op->op_values(),
3367 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3369 bool Updated = false;
3370 for (unsigned i = 0; i != NumElts; ++i) {
3371 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3372 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3373 KnownUndef.setBit(i);
3374 Updated = true;
3375 }
3376 }
3377 if (Updated)
3378 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3379 }
3380 }
3381 for (unsigned i = 0; i != NumElts; ++i) {
3382 SDValue SrcOp = Op.getOperand(i);
3383 if (SrcOp.isUndef()) {
3384 KnownUndef.setBit(i);
3385 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3387 KnownZero.setBit(i);
3388 }
3389 }
3390 break;
3391 }
3392 case ISD::CONCAT_VECTORS: {
3393 EVT SubVT = Op.getOperand(0).getValueType();
3394 unsigned NumSubVecs = Op.getNumOperands();
3395 unsigned NumSubElts = SubVT.getVectorNumElements();
3396 for (unsigned i = 0; i != NumSubVecs; ++i) {
3397 SDValue SubOp = Op.getOperand(i);
3398 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3399 APInt SubUndef, SubZero;
3400 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3401 Depth + 1))
3402 return true;
3403 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3404 KnownZero.insertBits(SubZero, i * NumSubElts);
3405 }
3406
3407 // Attempt to avoid multi-use ops if we don't need anything from them.
3408 if (!DemandedElts.isAllOnes()) {
3409 bool FoundNewSub = false;
3410 SmallVector<SDValue, 2> DemandedSubOps;
3411 for (unsigned i = 0; i != NumSubVecs; ++i) {
3412 SDValue SubOp = Op.getOperand(i);
3413 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3415 SubOp, SubElts, TLO.DAG, Depth + 1);
3416 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3417 FoundNewSub = NewSubOp ? true : FoundNewSub;
3418 }
3419 if (FoundNewSub) {
3420 SDValue NewOp =
3421 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3422 return TLO.CombineTo(Op, NewOp);
3423 }
3424 }
3425 break;
3426 }
3427 case ISD::INSERT_SUBVECTOR: {
3428 // Demand any elements from the subvector and the remainder from the src it
3429 // is inserted into.
3430 SDValue Src = Op.getOperand(0);
3431 SDValue Sub = Op.getOperand(1);
3432 uint64_t Idx = Op.getConstantOperandVal(2);
3433 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3434 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3435 APInt DemandedSrcElts = DemandedElts;
3436 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
3437
3438 // If none of the sub operand elements are demanded, bypass the insert.
3439 if (!DemandedSubElts)
3440 return TLO.CombineTo(Op, Src);
3441
3442 APInt SubUndef, SubZero;
3443 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3444 Depth + 1))
3445 return true;
3446
3447 // If none of the src operand elements are demanded, replace it with undef.
3448 if (!DemandedSrcElts && !Src.isUndef())
3449 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3450 TLO.DAG.getUNDEF(VT), Sub,
3451 Op.getOperand(2)));
3452
3453 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3454 TLO, Depth + 1))
3455 return true;
3456 KnownUndef.insertBits(SubUndef, Idx);
3457 KnownZero.insertBits(SubZero, Idx);
3458
3459 // Attempt to avoid multi-use ops if we don't need anything from them.
3460 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3462 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3464 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3465 if (NewSrc || NewSub) {
3466 NewSrc = NewSrc ? NewSrc : Src;
3467 NewSub = NewSub ? NewSub : Sub;
3468 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3469 NewSub, Op.getOperand(2));
3470 return TLO.CombineTo(Op, NewOp);
3471 }
3472 }
3473 break;
3474 }
3476 // Offset the demanded elts by the subvector index.
3477 SDValue Src = Op.getOperand(0);
3478 if (Src.getValueType().isScalableVector())
3479 break;
3480 uint64_t Idx = Op.getConstantOperandVal(1);
3481 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3482 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3483
3484 APInt SrcUndef, SrcZero;
3485 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3486 Depth + 1))
3487 return true;
3488 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3489 KnownZero = SrcZero.extractBits(NumElts, Idx);
3490
3491 // Attempt to avoid multi-use ops if we don't need anything from them.
3492 if (!DemandedElts.isAllOnes()) {
3494 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3495 if (NewSrc) {
3496 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3497 Op.getOperand(1));
3498 return TLO.CombineTo(Op, NewOp);
3499 }
3500 }
3501 break;
3502 }
3504 SDValue Vec = Op.getOperand(0);
3505 SDValue Scl = Op.getOperand(1);
3506 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3507
3508 // For a legal, constant insertion index, if we don't need this insertion
3509 // then strip it, else remove it from the demanded elts.
3510 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3511 unsigned Idx = CIdx->getZExtValue();
3512 if (!DemandedElts[Idx])
3513 return TLO.CombineTo(Op, Vec);
3514
3515 APInt DemandedVecElts(DemandedElts);
3516 DemandedVecElts.clearBit(Idx);
3517 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3518 KnownZero, TLO, Depth + 1))
3519 return true;
3520
3521 KnownUndef.setBitVal(Idx, Scl.isUndef());
3522
3523 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3524 break;
3525 }
3526
3527 APInt VecUndef, VecZero;
3528 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3529 Depth + 1))
3530 return true;
3531 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3532 break;
3533 }
3534 case ISD::VSELECT: {
3535 SDValue Sel = Op.getOperand(0);
3536 SDValue LHS = Op.getOperand(1);
3537 SDValue RHS = Op.getOperand(2);
3538
3539 // Try to transform the select condition based on the current demanded
3540 // elements.
3541 APInt UndefSel, ZeroSel;
3542 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3543 Depth + 1))
3544 return true;
3545
3546 // See if we can simplify either vselect operand.
3547 APInt DemandedLHS(DemandedElts);
3548 APInt DemandedRHS(DemandedElts);
3549 APInt UndefLHS, ZeroLHS;
3550 APInt UndefRHS, ZeroRHS;
3551 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3552 Depth + 1))
3553 return true;
3554 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3555 Depth + 1))
3556 return true;
3557
3558 KnownUndef = UndefLHS & UndefRHS;
3559 KnownZero = ZeroLHS & ZeroRHS;
3560
3561 // If we know that the selected element is always zero, we don't need the
3562 // select value element.
3563 APInt DemandedSel = DemandedElts & ~KnownZero;
3564 if (DemandedSel != DemandedElts)
3565 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3566 Depth + 1))
3567 return true;
3568
3569 break;
3570 }
3571 case ISD::VECTOR_SHUFFLE: {
3572 SDValue LHS = Op.getOperand(0);
3573 SDValue RHS = Op.getOperand(1);
3574 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3575
3576 // Collect demanded elements from shuffle operands..
3577 APInt DemandedLHS(NumElts, 0);
3578 APInt DemandedRHS(NumElts, 0);
3579 for (unsigned i = 0; i != NumElts; ++i) {
3580 int M = ShuffleMask[i];
3581 if (M < 0 || !DemandedElts[i])
3582 continue;
3583 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3584 if (M < (int)NumElts)
3585 DemandedLHS.setBit(M);
3586 else
3587 DemandedRHS.setBit(M - NumElts);
3588 }
3589
3590 // If either side isn't demanded, replace it by UNDEF. We handle this
3591 // explicitly here to also simplify in case of multiple uses (on the
3592 // contrary to the SimplifyDemandedVectorElts calls below).
3593 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3594 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3595 if (FoldLHS || FoldRHS) {
3596 LHS = FoldLHS ? TLO.DAG.getUNDEF(LHS.getValueType()) : LHS;
3597 RHS = FoldRHS ? TLO.DAG.getUNDEF(RHS.getValueType()) : RHS;
3598 SDValue NewOp =
3599 TLO.DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, ShuffleMask);
3600 return TLO.CombineTo(Op, NewOp);
3601 }
3602
3603 // See if we can simplify either shuffle operand.
3604 APInt UndefLHS, ZeroLHS;
3605 APInt UndefRHS, ZeroRHS;
3606 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3607 Depth + 1))
3608 return true;
3609 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3610 Depth + 1))
3611 return true;
3612
3613 // Simplify mask using undef elements from LHS/RHS.
3614 bool Updated = false;
3615 bool IdentityLHS = true, IdentityRHS = true;
3616 SmallVector<int, 32> NewMask(ShuffleMask);
3617 for (unsigned i = 0; i != NumElts; ++i) {
3618 int &M = NewMask[i];
3619 if (M < 0)
3620 continue;
3621 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3622 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3623 Updated = true;
3624 M = -1;
3625 }
3626 IdentityLHS &= (M < 0) || (M == (int)i);
3627 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3628 }
3629
3630 // Update legal shuffle masks based on demanded elements if it won't reduce
3631 // to Identity which can cause premature removal of the shuffle mask.
3632 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3633 SDValue LegalShuffle =
3634 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3635 if (LegalShuffle)
3636 return TLO.CombineTo(Op, LegalShuffle);
3637 }
3638
3639 // Propagate undef/zero elements from LHS/RHS.
3640 for (unsigned i = 0; i != NumElts; ++i) {
3641 int M = ShuffleMask[i];
3642 if (M < 0) {
3643 KnownUndef.setBit(i);
3644 } else if (M < (int)NumElts) {
3645 if (UndefLHS[M])
3646 KnownUndef.setBit(i);
3647 if (ZeroLHS[M])
3648 KnownZero.setBit(i);
3649 } else {
3650 if (UndefRHS[M - NumElts])
3651 KnownUndef.setBit(i);
3652 if (ZeroRHS[M - NumElts])
3653 KnownZero.setBit(i);
3654 }
3655 }
3656 break;
3657 }
3661 APInt SrcUndef, SrcZero;
3662 SDValue Src = Op.getOperand(0);
3663 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3664 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3665 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3666 Depth + 1))
3667 return true;
3668 KnownZero = SrcZero.zextOrTrunc(NumElts);
3669 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3670
3671 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3672 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3673 DemandedSrcElts == 1) {
3674 // aext - if we just need the bottom element then we can bitcast.
3675 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3676 }
3677
3678 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3679 // zext(undef) upper bits are guaranteed to be zero.
3680 if (DemandedElts.isSubsetOf(KnownUndef))
3681 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3682 KnownUndef.clearAllBits();
3683
3684 // zext - if we just need the bottom element then we can mask:
3685 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3686 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3687 Op->isOnlyUserOf(Src.getNode()) &&
3688 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3689 SDLoc DL(Op);
3690 EVT SrcVT = Src.getValueType();
3691 EVT SrcSVT = SrcVT.getScalarType();
3692 SmallVector<SDValue> MaskElts;
3693 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3694 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3695 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3696 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3697 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3698 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3699 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3700 }
3701 }
3702 }
3703 break;
3704 }
3705
3706 // TODO: There are more binop opcodes that could be handled here - MIN,
3707 // MAX, saturated math, etc.
3708 case ISD::ADD: {
3709 SDValue Op0 = Op.getOperand(0);
3710 SDValue Op1 = Op.getOperand(1);
3711 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3712 APInt UndefLHS, ZeroLHS;
3713 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3714 Depth + 1, /*AssumeSingleUse*/ true))
3715 return true;
3716 }
3717 [[fallthrough]];
3718 }
3719 case ISD::AVGCEILS:
3720 case ISD::AVGCEILU:
3721 case ISD::AVGFLOORS:
3722 case ISD::AVGFLOORU:
3723 case ISD::OR:
3724 case ISD::XOR:
3725 case ISD::SUB:
3726 case ISD::FADD:
3727 case ISD::FSUB:
3728 case ISD::FMUL:
3729 case ISD::FDIV:
3730 case ISD::FREM: {
3731 SDValue Op0 = Op.getOperand(0);
3732 SDValue Op1 = Op.getOperand(1);
3733
3734 APInt UndefRHS, ZeroRHS;
3735 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3736 Depth + 1))
3737 return true;
3738 APInt UndefLHS, ZeroLHS;
3739 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3740 Depth + 1))
3741 return true;
3742
3743 KnownZero = ZeroLHS & ZeroRHS;
3744 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3745
3746 // Attempt to avoid multi-use ops if we don't need anything from them.
3747 // TODO - use KnownUndef to relax the demandedelts?
3748 if (!DemandedElts.isAllOnes())
3749 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3750 return true;
3751 break;
3752 }
3753 case ISD::SHL:
3754 case ISD::SRL:
3755 case ISD::SRA:
3756 case ISD::ROTL:
3757 case ISD::ROTR: {
3758 SDValue Op0 = Op.getOperand(0);
3759 SDValue Op1 = Op.getOperand(1);
3760
3761 APInt UndefRHS, ZeroRHS;
3762 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3763 Depth + 1))
3764 return true;
3765 APInt UndefLHS, ZeroLHS;
3766 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3767 Depth + 1))
3768 return true;
3769
3770 KnownZero = ZeroLHS;
3771 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3772
3773 // Attempt to avoid multi-use ops if we don't need anything from them.
3774 // TODO - use KnownUndef to relax the demandedelts?
3775 if (!DemandedElts.isAllOnes())
3776 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3777 return true;
3778 break;
3779 }
3780 case ISD::MUL:
3781 case ISD::MULHU:
3782 case ISD::MULHS:
3783 case ISD::AND: {
3784 SDValue Op0 = Op.getOperand(0);
3785 SDValue Op1 = Op.getOperand(1);
3786
3787 APInt SrcUndef, SrcZero;
3788 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3789 Depth + 1))
3790 return true;
3791 // If we know that a demanded element was zero in Op1 we don't need to
3792 // demand it in Op0 - its guaranteed to be zero.
3793 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3794 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3795 TLO, Depth + 1))
3796 return true;
3797
3798 KnownUndef &= DemandedElts0;
3799 KnownZero &= DemandedElts0;
3800
3801 // If every element pair has a zero/undef then just fold to zero.
3802 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3803 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3804 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3805 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3806
3807 // If either side has a zero element, then the result element is zero, even
3808 // if the other is an UNDEF.
3809 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3810 // and then handle 'and' nodes with the rest of the binop opcodes.
3811 KnownZero |= SrcZero;
3812 KnownUndef &= SrcUndef;
3813 KnownUndef &= ~KnownZero;
3814
3815 // Attempt to avoid multi-use ops if we don't need anything from them.
3816 if (!DemandedElts.isAllOnes())
3817 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3818 return true;
3819 break;
3820 }
3821 case ISD::TRUNCATE:
3822 case ISD::SIGN_EXTEND:
3823 case ISD::ZERO_EXTEND:
3824 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3825 KnownZero, TLO, Depth + 1))
3826 return true;
3827
3828 if (!DemandedElts.isAllOnes())
3830 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3831 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3832
3833 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3834 // zext(undef) upper bits are guaranteed to be zero.
3835 if (DemandedElts.isSubsetOf(KnownUndef))
3836 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3837 KnownUndef.clearAllBits();
3838 }
3839 break;
3840 case ISD::SINT_TO_FP:
3841 case ISD::UINT_TO_FP:
3842 case ISD::FP_TO_SINT:
3843 case ISD::FP_TO_UINT:
3844 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3845 KnownZero, TLO, Depth + 1))
3846 return true;
3847 // Don't fall through to generic undef -> undef handling.
3848 return false;
3849 default: {
3850 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3851 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3852 KnownZero, TLO, Depth))
3853 return true;
3854 } else {
3855 KnownBits Known;
3856 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3857 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3858 TLO, Depth, AssumeSingleUse))
3859 return true;
3860 }
3861 break;
3862 }
3863 }
3864 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3865
3866 // Constant fold all undef cases.
3867 // TODO: Handle zero cases as well.
3868 if (DemandedElts.isSubsetOf(KnownUndef))
3869 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3870
3871 return false;
3872}
3873
3874/// Determine which of the bits specified in Mask are known to be either zero or
3875/// one and return them in the Known.
// Default implementation of the target hook. Targets override this for their
// BUILTIN_OP_END+ opcodes; the base class has no target knowledge.
3877 KnownBits &Known,
3878 const APInt &DemandedElts,
3879 const SelectionDAG &DAG,
3880 unsigned Depth) const {
3881 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3882 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3883 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3884 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3885 "Should use MaskedValueIsZero if you don't know whether Op"
3886 " is a target node!");
// Conservative default: claim nothing about any bit.
3887 Known.resetAll();
3888}
3889
// Default GlobalISel known-bits hook for target instructions: with no
// target-specific information, conservatively report all bits unknown.
3892 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3893 unsigned Depth) const {
3894 Known.resetAll();
3895}
3896
// Default GlobalISel known-FP-class hook: the base class cannot narrow the
// possible floating-point classes, so reset to "anything".
3899 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3900 unsigned Depth) const {
3901 Known.resetAll();
3902}
3903
// Frame-index pointers: the only generally known fact is their alignment,
// which forces the low log2(align) bits to zero.
3905 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3906 // The low bits are known zero if the pointer is aligned.
3907 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3908}
3909
3915
3916/// This method can be implemented by targets that want to expose additional
3917/// information about sign bits to the DAG Combiner.
// Default hook implementation; only valid for target nodes / intrinsics.
3919 const APInt &,
3920 const SelectionDAG &,
3921 unsigned Depth) const {
3922 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3923 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3924 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3925 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3926 "Should use ComputeNumSignBits if you don't know whether Op"
3927 " is a target node!");
// Conservative default: every value has at least one (copy of the) sign bit.
3928 return 1;
3929}
3930
// Default GlobalISel sign-bits hook: conservatively report a single sign bit.
3932 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3933 const MachineRegisterInfo &MRI, unsigned Depth) const {
3934 return 1;
3935}
3936
// Default hook: the base class performs no target-specific demanded-elements
// simplification. Returning false means "no change was made".
3938 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3939 TargetLoweringOpt &TLO, unsigned Depth) const {
3940 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3941 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3942 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3943 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3944 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3945 " is a target node!");
3946 return false;
3947}
3948
// Default hook: no target-specific demanded-bits simplification. It still
// fills in Known via the known-bits hook so callers get best-effort info.
3950 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3951 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3952 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3953 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3954 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3955 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3956 "Should use SimplifyDemandedBits if you don't know whether Op"
3957 " is a target node!");
// Delegate to the known-bits hook; no node replacement is attempted here.
3958 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3959 return false;
3960}
3961
// Default hook: no multi-use demanded-bits bypass is known for target nodes;
// an empty SDValue means "no simpler operand found".
3963 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3964 SelectionDAG &DAG, unsigned Depth) const {
3965 assert(
3966 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3967 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3968 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3969 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3970 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3971 " is a target node!");
3972 return SDValue();
3973}
3974
3975 SDValue
// Build a vector shuffle only if its mask is legal for the target; if the
// original mask is not legal, the operands are swapped and the (presumably
// commuted — the commuting step is not visible in this listing, TODO confirm)
// mask is re-checked. Returns an empty SDValue when neither form is legal.
3978 SelectionDAG &DAG) const {
3979 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3980 if (!LegalMask) {
// Try the commuted operand order before giving up.
3981 std::swap(N0, N1);
3983 LegalMask = isShuffleMaskLegal(Mask, VT);
3984 }
3985
3986 if (!LegalMask)
3987 return SDValue();
3988
3989 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3990}
3991
// Default: no target-specific constant is associated with this load
// (presumably the getTargetConstantFromLoad hook — opener not visible here).
3993 return nullptr;
3994}
3995
// Default freeze/poison analysis for target nodes: a node is known not to be
// undef/poison when it cannot create undef/poison itself and all of its
// operands are themselves known not to be undef/poison.
3997 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3998 bool PoisonOnly, unsigned Depth) const {
3999 assert(
4000 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4001 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4002 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4003 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4004 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4005 " is a target node!");
4006
4007 // If Op can't create undef/poison and none of its operands are undef/poison
4008 // then Op is never undef/poison.
4009 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
4010 /*ConsiderFlags*/ true, Depth) &&
4011 all_of(Op->ops(), [&](SDValue V) {
4012 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
4013 Depth + 1);
4014 });
4015}
4016
// Default hook: without target knowledge we must assume any target node can
// introduce undef/poison.
4018 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4019 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
4020 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4021 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4022 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4023 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4024 "Should use canCreateUndefOrPoison if you don't know whether Op"
4025 " is a target node!");
4026 // Be conservative and return true.
4027 return true;
4028}
4029
// Default hook: the base class cannot prove a target node never produces a
// NaN, so answer "unknown" (false).
4031 const APInt &DemandedElts,
4032 const SelectionDAG &DAG,
4033 bool SNaN,
4034 unsigned Depth) const {
4035 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4036 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4037 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4038 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4039 "Should use isKnownNeverNaN if you don't know whether Op"
4040 " is a target node!");
4041 return false;
4042}
4043
// Default hook: the base class cannot prove a target node is a splat, so
// conservatively report false.
4045 const APInt &DemandedElts,
4046 APInt &UndefElts,
4047 const SelectionDAG &DAG,
4048 unsigned Depth) const {
4049 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4050 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4051 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4052 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4053 "Should use isSplatValue if you don't know whether Op"
4054 " is a target node!");
4055 return false;
4056}
4057
4058// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4059// work with truncating build vectors and vectors with elements of less than
4060// 8 bits.
// Returns true if N is a constant (or constant splat) that represents "true"
// under this target's boolean-contents convention for N's type.
4062 if (!N)
4063 return false;
4064
4065 unsigned EltWidth;
4066 APInt CVal;
// Accept scalar constants and truncating constant splats; undef elements in a
// splat are rejected (AllowUndefs=false).
4067 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4068 /*AllowTruncation=*/true)) {
4069 CVal = CN->getAPIntValue();
4070 EltWidth = N.getValueType().getScalarSizeInBits();
4071 } else
4072 return false;
4073
4074 // If this is a truncating splat, truncate the splat value.
4075 // Otherwise, we may fail to match the expected values below.
4076 if (EltWidth < CVal.getBitWidth())
4077 CVal = CVal.trunc(EltWidth);
4078
// Interpret the constant per boolean contents. NOTE(review): the case labels
// were dropped from this listing; the returns below presumably correspond to
// Undefined / ZeroOrOne / ZeroOrNegativeOne boolean contents — confirm against
// the upstream source.
4079 switch (getBooleanContents(N.getValueType())) {
4081 return CVal[0];
4083 return CVal.isOne();
4085 return CVal.isAllOnes();
4086 }
4087
4088 llvm_unreachable("Invalid boolean contents");
4089}
4090
// Returns true if N is a constant (or constant build-vector splat) that
// represents "false" under this target's boolean-contents convention.
4092 if (!N)
4093 return false;
4094
// NOTE(review): the dyn_cast lines producing CN/BV were dropped from this
// listing — confirm against the upstream source.
4096 if (!CN) {
4098 if (!BV)
4099 return false;
4100
4101 // Only interested in constant splats, we don't care about undef
4102 // elements in identifying boolean constants and getConstantSplatNode
4103 // returns NULL if all ops are undef;
4104 CN = BV->getConstantSplatNode();
4105 if (!CN)
4106 return false;
4107 }
4108
// With undefined boolean contents only the low bit is meaningful.
4109 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4110 return !CN->getAPIntValue()[0];
4111
4112 return CN->isZero();
4113}
4114
// Returns true if the constant N is "true" when extended (zero- or sign-
// extended per SExt) from some narrower type to VT.
4116 bool SExt) const {
// i1 needs no extension: true is exactly 1.
4117 if (VT == MVT::i1)
4118 return N->isOne();
4120
4121 switch (Cnt) {
4123 // An extended value of 1 is always true, unless its original type is i1,
4124 // in which case it will be sign extended to -1.
4125 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4128 return N->isAllOnes() && SExt;
4129 }
4130 llvm_unreachable("Unexpected enumeration.");
4131}
4132
4133/// This helper function of SimplifySetCC tries to optimize the comparison when
4134/// either operand of the SetCC node is a bitwise-and instruction.
// Only integer [in]equality (SETEQ/SETNE) comparisons of an AND node are
// handled; otherwise an empty SDValue ("no fold") is returned.
4135 SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4136 ISD::CondCode Cond, const SDLoc &DL,
4137 DAGCombinerInfo &DCI) const {
// Canonicalize so the AND (if any) sits on the LHS.
4138 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4139 std::swap(N0, N1);
4140
4141 SelectionDAG &DAG = DCI.DAG;
4142 EVT OpVT = N0.getValueType();
4143 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4144 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4145 return SDValue();
4146
4147 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4148 // iff everything but LSB is known zero:
4149 if (Cond == ISD::SETNE && isNullConstant(N1) &&
4152 unsigned NumEltBits = OpVT.getScalarSizeInBits();
4153 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4154 if (DAG.MaskedValueIsZero(N0, UpperBits))
4155 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4156 }
4157
4158 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4159 // test in a narrow type that we can truncate to with no cost. Examples:
4160 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4161 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4162 // TODO: This conservatively checks for type legality on the source and
4163 // destination types. That may inhibit optimizations, but it also
4164 // allows setcc->shift transforms that may be more beneficial.
4165 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4166 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4167 isTypeLegal(OpVT) && N0.hasOneUse()) {
4168 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4169 AndC->getAPIntValue().getActiveBits());
4170 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4171 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT)
4172 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4173 return DAG.getSetCC(DL, VT, Trunc, Zero,
4175 }
4176 }
4177
4178 // Match these patterns in any of their permutations:
4179 // (X & Y) == Y
4180 // (X & Y) != Y
4181 SDValue X, Y;
4182 if (N0.getOperand(0) == N1) {
4183 X = N0.getOperand(1);
4184 Y = N0.getOperand(0);
4185 } else if (N0.getOperand(1) == N1) {
4186 X = N0.getOperand(0);
4187 Y = N0.getOperand(1);
4188 } else {
4189 return SDValue();
4190 }
4191
4192 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4193 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4194 // its liable to create and infinite loop.
4195 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4196 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4198 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4199 // Note that where Y is variable and is known to have at most one bit set
4200 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4201 // equivalent when Y == 0.
4202 assert(OpVT.isInteger());
4204 if (DCI.isBeforeLegalizeOps() ||
4206 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4207 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4208 // If the target supports an 'and-not' or 'and-complement' logic operation,
4209 // try to use that to make a comparison operation more efficient.
4210 // But don't do this transform if the mask is a single bit because there are
4211 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4212 // 'rlwinm' on PPC).
4213
4214 // Bail out if the compare operand that we want to turn into a zero is
4215 // already a zero (otherwise, infinite loop).
4216 if (isNullConstant(Y))
4217 return SDValue();
4218
4219 // Transform this into: ~X & Y == 0.
4220 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4221 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4222 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4223 }
4224
4225 return SDValue();
4226}
4227
4228/// This helper function of SimplifySetCC tries to optimize the comparison when
4229/// either operand of the SetCC node is a bitwise-or instruction.
4230/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4231SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4232 ISD::CondCode Cond, const SDLoc &DL,
4233 DAGCombinerInfo &DCI) const {
4234 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4235 std::swap(N0, N1);
4236
4237 SelectionDAG &DAG = DCI.DAG;
4238 EVT OpVT = N0.getValueType();
4239 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4240 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4241 return SDValue();
4242
4243 // (X | Y) == Y
4244 // (X | Y) != Y
4245 SDValue X;
4246 if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(X)) {
4247 // If the target supports an 'and-not' or 'and-complement' logic operation,
4248 // try to use that to make a comparison operation more efficient.
4249
4250 // Bail out if the compare operand that we want to turn into a zero is
4251 // already a zero (otherwise, infinite loop).
4252 if (isNullConstant(N1))
4253 return SDValue();
4254
4255 // Transform this into: X & ~Y ==/!= 0.
4256 SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
4257 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
4258 return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
4259 }
4260
4261 return SDValue();
4262}
4263
4264/// There are multiple IR patterns that could be checking whether certain
4265/// truncation of a signed number would be lossy or not. The pattern which is
4266/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4267/// We are looking for the following pattern: (KeptBits is a constant)
4268/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4269/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4270/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4271/// We will unfold it into the natural trunc+sext pattern:
4272/// ((%x << C) a>> C) dstcond %x
4273/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4274 SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4275 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4276 const SDLoc &DL) const {
4277 // We must be comparing with a constant.
4278 ConstantSDNode *C1;
4279 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4280 return SDValue();
4281
4282 // N0 should be: add %x, (1 << (KeptBits-1))
4283 if (N0->getOpcode() != ISD::ADD)
4284 return SDValue();
4285
4286 // And we must be 'add'ing a constant.
4287 ConstantSDNode *C01;
4288 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4289 return SDValue();
4290
4291 SDValue X = N0->getOperand(0);
4292 EVT XVT = X.getValueType();
4293
4294 // Validate constants ...
4295
4296 APInt I1 = C1->getAPIntValue();
4297
// Normalize the four unsigned predicates to eq/ne against a canonical
// power-of-two constant: ULE/UGT are converted by bumping the constant by one.
4298 ISD::CondCode NewCond;
4299 if (Cond == ISD::CondCode::SETULT) {
4300 NewCond = ISD::CondCode::SETEQ;
4301 } else if (Cond == ISD::CondCode::SETULE) {
4302 NewCond = ISD::CondCode::SETEQ;
4303 // But need to 'canonicalize' the constant.
4304 I1 += 1;
4305 } else if (Cond == ISD::CondCode::SETUGT) {
4306 NewCond = ISD::CondCode::SETNE;
4307 // But need to 'canonicalize' the constant.
4308 I1 += 1;
4309 } else if (Cond == ISD::CondCode::SETUGE) {
4310 NewCond = ISD::CondCode::SETNE;
4311 } else
4312 return SDValue();
4313
4314 APInt I01 = C01->getAPIntValue();
4315
4316 auto checkConstants = [&I1, &I01]() -> bool {
4317 // Both of them must be power-of-two, and the constant from setcc is bigger.
4318 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4319 };
4320
4321 if (checkConstants()) {
4322 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4323 } else {
4324 // What if we invert constants? (and the target predicate)
4325 I1.negate();
4326 I01.negate();
4327 assert(XVT.isInteger());
4328 NewCond = getSetCCInverse(NewCond, XVT);
4329 if (!checkConstants())
4330 return SDValue();
4331 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4332 }
4333
4334 // They are power-of-two, so which bit is set?
4335 const unsigned KeptBits = I1.logBase2();
4336 const unsigned KeptBitsMinusOne = I01.logBase2();
4337
4338 // Magic!
4339 if (KeptBits != (KeptBitsMinusOne + 1))
4340 return SDValue();
4341 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4342
4343 // We don't want to do this in every single case.
4344 SelectionDAG &DAG = DCI.DAG;
4345 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4346 return SDValue();
4347
4348 // Unfold into: sext_inreg(%x) cond %x
4349 // Where 'cond' will be either 'eq' or 'ne'.
4350 SDValue SExtInReg = DAG.getNode(
4352 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4353 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4354}
4355
4356// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
// Rewrites (X & (C l>>/<< Y)) ==/!= 0 as ((X <</l>> Y) & C) ==/!= 0, hoisting
// the constant out of the shifted mask when the target prefers that form.
4357 SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4358 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4359 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4361 "Should be a comparison with 0.");
4362 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4363 "Valid only for [in]equality comparisons.");
4364
4365 unsigned NewShiftOpcode;
4366 SDValue X, C, Y;
4367
4368 SelectionDAG &DAG = DCI.DAG;
4369
4370 // Look for '(C l>>/<< Y)'.
4371 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4372 // The shift should be one-use.
4373 if (!V.hasOneUse())
4374 return false;
// The rewrite uses the opposite logical shift on X (SHL <-> SRL).
4375 unsigned OldShiftOpcode = V.getOpcode();
4376 switch (OldShiftOpcode) {
4377 case ISD::SHL:
4378 NewShiftOpcode = ISD::SRL;
4379 break;
4380 case ISD::SRL:
4381 NewShiftOpcode = ISD::SHL;
4382 break;
4383 default:
4384 return false; // must be a logical shift.
4385 }
4386 // We should be shifting a constant.
4387 // FIXME: best to use isConstantOrConstantVector().
4388 C = V.getOperand(0);
4389 ConstantSDNode *CC =
4390 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4391 if (!CC)
4392 return false;
4393 Y = V.getOperand(1);
4394
4395 ConstantSDNode *XC =
4396 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4398 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4399 };
4400
4401 // LHS of comparison should be an one-use 'and'.
4402 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4403 return SDValue();
4404
4405 X = N0.getOperand(0);
4406 SDValue Mask = N0.getOperand(1);
4407
4408 // 'and' is commutative!
4409 if (!Match(Mask)) {
4410 std::swap(X, Mask);
4411 if (!Match(Mask))
4412 return SDValue();
4413 }
4414
4415 EVT VT = X.getValueType();
4416
4417 // Produce:
4418 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4419 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4420 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4421 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4422 return T2;
4423}
4424
4425/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4426/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4427/// handle the commuted versions of these patterns.
4428SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4429 ISD::CondCode Cond, const SDLoc &DL,
4430 DAGCombinerInfo &DCI) const {
4431 unsigned BOpcode = N0.getOpcode();
4432 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4433 "Unexpected binop");
4434 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4435
4436 // (X + Y) == X --> Y == 0
4437 // (X - Y) == X --> Y == 0
4438 // (X ^ Y) == X --> Y == 0
4439 SelectionDAG &DAG = DCI.DAG;
4440 EVT OpVT = N0.getValueType();
4441 SDValue X = N0.getOperand(0);
4442 SDValue Y = N0.getOperand(1);
4443 if (X == N1)
4444 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4445
4446 if (Y != N1)
4447 return SDValue();
4448
4449 // (X + Y) == Y --> X == 0
4450 // (X ^ Y) == Y --> X == 0
4451 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4452 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4453
4454 // The shift would not be valid if the operands are boolean (i1).
4455 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4456 return SDValue();
4457
4458 // (X - Y) == Y --> X == Y << 1
4459 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4460 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4461 if (!DCI.isCalledByLegalizer())
4462 DCI.AddToWorklist(YShl1.getNode());
4463 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4464}
4465
// Helper for SimplifySetCC: expand comparisons of (ctpop x) against small
// constants into cheaper bit tricks when ctpop is not fast on the target.
// (Opening signature line is not visible in this listing — TODO confirm.)
4467 SDValue N0, const APInt &C1,
4468 ISD::CondCode Cond, const SDLoc &dl,
4469 SelectionDAG &DAG) {
4470 // Look through truncs that don't change the value of a ctpop.
4471 // FIXME: Add vector support? Need to be careful with setcc result type below.
4472 SDValue CTPOP = N0;
4473 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4475 CTPOP = N0.getOperand(0);
4476
4477 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4478 return SDValue();
4479
4480 EVT CTVT = CTPOP.getValueType();
4481 SDValue CTOp = CTPOP.getOperand(0);
4482
4483 // Expand a power-of-2-or-zero comparison based on ctpop:
4484 // (ctpop x) u< 2 -> (x & x-1) == 0
4485 // (ctpop x) u> 1 -> (x & x-1) != 0
4486 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4487 // Keep the CTPOP if it is a cheap vector op.
4488 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4489 return SDValue();
4490
// Each "clear lowest set bit" pass costs one ADD and one AND; give up if the
// constant would require more passes than the target's cost limit.
4491 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4492 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4493 return SDValue();
4494 if (C1 == 0 && (Cond == ISD::SETULT))
4495 return SDValue(); // This is handled elsewhere.
4496
4497 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4498
4499 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4500 SDValue Result = CTOp;
// Repeatedly apply x &= (x - 1) to strip one set bit per pass.
4501 for (unsigned i = 0; i < Passes; i++) {
4502 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4503 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4504 }
4506 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4507 }
4508
4509 // Expand a power-of-2 comparison based on ctpop
4510 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4511 // Keep the CTPOP if it is cheap.
4512 if (TLI.isCtpopFast(CTVT))
4513 return SDValue();
4514
4515 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4516 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4517 assert(CTVT.isInteger());
4518 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4519
4520 // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4521 // check before emitting a potentially unnecessary op.
4522 if (DAG.isKnownNeverZero(CTOp)) {
4523 // (ctpop x) == 1 --> (x & x-1) == 0
4524 // (ctpop x) != 1 --> (x & x-1) != 0
4525 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4526 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4527 return RHS;
4528 }
4529
4530 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4531 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4532 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4534 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4535 }
4536
4537 return SDValue();
4538}
4539
4541 ISD::CondCode Cond, const SDLoc &dl,
4542 SelectionDAG &DAG) {
 // Equality comparisons against 0 or -1 are invariant under rotation:
 // a rotate permutes bits but cannot change whether they are all-zero or
 // all-one. Strip the rotate (directly, or through one 'or') in that case.
 // Returns a null SDValue when the pattern does not match.
4543 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4544 return SDValue();
4545
 // RHS must be a (splat) constant 0 or -1; undef lanes are tolerated.
4546 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4547 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4548 return SDValue();
4549
 // Helper: if X is a rotate, return the value being rotated.
4550 auto getRotateSource = [](SDValue X) {
4551 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4552 return X.getOperand(0);
4553 return SDValue();
4554 };
4555
4556 // Peek through a rotated value compared against 0 or -1:
4557 // (rot X, Y) == 0/-1 --> X == 0/-1
4558 // (rot X, Y) != 0/-1 --> X != 0/-1
4559 if (SDValue R = getRotateSource(N0))
4560 return DAG.getSetCC(dl, VT, R, N1, Cond);
4561
4562 // Peek through an 'or' of a rotated value compared against 0:
4563 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4564 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4565 //
4566 // TODO: Add the 'and' with -1 sibling.
4567 // TODO: Recurse through a series of 'or' ops to find the rotate.
4568 EVT OpVT = N0.getValueType();
4569 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4570 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4571 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4572 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4573 }
4574 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4575 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4576 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4577 }
4578 }
4579
4580 return SDValue();
4581}
4582
4584 ISD::CondCode Cond, const SDLoc &dl,
4585 SelectionDAG &DAG) {
 // Fold an all-bits-clear test of a funnel shift whose two inputs share an
 // operand through an 'or' into a single plain shift plus 'or'. Since only
 // ==/!= 0 is tested, bit order within the result does not matter, so the
 // funnel shift can be replaced by a cheaper SHL/SRL of the non-shared
 // operand. Returns a null SDValue when the pattern does not match.
4586 // If we are testing for all-bits-clear, we might be able to do that with
4587 // less shifting since bit-order does not matter.
4588 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4589 return SDValue();
4590
4591 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4592 if (!C1 || !C1->isZero())
4593 return SDValue();
4594
4595 if (!N0.hasOneUse() ||
4596 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4597 return SDValue();
4598
 // The shift amount must be a (splat) constant; reduce it modulo the bit
 // width, which is how FSHL/FSHR define out-of-range amounts.
4599 unsigned BitWidth = N0.getScalarValueSizeInBits();
4600 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4601 if (!ShAmtC)
4602 return SDValue();
4603
4604 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(BitWidth);
4605 if (ShAmt == 0)
4606 return SDValue();
4607
4608 // Canonicalize fshr as fshl to reduce pattern-matching.
4609 if (N0.getOpcode() == ISD::FSHR)
4610 ShAmt = BitWidth - ShAmt;
4611
4612 // Match an 'or' with a specific operand 'Other' in either commuted variant.
 // On success, X is the operand equal to 'Other' and Y is the other one.
4613 SDValue X, Y;
4614 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4615 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4616 return false;
4617 if (Or.getOperand(0) == Other) {
4618 X = Or.getOperand(0);
4619 Y = Or.getOperand(1);
4620 return true;
4621 }
4622 if (Or.getOperand(1) == Other) {
4623 X = Or.getOperand(1);
4624 Y = Or.getOperand(0);
4625 return true;
4626 }
4627 return false;
4628 };
4629
4630 EVT OpVT = N0.getValueType();
4631 EVT ShAmtVT = N0.getOperand(2).getValueType();
4632 SDValue F0 = N0.getOperand(0);
4633 SDValue F1 = N0.getOperand(1);
4634 if (matchOr(F0, F1)) {
4635 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4636 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4637 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4638 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4639 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4640 }
4641 if (matchOr(F1, F0)) {
4642 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4643 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4644 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4645 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4646 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4647 }
4648
4649 return SDValue();
4650}
4651
4652/// Try to simplify a setcc built with the specified operands and cc. If it is
4653/// unable to simplify it, return a null SDValue.
4655 ISD::CondCode Cond, bool foldBooleans,
4656 DAGCombinerInfo &DCI,
4657 const SDLoc &dl) const {
4658 SelectionDAG &DAG = DCI.DAG;
4659 const DataLayout &Layout = DAG.getDataLayout();
4660 EVT OpVT = N0.getValueType();
4661 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4662
4663 // Constant fold or commute setcc.
4664 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4665 return Fold;
4666
4667 bool N0ConstOrSplat =
4668 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4669 bool N1ConstOrSplat =
4670 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4671
4672 // Canonicalize toward having the constant on the RHS.
4673 // TODO: Handle non-splat vector constants. All undef causes trouble.
4674 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4675 // infinite loop here when we encounter one.
4677 if (N0ConstOrSplat && !N1ConstOrSplat &&
4678 (DCI.isBeforeLegalizeOps() ||
4679 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4680 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4681
4682 // If we have a subtract with the same 2 non-constant operands as this setcc
4683 // -- but in reverse order -- then try to commute the operands of this setcc
4684 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4685 // instruction on some targets.
4686 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4687 (DCI.isBeforeLegalizeOps() ||
4688 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4689 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4690 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4691 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4692
4693 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4694 return V;
4695
4696 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4697 return V;
4698
4699 if (auto *N1C = isConstOrConstSplat(N1)) {
4700 const APInt &C1 = N1C->getAPIntValue();
4701
4702 // Optimize some CTPOP cases.
4703 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4704 return V;
4705
4706 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4707 // X * Y == 0 --> (X == 0) || (Y == 0)
4708 // X * Y != 0 --> (X != 0) && (Y != 0)
4709 // TODO: This bails out if minsize is set, but if the target doesn't have a
4710 // single instruction multiply for this type, it would likely be
4711 // smaller to decompose.
4712 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4713 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4714 (N0->getFlags().hasNoUnsignedWrap() ||
4715 N0->getFlags().hasNoSignedWrap()) &&
4716 !Attr.hasFnAttr(Attribute::MinSize)) {
4717 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4718 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4719 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4720 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4721 }
4722
4723 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4724 // equality comparison, then we're just comparing whether X itself is
4725 // zero.
4726 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4727 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4729 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4730 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4731 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4732 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4733 // (srl (ctlz x), 5) == 0 -> X != 0
4734 // (srl (ctlz x), 5) != 1 -> X != 0
4735 Cond = ISD::SETNE;
4736 } else {
4737 // (srl (ctlz x), 5) != 0 -> X == 0
4738 // (srl (ctlz x), 5) == 1 -> X == 0
4739 Cond = ISD::SETEQ;
4740 }
4741 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4742 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4743 Cond);
4744 }
4745 }
4746 }
4747 }
4748
4749 // FIXME: Support vectors.
4750 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4751 const APInt &C1 = N1C->getAPIntValue();
4752
4753 // (zext x) == C --> x == (trunc C)
4754 // (sext x) == C --> x == (trunc C)
4755 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4756 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4757 unsigned MinBits = N0.getValueSizeInBits();
4758 SDValue PreExt;
4759 bool Signed = false;
4760 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4761 // ZExt
4762 MinBits = N0->getOperand(0).getValueSizeInBits();
4763 PreExt = N0->getOperand(0);
4764 } else if (N0->getOpcode() == ISD::AND) {
4765 // DAGCombine turns costly ZExts into ANDs
4766 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4767 if ((C->getAPIntValue()+1).isPowerOf2()) {
4768 MinBits = C->getAPIntValue().countr_one();
4769 PreExt = N0->getOperand(0);
4770 }
4771 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4772 // SExt
4773 MinBits = N0->getOperand(0).getValueSizeInBits();
4774 PreExt = N0->getOperand(0);
4775 Signed = true;
4776 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4777 // ZEXTLOAD / SEXTLOAD
4778 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4779 MinBits = LN0->getMemoryVT().getSizeInBits();
4780 PreExt = N0;
4781 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4782 Signed = true;
4783 MinBits = LN0->getMemoryVT().getSizeInBits();
4784 PreExt = N0;
4785 }
4786 }
4787
4788 // Figure out how many bits we need to preserve this constant.
4789 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4790
4791 // Make sure we're not losing bits from the constant.
4792 if (MinBits > 0 &&
4793 MinBits < C1.getBitWidth() &&
4794 MinBits >= ReqdBits) {
4795 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4796 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4797 // Will get folded away.
4798 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4799 if (MinBits == 1 && C1 == 1)
4800 // Invert the condition.
4801 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4803 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4804 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4805 }
4806
4807 // If truncating the setcc operands is not desirable, we can still
4808 // simplify the expression in some cases:
4809 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4810 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4811 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4812 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4813 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4814 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4815 SDValue TopSetCC = N0->getOperand(0);
4816 unsigned N0Opc = N0->getOpcode();
4817 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4818 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4819 TopSetCC.getOpcode() == ISD::SETCC &&
4820 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4821 (isConstFalseVal(N1) ||
4822 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4823
4824 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4825 (!N1C->isZero() && Cond == ISD::SETNE);
4826
4827 if (!Inverse)
4828 return TopSetCC;
4829
4831 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4832 TopSetCC.getOperand(0).getValueType());
4833 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4834 TopSetCC.getOperand(1),
4835 InvCond);
4836 }
4837 }
4838 }
4839
4840 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4841 // equality or unsigned, and all 1 bits of the const are in the same
4842 // partial word, see if we can shorten the load.
4843 if (DCI.isBeforeLegalize() &&
4845 N0.getOpcode() == ISD::AND && C1 == 0 &&
4846 N0.getNode()->hasOneUse() &&
4847 isa<LoadSDNode>(N0.getOperand(0)) &&
4848 N0.getOperand(0).getNode()->hasOneUse() &&
4850 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4851 APInt bestMask;
4852 unsigned bestWidth = 0, bestOffset = 0;
4853 if (Lod->isSimple() && Lod->isUnindexed() &&
4854 (Lod->getMemoryVT().isByteSized() ||
4855 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4856 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4857 unsigned origWidth = N0.getValueSizeInBits();
4858 unsigned maskWidth = origWidth;
4859 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4860 // 8 bits, but have to be careful...
4861 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4862 origWidth = Lod->getMemoryVT().getSizeInBits();
4863 const APInt &Mask = N0.getConstantOperandAPInt(1);
4864 // Only consider power-of-2 widths (and at least one byte) as candidates
4865 // for the narrowed load.
4866 for (unsigned width = 8; width < origWidth; width *= 2) {
4867 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4868 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4869 // Avoid accessing any padding here for now (we could use memWidth
4870 // instead of origWidth here otherwise).
4871 unsigned maxOffset = origWidth - width;
4872 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4873 if (Mask.isSubsetOf(newMask)) {
4874 unsigned ptrOffset =
4875 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4876 unsigned IsFast = 0;
4877 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4878 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4880 ptrOffset / 8) &&
4882 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4883 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4884 IsFast) {
4885 bestOffset = ptrOffset / 8;
4886 bestMask = Mask.lshr(offset);
4887 bestWidth = width;
4888 break;
4889 }
4890 }
4891 newMask <<= 8;
4892 }
4893 if (bestWidth)
4894 break;
4895 }
4896 }
4897 if (bestWidth) {
4898 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4899 SDValue Ptr = Lod->getBasePtr();
4900 if (bestOffset != 0)
4901 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4902 SDValue NewLoad =
4903 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4904 Lod->getPointerInfo().getWithOffset(bestOffset),
4905 Lod->getBaseAlign());
4906 SDValue And =
4907 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4908 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4909 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4910 }
4911 }
4912
4913 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4914 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4915 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4916
4917 // If the comparison constant has bits in the upper part, the
4918 // zero-extended value could never match.
4920 C1.getBitWidth() - InSize))) {
4921 switch (Cond) {
4922 case ISD::SETUGT:
4923 case ISD::SETUGE:
4924 case ISD::SETEQ:
4925 return DAG.getConstant(0, dl, VT);
4926 case ISD::SETULT:
4927 case ISD::SETULE:
4928 case ISD::SETNE:
4929 return DAG.getConstant(1, dl, VT);
4930 case ISD::SETGT:
4931 case ISD::SETGE:
4932 // True if the sign bit of C1 is set.
4933 return DAG.getConstant(C1.isNegative(), dl, VT);
4934 case ISD::SETLT:
4935 case ISD::SETLE:
4936 // True if the sign bit of C1 isn't set.
4937 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4938 default:
4939 break;
4940 }
4941 }
4942
4943 // Otherwise, we can perform the comparison with the low bits.
4944 switch (Cond) {
4945 case ISD::SETEQ:
4946 case ISD::SETNE:
4947 case ISD::SETUGT:
4948 case ISD::SETUGE:
4949 case ISD::SETULT:
4950 case ISD::SETULE: {
4951 EVT newVT = N0.getOperand(0).getValueType();
4952 // FIXME: Should use isNarrowingProfitable.
4953 if (DCI.isBeforeLegalizeOps() ||
4954 (isOperationLegal(ISD::SETCC, newVT) &&
4955 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
4957 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4958 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4959
4960 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4961 NewConst, Cond);
4962 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4963 }
4964 break;
4965 }
4966 default:
4967 break; // todo, be more careful with signed comparisons
4968 }
4969 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4970 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4972 OpVT)) {
4973 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4974 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4975 EVT ExtDstTy = N0.getValueType();
4976 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4977
4978 // If the constant doesn't fit into the number of bits for the source of
4979 // the sign extension, it is impossible for both sides to be equal.
4980 if (C1.getSignificantBits() > ExtSrcTyBits)
4981 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4982
4983 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4984 ExtDstTy != ExtSrcTy && "Unexpected types!");
4985 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4986 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4987 DAG.getConstant(Imm, dl, ExtDstTy));
4988 if (!DCI.isCalledByLegalizer())
4989 DCI.AddToWorklist(ZextOp.getNode());
4990 // Otherwise, make this a use of a zext.
4991 return DAG.getSetCC(dl, VT, ZextOp,
4992 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4993 } else if ((N1C->isZero() || N1C->isOne()) &&
4994 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4995 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
4996 // excluded as they are handled below whilst checking for foldBooleans.
4997 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4998 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4999 (N0.getValueType() == MVT::i1 ||
5003 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5004 if (TrueWhenTrue)
5005 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
5006 // Invert the condition.
5007 if (N0.getOpcode() == ISD::SETCC) {
5010 if (DCI.isBeforeLegalizeOps() ||
5012 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
5013 }
5014 }
5015
5016 if ((N0.getOpcode() == ISD::XOR ||
5017 (N0.getOpcode() == ISD::AND &&
5018 N0.getOperand(0).getOpcode() == ISD::XOR &&
5019 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
5020 isOneConstant(N0.getOperand(1))) {
5021 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5022 // can only do this if the top bits are known zero.
5023 unsigned BitWidth = N0.getValueSizeInBits();
5024 if (DAG.MaskedValueIsZero(N0,
5026 BitWidth-1))) {
5027 // Okay, get the un-inverted input value.
5028 SDValue Val;
5029 if (N0.getOpcode() == ISD::XOR) {
5030 Val = N0.getOperand(0);
5031 } else {
5032 assert(N0.getOpcode() == ISD::AND &&
5033 N0.getOperand(0).getOpcode() == ISD::XOR);
5034 // ((X^1)&1)^1 -> X & 1
5035 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
5036 N0.getOperand(0).getOperand(0),
5037 N0.getOperand(1));
5038 }
5039
5040 return DAG.getSetCC(dl, VT, Val, N1,
5042 }
5043 } else if (N1C->isOne()) {
5044 SDValue Op0 = N0;
5045 if (Op0.getOpcode() == ISD::TRUNCATE)
5046 Op0 = Op0.getOperand(0);
5047
5048 if ((Op0.getOpcode() == ISD::XOR) &&
5049 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
5050 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
5051 SDValue XorLHS = Op0.getOperand(0);
5052 SDValue XorRHS = Op0.getOperand(1);
5053 // Ensure that the input setccs return an i1 type or 0/1 value.
5054 if (Op0.getValueType() == MVT::i1 ||
5059 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5061 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
5062 }
5063 }
5064 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
5065 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5066 if (Op0.getValueType().bitsGT(VT))
5067 Op0 = DAG.getNode(ISD::AND, dl, VT,
5068 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
5069 DAG.getConstant(1, dl, VT));
5070 else if (Op0.getValueType().bitsLT(VT))
5071 Op0 = DAG.getNode(ISD::AND, dl, VT,
5072 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
5073 DAG.getConstant(1, dl, VT));
5074
5075 return DAG.getSetCC(dl, VT, Op0,
5076 DAG.getConstant(0, dl, Op0.getValueType()),
5078 }
5079 if (Op0.getOpcode() == ISD::AssertZext &&
5080 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
5081 return DAG.getSetCC(dl, VT, Op0,
5082 DAG.getConstant(0, dl, Op0.getValueType()),
5084 }
5085 }
5086
5087 // Given:
5088 // icmp eq/ne (urem %x, %y), 0
5089 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5090 // icmp eq/ne %x, 0
5091 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5092 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5093 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
5094 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
5095 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5096 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
5097 }
5098
5099 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5100 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5101 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5103 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
5104 N1C->isAllOnes()) {
5105 return DAG.getSetCC(dl, VT, N0.getOperand(0),
5106 DAG.getConstant(0, dl, OpVT),
5108 }
5109
5110 // fold (setcc (trunc x) c) -> (setcc x c)
5111 if (N0.getOpcode() == ISD::TRUNCATE &&
5113 (N0->getFlags().hasNoSignedWrap() &&
5116 EVT NewVT = N0.getOperand(0).getValueType();
5117 SDValue NewConst = DAG.getConstant(
5119 ? C1.sext(NewVT.getSizeInBits())
5120 : C1.zext(NewVT.getSizeInBits()),
5121 dl, NewVT);
5122 return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5123 }
5124
5125 if (SDValue V =
5126 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
5127 return V;
5128 }
5129
5130 // These simplifications apply to splat vectors as well.
5131 // TODO: Handle more splat vector cases.
5132 if (auto *N1C = isConstOrConstSplat(N1)) {
5133 const APInt &C1 = N1C->getAPIntValue();
5134
5135 APInt MinVal, MaxVal;
5136 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
5138 MinVal = APInt::getSignedMinValue(OperandBitSize);
5139 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
5140 } else {
5141 MinVal = APInt::getMinValue(OperandBitSize);
5142 MaxVal = APInt::getMaxValue(OperandBitSize);
5143 }
5144
5145 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5146 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5147 // X >= MIN --> true
5148 if (C1 == MinVal)
5149 return DAG.getBoolConstant(true, dl, VT, OpVT);
5150
5151 if (!VT.isVector()) { // TODO: Support this for vectors.
5152 // X >= C0 --> X > (C0 - 1)
5153 APInt C = C1 - 1;
5155 if ((DCI.isBeforeLegalizeOps() ||
5156 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5157 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5158 isLegalICmpImmediate(C.getSExtValue())))) {
5159 return DAG.getSetCC(dl, VT, N0,
5160 DAG.getConstant(C, dl, N1.getValueType()),
5161 NewCC);
5162 }
5163 }
5164 }
5165
5166 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5167 // X <= MAX --> true
5168 if (C1 == MaxVal)
5169 return DAG.getBoolConstant(true, dl, VT, OpVT);
5170
5171 // X <= C0 --> X < (C0 + 1)
5172 if (!VT.isVector()) { // TODO: Support this for vectors.
5173 APInt C = C1 + 1;
5175 if ((DCI.isBeforeLegalizeOps() ||
5176 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5177 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5178 isLegalICmpImmediate(C.getSExtValue())))) {
5179 return DAG.getSetCC(dl, VT, N0,
5180 DAG.getConstant(C, dl, N1.getValueType()),
5181 NewCC);
5182 }
5183 }
5184 }
5185
5186 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5187 if (C1 == MinVal)
5188 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5189
5190 // TODO: Support this for vectors after legalize ops.
5191 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5192 // Canonicalize setlt X, Max --> setne X, Max
5193 if (C1 == MaxVal)
5194 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5195
5196 // If we have setult X, 1, turn it into seteq X, 0
5197 if (C1 == MinVal+1)
5198 return DAG.getSetCC(dl, VT, N0,
5199 DAG.getConstant(MinVal, dl, N0.getValueType()),
5200 ISD::SETEQ);
5201 }
5202 }
5203
5204 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5205 if (C1 == MaxVal)
5206 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5207
5208 // TODO: Support this for vectors after legalize ops.
5209 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5210 // Canonicalize setgt X, Min --> setne X, Min
5211 if (C1 == MinVal)
5212 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5213
5214 // If we have setugt X, Max-1, turn it into seteq X, Max
5215 if (C1 == MaxVal-1)
5216 return DAG.getSetCC(dl, VT, N0,
5217 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5218 ISD::SETEQ);
5219 }
5220 }
5221
5222 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5223 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5224 if (C1.isZero())
5225 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5226 VT, N0, N1, Cond, DCI, dl))
5227 return CC;
5228
5229 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5230 // For example, when high 32-bits of i64 X are known clear:
5231 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5232 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5233 bool CmpZero = N1C->isZero();
5234 bool CmpNegOne = N1C->isAllOnes();
5235 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5236 // Match or(lo,shl(hi,bw/2)) pattern.
5237 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5238 unsigned EltBits = V.getScalarValueSizeInBits();
5239 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5240 return false;
5241 SDValue LHS = V.getOperand(0);
5242 SDValue RHS = V.getOperand(1);
5243 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5244 // Unshifted element must have zero upperbits.
5245 if (RHS.getOpcode() == ISD::SHL &&
5246 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5247 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5248 DAG.MaskedValueIsZero(LHS, HiBits)) {
5249 Lo = LHS;
5250 Hi = RHS.getOperand(0);
5251 return true;
5252 }
5253 if (LHS.getOpcode() == ISD::SHL &&
5254 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5255 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5256 DAG.MaskedValueIsZero(RHS, HiBits)) {
5257 Lo = RHS;
5258 Hi = LHS.getOperand(0);
5259 return true;
5260 }
5261 return false;
5262 };
5263
5264 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5265 unsigned EltBits = N0.getScalarValueSizeInBits();
5266 unsigned HalfBits = EltBits / 2;
5267 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5268 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5269 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5270 SDValue NewN0 =
5271 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5272 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5273 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5274 };
5275
5276 SDValue Lo, Hi;
5277 if (IsConcat(N0, Lo, Hi))
5278 return MergeConcat(Lo, Hi);
5279
5280 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5281 SDValue Lo0, Lo1, Hi0, Hi1;
5282 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5283 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5284 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5285 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5286 }
5287 }
5288 }
5289 }
5290
5291 // If we have "setcc X, C0", check to see if we can shrink the immediate
5292 // by changing cc.
5293 // TODO: Support this for vectors after legalize ops.
5294 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5295 // SETUGT X, SINTMAX -> SETLT X, 0
5296 // SETUGE X, SINTMIN -> SETLT X, 0
5297 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5298 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5299 return DAG.getSetCC(dl, VT, N0,
5300 DAG.getConstant(0, dl, N1.getValueType()),
5301 ISD::SETLT);
5302
5303 // SETULT X, SINTMIN -> SETGT X, -1
5304 // SETULE X, SINTMAX -> SETGT X, -1
5305 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5306 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5307 return DAG.getSetCC(dl, VT, N0,
5308 DAG.getAllOnesConstant(dl, N1.getValueType()),
5309 ISD::SETGT);
5310 }
5311 }
5312
5313 // Back to non-vector simplifications.
5314 // TODO: Can we do these for vector splats?
5315 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5316 const APInt &C1 = N1C->getAPIntValue();
5317 EVT ShValTy = N0.getValueType();
5318
5319 // Fold bit comparisons when we can. This will result in an
5320 // incorrect value when boolean false is negative one, unless
5321 // the bitsize is 1 in which case the false value is the same
5322 // in practice regardless of the representation.
5323 if ((VT.getSizeInBits() == 1 ||
5325 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5326 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5327 N0.getOpcode() == ISD::AND) {
5328 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5329 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5330 // Perform the xform if the AND RHS is a single bit.
5331 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5332 if (AndRHS->getAPIntValue().isPowerOf2() &&
5333 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5334 return DAG.getNode(
5335 ISD::TRUNCATE, dl, VT,
5336 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5337 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5338 }
5339 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5340 // (X & 8) == 8 --> (X & 8) >> 3
5341 // Perform the xform if C1 is a single bit.
5342 unsigned ShCt = C1.logBase2();
5343 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5344 return DAG.getNode(
5345 ISD::TRUNCATE, dl, VT,
5346 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5347 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5348 }
5349 }
5350 }
5351 }
5352
5353 if (C1.getSignificantBits() <= 64 &&
5355 // (X & -256) == 256 -> (X >> 8) == 1
5356 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5357 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5358 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5359 const APInt &AndRHSC = AndRHS->getAPIntValue();
5360 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5361 unsigned ShiftBits = AndRHSC.countr_zero();
5362 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5363 // If using an unsigned shift doesn't yield a legal compare
5364 // immediate, try using sra instead.
5365 APInt NewC = C1.lshr(ShiftBits);
5366 if (NewC.getSignificantBits() <= 64 &&
5368 APInt SignedC = C1.ashr(ShiftBits);
5369 if (SignedC.getSignificantBits() <= 64 &&
5371 SDValue Shift = DAG.getNode(
5372 ISD::SRA, dl, ShValTy, N0.getOperand(0),
5373 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5374 SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
5375 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5376 }
5377 }
5378 SDValue Shift = DAG.getNode(
5379 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5380 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5381 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5382 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5383 }
5384 }
5385 }
5386 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5387 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5388 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5389 // X < 0x100000000 -> (X >> 32) < 1
5390 // X >= 0x100000000 -> (X >> 32) >= 1
5391 // X <= 0x0ffffffff -> (X >> 32) < 1
5392 // X > 0x0ffffffff -> (X >> 32) >= 1
5393 unsigned ShiftBits;
5394 APInt NewC = C1;
5395 ISD::CondCode NewCond = Cond;
5396 if (AdjOne) {
5397 ShiftBits = C1.countr_one();
5398 NewC = NewC + 1;
5399 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5400 } else {
5401 ShiftBits = C1.countr_zero();
5402 }
5403 NewC.lshrInPlace(ShiftBits);
5404 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5406 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5407 SDValue Shift =
5408 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5409 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5410 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5411 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5412 }
5413 }
5414 }
5415 }
5416
5418 auto *CFP = cast<ConstantFPSDNode>(N1);
5419 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5420
5421 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5422 // constant if knowing that the operand is non-nan is enough. We prefer to
5423 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5424 // materialize 0.0.
5425 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5426 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5427
5428 // setcc (fneg x), C -> setcc swap(pred) x, -C
5429 if (N0.getOpcode() == ISD::FNEG) {
5431 if (DCI.isBeforeLegalizeOps() ||
5432 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5433 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5434 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5435 }
5436 }
5437
5438 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5440 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5441 bool IsFabs = N0.getOpcode() == ISD::FABS;
5442 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5443 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5444 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5445 : (IsFabs ? fcInf : fcPosInf);
5446 if (Cond == ISD::SETUEQ)
5447 Flag |= fcNan;
5448 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5449 DAG.getTargetConstant(Flag, dl, MVT::i32));
5450 }
5451 }
5452
5453 // If the condition is not legal, see if we can find an equivalent one
5454 // which is legal.
5456 // If the comparison was an awkward floating-point == or != and one of
5457 // the comparison operands is infinity or negative infinity, convert the
5458 // condition to a less-awkward <= or >=.
5459 if (CFP->getValueAPF().isInfinity()) {
5460 bool IsNegInf = CFP->getValueAPF().isNegative();
5462 switch (Cond) {
5463 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5464 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5465 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5466 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5467 default: break;
5468 }
5469 if (NewCond != ISD::SETCC_INVALID &&
5470 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5471 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5472 }
5473 }
5474 }
5475
5476 if (N0 == N1) {
5477 // The sext(setcc()) => setcc() optimization relies on the appropriate
5478 // constant being emitted.
5479 assert(!N0.getValueType().isInteger() &&
5480 "Integer types should be handled by FoldSetCC");
5481
5482 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5483 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5484 if (UOF == 2) // FP operators that are undefined on NaNs.
5485 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5486 if (UOF == unsigned(EqTrue))
5487 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5488 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5489 // if it is not already.
5490 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5491 if (NewCond != Cond &&
5492 (DCI.isBeforeLegalizeOps() ||
5493 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5494 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5495 }
5496
5497 // ~X > ~Y --> Y > X
5498 // ~X < ~Y --> Y < X
5499 // ~X < C --> X > ~C
5500 // ~X > C --> X < ~C
5501 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5502 N0.getValueType().isInteger()) {
5503 if (isBitwiseNot(N0)) {
5504 if (isBitwiseNot(N1))
5505 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5506
5509 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5510 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5511 }
5512 }
5513 }
5514
5515 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5516 N0.getValueType().isInteger()) {
5517 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5518 N0.getOpcode() == ISD::XOR) {
5519 // Simplify (X+Y) == (X+Z) --> Y == Z
5520 if (N0.getOpcode() == N1.getOpcode()) {
5521 if (N0.getOperand(0) == N1.getOperand(0))
5522 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5523 if (N0.getOperand(1) == N1.getOperand(1))
5524 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5525 if (isCommutativeBinOp(N0.getOpcode())) {
5526 // If X op Y == Y op X, try other combinations.
5527 if (N0.getOperand(0) == N1.getOperand(1))
5528 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5529 Cond);
5530 if (N0.getOperand(1) == N1.getOperand(0))
5531 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5532 Cond);
5533 }
5534 }
5535
5536 // If RHS is a legal immediate value for a compare instruction, we need
5537 // to be careful about increasing register pressure needlessly.
5538 bool LegalRHSImm = false;
5539
5540 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5541 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5542 // Turn (X+C1) == C2 --> X == C2-C1
5543 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5544 return DAG.getSetCC(
5545 dl, VT, N0.getOperand(0),
5546 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5547 dl, N0.getValueType()),
5548 Cond);
5549
5550 // Turn (X^C1) == C2 --> X == C1^C2
5551 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5552 return DAG.getSetCC(
5553 dl, VT, N0.getOperand(0),
5554 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5555 dl, N0.getValueType()),
5556 Cond);
5557 }
5558
5559 // Turn (C1-X) == C2 --> X == C1-C2
5560 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5561 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5562 return DAG.getSetCC(
5563 dl, VT, N0.getOperand(1),
5564 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5565 dl, N0.getValueType()),
5566 Cond);
5567
5568 // Could RHSC fold directly into a compare?
5569 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5570 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5571 }
5572
5573 // (X+Y) == X --> Y == 0 and similar folds.
5574 // Don't do this if X is an immediate that can fold into a cmp
5575 // instruction and X+Y has other uses. It could be an induction variable
5576 // chain, and the transform would increase register pressure.
5577 if (!LegalRHSImm || N0.hasOneUse())
5578 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5579 return V;
5580 }
5581
5582 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5583 N1.getOpcode() == ISD::XOR)
5584 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5585 return V;
5586
5587 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5588 return V;
5589
5590 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
5591 return V;
5592 }
5593
5594 // Fold remainder of division by a constant.
5595 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5596 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5597 // When division is cheap or optimizing for minimum size,
5598 // fall through to DIVREM creation by skipping this fold.
5599 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5600 if (N0.getOpcode() == ISD::UREM) {
5601 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5602 return Folded;
5603 } else if (N0.getOpcode() == ISD::SREM) {
5604 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5605 return Folded;
5606 }
5607 }
5608 }
5609
5610 // Fold away ALL boolean setcc's.
5611 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5612 SDValue Temp;
5613 switch (Cond) {
5614 default: llvm_unreachable("Unknown integer setcc!");
5615 case ISD::SETEQ: // X == Y -> ~(X^Y)
5616 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5617 N0 = DAG.getNOT(dl, Temp, OpVT);
5618 if (!DCI.isCalledByLegalizer())
5619 DCI.AddToWorklist(Temp.getNode());
5620 break;
5621 case ISD::SETNE: // X != Y --> (X^Y)
5622 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5623 break;
5624 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5625 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5626 Temp = DAG.getNOT(dl, N0, OpVT);
5627 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5628 if (!DCI.isCalledByLegalizer())
5629 DCI.AddToWorklist(Temp.getNode());
5630 break;
5631 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5632 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5633 Temp = DAG.getNOT(dl, N1, OpVT);
5634 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5635 if (!DCI.isCalledByLegalizer())
5636 DCI.AddToWorklist(Temp.getNode());
5637 break;
5638 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5639 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5640 Temp = DAG.getNOT(dl, N0, OpVT);
5641 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5642 if (!DCI.isCalledByLegalizer())
5643 DCI.AddToWorklist(Temp.getNode());
5644 break;
5645 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5646 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5647 Temp = DAG.getNOT(dl, N1, OpVT);
5648 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5649 break;
5650 }
5651 if (VT.getScalarType() != MVT::i1) {
5652 if (!DCI.isCalledByLegalizer())
5653 DCI.AddToWorklist(N0.getNode());
5654 // FIXME: If running after legalize, we probably can't do this.
5656 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5657 }
5658 return N0;
5659 }
5660
5661 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5662 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5663 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
5665 N1->getFlags().hasNoUnsignedWrap()) ||
5667 N1->getFlags().hasNoSignedWrap())) &&
5669 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5670 }
5671
5672 // Could not fold it.
5673 return SDValue();
5674}
5675
5676/// Returns true (and the GlobalValue and the offset) if the node is a
5677/// GlobalAddress + offset.
// Recursively peels target wrapper nodes and add chains, accumulating every
// constant addend it finds into Offset along the way.
 5679 int64_t &Offset) const {
 5680
 5681 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
 5682
 // Direct hit: the (unwrapped) node is itself a global address.
 5683 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
 5684 GA = GASD->getGlobal();
 5685 Offset += GASD->getOffset();
 5686 return true;
 5687 }
 5688
 // (GA + C) with the global on either side: recurse into one operand and
 // require the other to be a constant we can fold into Offset.
 5689 if (N->isAnyAdd()) {
 5690 SDValue N1 = N->getOperand(0);
 5691 SDValue N2 = N->getOperand(1);
 5692 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
 5693 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
 5694 Offset += V->getSExtValue();
 5695 return true;
 5696 }
 5697 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
 5698 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
 5699 Offset += V->getSExtValue();
 5700 return true;
 5701 }
 5702 }
 5703 }
 5704
 5705 return false;
 5706}
5707
// Target hook invoked by the DAG combiner for target-specific nodes; the
// base implementation performs no combining and returns a null SDValue.
5709 DAGCombinerInfo &DCI) const {
 5710 // Default implementation: no optimization.
 5711 return SDValue();
 5712}
5713
5714//===----------------------------------------------------------------------===//
5715// Inline Assembler Implementation Methods
5716//===----------------------------------------------------------------------===//
5717
// Classify an inline-asm constraint string into a ConstraintType bucket.
// Single-character codes follow the GCC constraint letters; a brace-enclosed
// name ("{regname}") names a specific physical register, with "{memory}"
// special-cased as a memory constraint.
5720 unsigned S = Constraint.size();
 5721
 5722 if (S == 1) {
 5723 switch (Constraint[0]) {
 5724 default: break;
 5725 case 'r':
 5726 return C_RegisterClass;
 5727 case 'm': // memory
 5728 case 'o': // offsetable
 5729 case 'V': // not offsetable
 5730 return C_Memory;
 5731 case 'p': // Address.
 5732 return C_Address;
 5733 case 'n': // Simple Integer
 5734 case 'E': // Floating Point Constant
 5735 case 'F': // Floating Point Constant
 5736 return C_Immediate;
 5737 case 'i': // Simple Integer or Relocatable Constant
 5738 case 's': // Relocatable Constant
 5739 case 'X': // Allow ANY value.
 5740 case 'I': // Target registers.
 5741 case 'J':
 5742 case 'K':
 5743 case 'L':
 5744 case 'M':
 5745 case 'N':
 5746 case 'O':
 5747 case 'P':
 5748 case '<':
 5749 case '>':
 5750 return C_Other;
 5751 }
 5752 }
 5753
 // Brace-enclosed constraints name a specific register, e.g. "{eax}".
 5754 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
 5755 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
 5756 return C_Memory;
 5757 return C_Register;
 5758 }
 5759 return C_Unknown;
 5760}
5761
5762/// Try to replace an X constraint, which matches anything, with another that
5763/// has more specific requirements based on the type of the corresponding
5764/// operand.
5765const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5766 if (ConstraintVT.isInteger())
5767 return "r";
5768 if (ConstraintVT.isFloatingPoint())
5769 return "f"; // works for many targets
5770 return nullptr;
5771}
5772
// Hook for targets that need to materialize an inline-asm operand through a
// glued copy sequence; the default implementation produces no value.
5774 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
 5775 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
 5776 return SDValue();
 5777}
5778
5779/// Lower the specified operand into the Ops vector.
5780/// If it is invalid, don't add anything to Ops.
// Handles the generic single-letter constraints 'X', 'i', 'n' and 's' by
// folding constants, global addresses and block addresses (plus any constant
// offsets wrapped around them) into target operands.
 5782 StringRef Constraint,
 5783 std::vector<SDValue> &Ops,
 5784 SelectionDAG &DAG) const {
 5785
 // Multi-character constraints are target-specific; nothing to do here.
 5786 if (Constraint.size() > 1)
 5787 return;
 5788
 5789 char ConstraintLetter = Constraint[0];
 5790 switch (ConstraintLetter) {
 5791 default: break;
 5792 case 'X': // Allows any operand
 5793 case 'i': // Simple Integer or Relocatable Constant
 5794 case 'n': // Simple Integer
 5795 case 's': { // Relocatable Constant
 5796
 5798 uint64_t Offset = 0;
 5799
 5800 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
 5801 // etc., since getelementpointer is variadic. We can't use
 5802 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
 5803 // while in this case the GA may be furthest from the root node which is
 5804 // likely an ISD::ADD.
 5805 while (true) {
 5806 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
 5807 // gcc prints these as sign extended. Sign extend value to 64 bits
 5808 // now; without this it would get ZExt'd later in
 5809 // ScheduleDAGSDNodes::EmitNode, which is very generic.
 5810 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
 5811 BooleanContent BCont = getBooleanContents(MVT::i64);
 5812 ISD::NodeType ExtOpc =
 5813 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
 5814 int64_t ExtVal =
 5815 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
 5816 Ops.push_back(
 5817 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
 5818 return;
 5819 }
 // 'n' accepts only plain integers; symbols are allowed for the rest.
 5820 if (ConstraintLetter != 'n') {
 5821 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
 5822 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
 5823 GA->getValueType(0),
 5824 Offset + GA->getOffset()));
 5825 return;
 5826 }
 5827 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
 5828 Ops.push_back(DAG.getTargetBlockAddress(
 5829 BA->getBlockAddress(), BA->getValueType(0),
 5830 Offset + BA->getOffset(), BA->getTargetFlags()));
 5831 return;
 5832 }
 5834 Ops.push_back(Op);
 5835 return;
 5836 }
 5837 }
 // Peel one ADD/SUB-of-constant layer and keep walking toward the symbol.
 5838 const unsigned OpCode = Op.getOpcode();
 5839 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
 5840 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
 5841 Op = Op.getOperand(1);
 5842 // Subtraction is not commutative.
 5843 else if (OpCode == ISD::ADD &&
 5844 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
 5845 Op = Op.getOperand(0);
 5846 else
 5847 return;
 5848 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
 5849 continue;
 5850 }
 5851 return;
 5852 }
 5853 break;
 5854 }
 5855 }
 5856}
5857
5861
// Resolve a "{regname}" inline-asm constraint to a (physical register,
// register class) pair by scanning all legal register classes for a register
// whose assembler name matches (case-insensitively). Prefers a class that is
// legal for VT; otherwise returns the first match found.
5862std::pair<unsigned, const TargetRegisterClass *>
 5864 StringRef Constraint,
 5865 MVT VT) const {
 5866 if (!Constraint.starts_with("{"))
 5867 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
 5868 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
 5869
 5870 // Remove the braces from around the name.
 5871 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
 5872
 // Fallback result: first matching register whose class lacks VT.
 5873 std::pair<unsigned, const TargetRegisterClass *> R =
 5874 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
 5875
 5876 // Figure out which register class contains this reg.
 5877 for (const TargetRegisterClass *RC : RI->regclasses()) {
 5878 // If none of the value types for this register class are valid, we
 5879 // can't use it. For example, 64-bit reg classes on 32-bit targets.
 5880 if (!isLegalRC(*RI, *RC))
 5881 continue;
 5882
 5883 for (const MCPhysReg &PR : *RC) {
 5884 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
 5885 std::pair<unsigned, const TargetRegisterClass *> S =
 5886 std::make_pair(PR, RC);
 5887
 5888 // If this register class has the requested value type, return it,
 5889 // otherwise keep searching and return the first class found
 5890 // if no other is found which explicitly has the requested type.
 5891 if (RI->isTypeLegalForClass(*RC, VT))
 5892 return S;
 5893 if (!R.second)
 5894 R = S;
 5895 }
 5896 }
 5897 }
 5898
 5899 return R;
 5900}
5901
5902//===----------------------------------------------------------------------===//
5903// Constraint Selection.
5904
5905/// Return true if this is an input operand that is a matching constraint like
5906/// "4".
// Matching constraints tie an input to an output operand and are written as
// a decimal operand number, so a leading digit identifies them.
 5908 assert(!ConstraintCode.empty() && "No known constraint!");
 5909 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
 5910}
5911
5912/// If this is an input matching constraint, this method returns the output
5913/// operand it matches.
// The constraint code is the decimal index of the tied output operand.
 5915 assert(!ConstraintCode.empty() && "No known constraint!");
 5916 return atoi(ConstraintCode.c_str());
 5917}
5918
5919/// Split up the constraint string from the inline assembly value into the
5920/// specific constraints and their prefixes, and also tie in the associated
5921/// operand values.
5922/// If this returns an empty vector, and if the constraint string itself
5923/// isn't empty, there was an error parsing.
// Three phases: (1) prepass that records each constraint and computes its
// value type, (2) selection of the best multiple-alternative group, and
// (3) validation of tied (matching) input/output operand pairs.
 5926 const TargetRegisterInfo *TRI,
 5927 const CallBase &Call) const {
 5928 /// Information about all of the constraints.
 5929 AsmOperandInfoVector ConstraintOperands;
 5930 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
 5931 unsigned maCount = 0; // Largest number of multiple alternative constraints.
 5932
 5933 // Do a prepass over the constraints, canonicalizing them, and building up the
 5934 // ConstraintOperands list.
 5935 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
 5936 unsigned ResNo = 0; // ResNo - The result number of the next output.
 5937 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
 5938
 5939 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
 5940 ConstraintOperands.emplace_back(std::move(CI));
 5941 AsmOperandInfo &OpInfo = ConstraintOperands.back();
 5942
 5943 // Update multiple alternative constraint count.
 5944 if (OpInfo.multipleAlternatives.size() > maCount)
 5945 maCount = OpInfo.multipleAlternatives.size();
 5946
 5947 OpInfo.ConstraintVT = MVT::Other;
 5948
 5949 // Compute the value type for each operand.
 5950 switch (OpInfo.Type) {
 5952 // Indirect outputs just consume an argument.
 5953 if (OpInfo.isIndirect) {
 5954 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
 5955 break;
 5956 }
 5957
 5958 // The return value of the call is this value. As such, there is no
 5959 // corresponding argument.
 5960 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
 5961 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
 5962 OpInfo.ConstraintVT =
 5963 getAsmOperandValueType(DL, STy->getElementType(ResNo))
 5964 .getSimpleVT();
 5965 } else {
 5966 assert(ResNo == 0 && "Asm only has one result!");
 5967 OpInfo.ConstraintVT =
 5969 }
 5970 ++ResNo;
 5971 break;
 5972 case InlineAsm::isInput:
 5973 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
 5974 break;
 5975 case InlineAsm::isLabel:
 5976 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
 5977 ++LabelNo;
 5978 continue;
 5980 // Nothing to do.
 5981 break;
 5982 }
 5983
 // Derive the constraint's value type from the operand it consumed.
 5984 if (OpInfo.CallOperandVal) {
 5985 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
 5986 if (OpInfo.isIndirect) {
 5987 OpTy = Call.getParamElementType(ArgNo);
 5988 assert(OpTy && "Indirect operand must have elementtype attribute");
 5989 }
 5990
 5991 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
 5992 if (StructType *STy = dyn_cast<StructType>(OpTy))
 5993 if (STy->getNumElements() == 1)
 5994 OpTy = STy->getElementType(0);
 5995
 5996 // If OpTy is not a single value, it may be a struct/union that we
 5997 // can tile with integers.
 5998 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
 5999 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
 6000 switch (BitSize) {
 6001 default: break;
 6002 case 1:
 6003 case 8:
 6004 case 16:
 6005 case 32:
 6006 case 64:
 6007 case 128:
 6008 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
 6009 break;
 6010 }
 6011 }
 6012
 6013 EVT VT = getAsmOperandValueType(DL, OpTy, true);
 6014 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
 // Only constraints that consumed a call operand advance ArgNo.
 6015 ArgNo++;
 6016 }
 6017 }
 6018
 6019 // If we have multiple alternative constraints, select the best alternative.
 6020 if (!ConstraintOperands.empty()) {
 6021 if (maCount) {
 6022 unsigned bestMAIndex = 0;
 6023 int bestWeight = -1;
 6024 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
 6025 int weight = -1;
 6026 unsigned maIndex;
 6027 // Compute the sums of the weights for each alternative, keeping track
 6028 // of the best (highest weight) one so far.
 6029 for (maIndex = 0; maIndex < maCount; ++maIndex) {
 6030 int weightSum = 0;
 6031 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
 6032 cIndex != eIndex; ++cIndex) {
 6033 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
 6034 if (OpInfo.Type == InlineAsm::isClobber)
 6035 continue;
 6036
 6037 // If this is an output operand with a matching input operand,
 6038 // look up the matching input. If their types mismatch, e.g. one
 6039 // is an integer, the other is floating point, or their sizes are
 6040 // different, flag it as an maCantMatch.
 6041 if (OpInfo.hasMatchingInput()) {
 6042 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
 6043 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
 6044 if ((OpInfo.ConstraintVT.isInteger() !=
 6045 Input.ConstraintVT.isInteger()) ||
 6046 (OpInfo.ConstraintVT.getSizeInBits() !=
 6047 Input.ConstraintVT.getSizeInBits())) {
 6048 weightSum = -1; // Can't match.
 6049 break;
 6050 }
 6051 }
 6052 }
 6053 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
 6054 if (weight == -1) {
 6055 weightSum = -1;
 6056 break;
 6057 }
 6058 weightSum += weight;
 6059 }
 6060 // Update best.
 6061 if (weightSum > bestWeight) {
 6062 bestWeight = weightSum;
 6063 bestMAIndex = maIndex;
 6064 }
 6065 }
 6066
 6067 // Now select chosen alternative in each constraint.
 6068 for (AsmOperandInfo &cInfo : ConstraintOperands)
 6069 if (cInfo.Type != InlineAsm::isClobber)
 6070 cInfo.selectAlternative(bestMAIndex);
 6071 }
 6072 }
 6073
 6074 // Check and hook up tied operands, choose constraint code to use.
 6075 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
 6076 cIndex != eIndex; ++cIndex) {
 6077 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
 6078
 6079 // If this is an output operand with a matching input operand, look up the
 6080 // matching input. If their types mismatch, e.g. one is an integer, the
 6081 // other is floating point, or their sizes are different, flag it as an
 6082 // error.
 6083 if (OpInfo.hasMatchingInput()) {
 6084 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
 6085
 6086 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
 6087 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
 6088 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
 6089 OpInfo.ConstraintVT);
 6090 std::pair<unsigned, const TargetRegisterClass *> InputRC =
 6091 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
 6092 Input.ConstraintVT);
 6093 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
 6094 OpInfo.ConstraintVT.isFloatingPoint();
 6095 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
 6096 Input.ConstraintVT.isFloatingPoint();
 6097 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
 6098 (MatchRC.second != InputRC.second)) {
 6099 report_fatal_error("Unsupported asm: input constraint"
 6100 " with a matching output constraint of"
 6101 " incompatible type!");
 6102 }
 6103 }
 6104 }
 6105 }
 6106
 6107 return ConstraintOperands;
 6108}
6109
6110/// Return a number indicating our preference for choosing a type of constraint
6111/// over another, for the purpose of sorting them. Immediates are almost always
6112/// preferable (when they can be emitted). A higher return value means a
6113/// stronger preference for one constraint type relative to another.
6114/// FIXME: We should prefer registers over memory but doing so may lead to
6115/// unrecoverable register exhaustion later.
6116/// https://github.com/llvm/llvm-project/issues/20571
 6118 switch (CT) {
 6121 return 4;
 6124 return 3;
 6126 return 2;
 6128 return 1;
 6130 return 0;
 6131 }
 // Fully covered switch over ConstraintType; falling out means a bad enum.
 6132 llvm_unreachable("Invalid constraint type");
 6133}
6134
6135/// Examine constraint type and operand type and determine a weight value.
6136/// This object must already have been set up with the operand type
6137/// and the current alternative constraint selected.
// Returns the best single-constraint weight among the codes of the selected
// alternative (or of the primary codes when maIndex is out of range).
 6140 AsmOperandInfo &info, int maIndex) const {
 6142 if (maIndex >= (int)info.multipleAlternatives.size())
 6143 rCodes = &info.Codes;
 6144 else
 6145 rCodes = &info.multipleAlternatives[maIndex].Codes;
 6146 ConstraintWeight BestWeight = CW_Invalid;
 6147
 6148 // Loop over the options, keeping track of the most general one.
 6149 for (const std::string &rCode : *rCodes) {
 6150 ConstraintWeight weight =
 6151 getSingleConstraintMatchWeight(info, rCode.c_str());
 6152 if (weight > BestWeight)
 6153 BestWeight = weight;
 6154 }
 6155
 6156 return BestWeight;
 6157}
6158
6159/// Examine constraint type and operand type and determine a weight value.
6160/// This object must already have been set up with the operand type
6161/// and the current alternative constraint selected.
// Weighs one constraint letter against the kind of value being passed
// (constant, global, FP constant, memory, integer register, ...).
 6164 AsmOperandInfo &info, const char *constraint) const {
 6166 Value *CallOperandVal = info.CallOperandVal;
 6167 // If we don't have a value, we can't do a match,
 6168 // but allow it at the lowest weight.
 6169 if (!CallOperandVal)
 6170 return CW_Default;
 6171 // Look at the constraint type.
 6172 switch (*constraint) {
 6173 case 'i': // immediate integer.
 6174 case 'n': // immediate integer with a known value.
 6175 if (isa<ConstantInt>(CallOperandVal))
 6176 weight = CW_Constant;
 6177 break;
 6178 case 's': // non-explicit integral immediate.
 6179 if (isa<GlobalValue>(CallOperandVal))
 6180 weight = CW_Constant;
 6181 break;
 6182 case 'E': // immediate float if host format.
 6183 case 'F': // immediate float.
 6184 if (isa<ConstantFP>(CallOperandVal))
 6185 weight = CW_Constant;
 6186 break;
 6187 case '<': // memory operand with autodecrement.
 6188 case '>': // memory operand with autoincrement.
 6189 case 'm': // memory operand.
 6190 case 'o': // offsettable memory operand
 6191 case 'V': // non-offsettable memory operand
 6192 weight = CW_Memory;
 6193 break;
 6194 case 'r': // general register.
 6195 case 'g': // general register, memory operand or immediate integer.
 6196 // note: Clang converts "g" to "imr".
 6197 if (CallOperandVal->getType()->isIntegerTy())
 6198 weight = CW_Register;
 6199 break;
 6200 case 'X': // any operand.
 6201 default:
 6202 weight = CW_Default;
 6203 break;
 6204 }
 6205 return weight;
 6206}
6207
6208/// If there are multiple different constraints that we could pick for this
6209/// operand (e.g. "imr") try to pick the 'best' one.
6210/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6211/// into seven classes:
6212/// Register -> one specific register
6213/// RegisterClass -> a group of regs
6214/// Memory -> memory
6215/// Address -> a symbolic memory reference
6216/// Immediate -> immediate values
6217/// Other -> magic values (such as "Flag Output Operands")
6218/// Unknown -> something we don't recognize yet and can't handle
6219/// Ideally, we would pick the most specific constraint possible: if we have
6220/// something that fits into a register, we would pick it. The problem here
6221/// is that if we have something that could either be in a register or in
6222/// memory that use of the register could cause selection of *other*
6223/// operands to fail: they might only succeed if we pick memory. Because of
6224/// this the heuristic we use is:
6225///
6226/// 1) If there is an 'other' constraint, and if the operand is valid for
6227/// that constraint, use it. This makes us take advantage of 'i'
6228/// constraints when available.
6229/// 2) Otherwise, pick the most general constraint present. This prefers
6230/// 'm' over 'r', for example.
6231///
 6233 TargetLowering::AsmOperandInfo &OpInfo) const {
 6234 ConstraintGroup Ret;
 6235
 // Collect the viable (code, type) candidates for this operand.
 6236 Ret.reserve(OpInfo.Codes.size());
 6237 for (StringRef Code : OpInfo.Codes) {
 6239
 6240 // Indirect 'other' or 'immediate' constraints are not allowed.
 6241 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
 6242 CType == TargetLowering::C_Register ||
 6244 continue;
 6245
 6246 // Things with matching constraints can only be registers, per gcc
 6247 // documentation. This mainly affects "g" constraints.
 6248 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
 6249 continue;
 6250
 6251 Ret.emplace_back(Code, CType);
 6252 }
 6253
 // Order candidates by descending constraint priority.
 6255 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
 6256 });
 6257
 6258 return Ret;
 6259}
6260
6261/// If we have an immediate, see if we can lower it. Return true if we can,
6262/// false otherwise.
// Probes LowerAsmOperandForConstraint: success is indicated by the target
// pushing at least one lowered operand into ResultOps.
 6264 SDValue Op, SelectionDAG *DAG,
 6265 const TargetLowering &TLI) {
 6266
 6267 assert((P.second == TargetLowering::C_Other ||
 6268 P.second == TargetLowering::C_Immediate) &&
 6269 "need immediate or other");
 6270
 6271 if (!Op.getNode())
 6272 return false;
 6273
 6274 std::vector<SDValue> ResultOps;
 6275 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
 6276 return !ResultOps.empty();
 6277}
6278
6279/// Determines the constraint code and constraint type to use for the specific
6280/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
// Picks the highest-priority viable constraint, preferring immediate/'other'
// codes that actually lower, then specializes 'X' by operand kind.
 6282 SDValue Op,
 6283 SelectionDAG *DAG) const {
 6284 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
 6285
 6286 // Single-letter constraints ('r') are very common.
 6287 if (OpInfo.Codes.size() == 1) {
 6288 OpInfo.ConstraintCode = OpInfo.Codes[0];
 6289 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
 6290 } else {
 6292 if (G.empty())
 6293 return;
 6294
 // Scan the immediate/'other' prefix of the priority-sorted candidates
 // and take the first one the target can actually lower.
 6295 unsigned BestIdx = 0;
 6296 for (const unsigned E = G.size();
 6297 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
 6298 G[BestIdx].second == TargetLowering::C_Immediate);
 6299 ++BestIdx) {
 6300 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
 6301 break;
 6302 // If we're out of constraints, just pick the first one.
 6303 if (BestIdx + 1 == E) {
 6304 BestIdx = 0;
 6305 break;
 6306 }
 6307 }
 6308
 6309 OpInfo.ConstraintCode = G[BestIdx].first;
 6310 OpInfo.ConstraintType = G[BestIdx].second;
 6311 }
 6312
 6313 // 'X' matches anything.
 6314 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
 6315 // Constants are handled elsewhere. For Functions, the type here is the
 6316 // type of the result, which is not what we want to look at; leave them
 6317 // alone.
 6318 Value *v = OpInfo.CallOperandVal;
 6319 if (isa<ConstantInt>(v) || isa<Function>(v)) {
 6320 return;
 6321 }
 6322
 // Labels and block addresses are relocatable; treat them as immediates.
 6323 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
 6324 OpInfo.ConstraintCode = "i";
 6325 return;
 6326 }
 6327
 6328 // Otherwise, try to resolve it to something we know about by looking at
 6329 // the actual operand type.
 6330 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
 6331 OpInfo.ConstraintCode = Repl;
 6332 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
 6333 }
 6334 }
 6335}
6336
6337/// Given an exact SDIV by a constant, create a multiplication
6338/// with the multiplicative inverse of the constant.
6339/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6341 const SDLoc &dl, SelectionDAG &DAG,
6342 SmallVectorImpl<SDNode *> &Created) {
6343 SDValue Op0 = N->getOperand(0);
6344 SDValue Op1 = N->getOperand(1);
6345 EVT VT = N->getValueType(0);
6346 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6347 EVT ShSVT = ShVT.getScalarType();
6348
6349 bool UseSRA = false;
6350 SmallVector<SDValue, 16> Shifts, Factors;
6351
 // For each constant divisor d = d0 * 2^shift (d0 odd), record 'shift' and
 // the multiplicative inverse of d0; the exact division then becomes an
 // arithmetic shift (if any divisor was even) followed by a multiply.
6352 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6353 if (C->isZero())
6354 return false;
6355
6356 EVT CT = C->getValueType(0);
6357 APInt Divisor = C->getAPIntValue();
6358 unsigned Shift = Divisor.countr_zero();
6359 if (Shift) {
6360 Divisor.ashrInPlace(Shift);
6361 UseSRA = true;
6362 }
6363 APInt Factor = Divisor.multiplicativeInverse();
6364 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6365 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6366 return true;
6367 };
6368
6369 // Collect all magic values from the build vector.
6370 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern, /*AllowUndefs=*/false,
6371 /*AllowTruncation=*/true))
6372 return SDValue();
6373
 // Materialize the per-element shift amounts and factors in a form that
 // matches the divisor node (build vector / splat vector / scalar).
6374 SDValue Shift, Factor;
6375 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6376 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6377 Factor = DAG.getBuildVector(VT, dl, Factors);
6378 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6379 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6380 "Expected matchUnaryPredicate to return one element for scalable "
6381 "vectors");
6382 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6383 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6384 } else {
6385 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6386 Shift = Shifts[0];
6387 Factor = Factors[0];
6388 }
6389
 // The SRA carries the 'exact' flag: the sdiv was exact, so no bits are
 // lost by the pre-shift.
6390 SDValue Res = Op0;
6391 if (UseSRA) {
6392 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
6393 Created.push_back(Res.getNode());
6394 }
6395
6396 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6397}
6398
6399/// Given an exact UDIV by a constant, create a multiplication
6400/// with the multiplicative inverse of the constant.
6401/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6403 const SDLoc &dl, SelectionDAG &DAG,
6404 SmallVectorImpl<SDNode *> &Created) {
6405 EVT VT = N->getValueType(0);
6406 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6407 EVT ShSVT = ShVT.getScalarType();
6408
6409 bool UseSRL = false;
6410 SmallVector<SDValue, 16> Shifts, Factors;
6411
 // For each constant divisor d = d0 * 2^shift (d0 odd), record 'shift' and
 // the multiplicative inverse of d0 modulo 2^BW; the exact division then
 // becomes a logical shift (if any divisor was even) plus a multiply.
6412 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6413 if (C->isZero())
6414 return false;
6415
6416 EVT CT = C->getValueType(0);
6417 APInt Divisor = C->getAPIntValue();
6418 unsigned Shift = Divisor.countr_zero();
6419 if (Shift) {
6420 Divisor.lshrInPlace(Shift);
6421 UseSRL = true;
6422 }
6423 // Calculate the multiplicative inverse modulo BW.
6424 APInt Factor = Divisor.multiplicativeInverse();
6425 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6426 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6427 return true;
6428 };
6429
6430 SDValue Op1 = N->getOperand(1);
6431
6432 // Collect all magic values from the build vector.
6433 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern, /*AllowUndefs=*/false,
6434 /*AllowTruncation=*/true))
6435 return SDValue();
6436
 // Materialize the per-element shifts and factors in a form that matches
 // the divisor node (build vector / splat vector / scalar).
6437 SDValue Shift, Factor;
6438 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6439 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6440 Factor = DAG.getBuildVector(VT, dl, Factors);
6441 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6442 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6443 "Expected matchUnaryPredicate to return one element for scalable "
6444 "vectors");
6445 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6446 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6447 } else {
6448 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6449 Shift = Shifts[0];
6450 Factor = Factors[0];
6451 }
6452
 // The SRL carries the 'exact' flag: the udiv was exact, so no bits are
 // lost by the pre-shift.
6453 SDValue Res = N->getOperand(0);
6454 if (UseSRL) {
6455 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
6456 Created.push_back(Res.getNode());
6457 }
6458
6459 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6460}
6461
6463 SelectionDAG &DAG,
6464 SmallVectorImpl<SDNode *> &Created) const {
 // Default implementation (signature line elided in this view — presumably
 // TargetLowering::BuildSDIVPow2): if integer division is cheap on this
 // target, keep the SDIV node as-is; otherwise signal "no custom lowering"
 // by returning an empty SDValue so generic expansion proceeds.
6465 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6466 if (isIntDivCheap(N->getValueType(0), Attr))
6467 return SDValue(N, 0); // Lower SDIV as SDIV
6468 return SDValue();
6469}
6470
6470SDValue
6473 SelectionDAG &DAG,
6474 SmallVectorImpl<SDNode *> &Created) const {
 // Default implementation (signature line elided in this view — presumably
 // TargetLowering::BuildSREMPow2): if integer division is cheap on this
 // target, keep the SREM node as-is; otherwise return an empty SDValue so
 // generic expansion proceeds.
6475 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6476 if (isIntDivCheap(N->getValueType(0), Attr))
6477 return SDValue(N, 0); // Lower SREM as SREM
6478 return SDValue();
6479}
6480
6481/// Build sdiv by power-of-2 with conditional move instructions
6482/// Ref: "Hacker's Delight" by Henry Warren 10-1
6483/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6484/// bgez x, label
6485/// add x, x, 2**k-1
6486/// label:
6487/// sra res, x, k
6488/// neg res, res (when the divisor is negative)
6490 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6491 SmallVectorImpl<SDNode *> &Created) const {
 // Lg2 = k, where |Divisor| == 2^k (caller guarantees a power-of-two
 // magnitude divisor).
6492 unsigned Lg2 = Divisor.countr_zero();
6493 EVT VT = N->getValueType(0);
6494
6495 SDLoc DL(N);
6496 SDValue N0 = N->getOperand(0);
6497 SDValue Zero = DAG.getConstant(0, DL, VT);
 // Pow2MinusOne = 2^k - 1, the rounding bias for negative dividends.
6498 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6499 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6500
6501 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6502 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6503 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6504 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6505 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6506
6507 Created.push_back(Cmp.getNode());
6508 Created.push_back(Add.getNode());
6509 Created.push_back(CMov.getNode());
6510
6511 // Divide by pow2.
6512 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
6513 DAG.getShiftAmountConstant(Lg2, VT, DL));
6514
6515 // If we're dividing by a positive value, we're done. Otherwise, we must
6516 // negate the result.
6517 if (Divisor.isNonNegative())
6518 return SRA;
6519
6520 Created.push_back(SRA.getNode());
6521 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA)ё;
6523
6524/// Given an ISD::SDIV node expressing a divide by constant,
6525/// return a DAG expression to select that will generate the same value by
6526/// multiplying by a magic number.
6527/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6529 bool IsAfterLegalization,
6530 bool IsAfterLegalTypes,
6531 SmallVectorImpl<SDNode *> &Created) const {
6532 SDLoc dl(N);
6533 EVT VT = N->getValueType(0);
6534 EVT SVT = VT.getScalarType();
6535 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6536 EVT ShSVT = ShVT.getScalarType();
6537 unsigned EltBits = VT.getScalarSizeInBits();
6538 EVT MulVT;
6539
6540 // Check to see if we can do this.
6541 // FIXME: We should be more aggressive here.
6542 if (!isTypeLegal(VT)) {
6543 // Limit this to simple scalars for now.
6544 if (VT.isVector() || !VT.isSimple())
6545 return SDValue();
6546
6547 // If this type will be promoted to a large enough type with a legal
6548 // multiply operation, we can go ahead and do this transform.
 // (The promotion-action check itself is on an elided line.)
6550 return SDValue();
6551
6552 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6553 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6554 !isOperationLegal(ISD::MUL, MulVT))
6555 return SDValue();
6556 }
6557
6558 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6559 if (N->getFlags().hasExact())
6560 return BuildExactSDIV(*this, N, dl, DAG, Created);
6561
6562 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6563
 // For each constant divisor, compute the magic multiplier, optional
 // numerator add/sub factor, post-shift amount, and sign-fixup mask.
6564 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6565 if (C->isZero())
6566 return false;
6567 // Truncate the divisor to the target scalar type in case it was promoted
6568 // during type legalization.
6569 APInt Divisor = C->getAPIntValue().trunc(EltBits);
 // 'magics' is initialized on an elided line — presumably
 // SignedDivisionByConstantInfo::get(Divisor); it supplies .Magic and
 // .ShiftAmount used below.
6571 int NumeratorFactor = 0;
6572 int ShiftMask = -1;
6573
6574 if (Divisor.isOne() || Divisor.isAllOnes()) {
6575 // If d is +1/-1, we just multiply the numerator by +1/-1.
6576 NumeratorFactor = Divisor.getSExtValue();
6577 magics.Magic = 0;
6578 magics.ShiftAmount = 0;
6579 ShiftMask = 0;
6580 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6581 // If d > 0 and m < 0, add the numerator.
6582 NumeratorFactor = 1;
6583 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6584 // If d < 0 and m > 0, subtract the numerator.
6585 NumeratorFactor = -1;
6586 }
6587
6588 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6589 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6590 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6591 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6592 return true;
6593 };
6594
6595 SDValue N0 = N->getOperand(0);
6596 SDValue N1 = N->getOperand(1);
6597
6598 // Collect the shifts / magic values from each element.
6599 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false,
6600 /*AllowTruncation=*/true))
6601 return SDValue();
6602
 // Materialize the collected constants in a form matching the divisor node
 // (build vector / splat vector / scalar).
6603 SDValue MagicFactor, Factor, Shift, ShiftMask;
6604 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6605 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6606 Factor = DAG.getBuildVector(VT, dl, Factors);
6607 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6608 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6609 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6610 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6611 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6612 "Expected matchUnaryPredicate to return one element for scalable "
6613 "vectors");
6614 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6615 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6616 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6617 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6618 } else {
6619 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6620 MagicFactor = MagicFactors[0];
6621 Factor = Factors[0];
6622 Shift = Shifts[0];
6623 ShiftMask = ShiftMasks[0];
6624 }
6625
6626 // Multiply the numerator (operand 0) by the magic value.
6627 // FIXME: We should support doing a MUL in a wider type.
 // GetMULHS returns the high half of a signed X*Y, using whichever of
 // MULHS / SMUL_LOHI / a widened MUL+SRL is available, or an empty SDValue
 // if none is.
6628 auto GetMULHS = [&](SDValue X, SDValue Y) {
6629 // If the type isn't legal, use a wider mul of the type calculated
6630 // earlier.
6631 if (!isTypeLegal(VT)) {
6632 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6633 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6634 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6635 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6636 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6637 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6638 }
6639
6640 if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6641 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6642 if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6643 SDValue LoHi =
6644 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6645 return SDValue(LoHi.getNode(), 1);
6646 }
6647 // If type twice as wide legal, widen and use a mul plus a shift.
6648 unsigned Size = VT.getScalarSizeInBits();
6649 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6650 if (VT.isVector())
6651 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6653 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6654 // custom lowered. This is very expensive so avoid it at all costs for
6655 // constant divisors.
 // (The remaining legality conditions for the widened MUL are on elided
 // lines.)
6656 if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
6659 X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
6660 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6661 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6662 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6663 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6664 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6665 }
6666 return SDValue();
6667 };
6668
6669 SDValue Q = GetMULHS(N0, MagicFactor);
6670 if (!Q)
6671 return SDValue();
6672
6673 Created.push_back(Q.getNode());
6674
6675 // (Optionally) Add/subtract the numerator using Factor.
6676 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6677 Created.push_back(Factor.getNode());
6678 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6679 Created.push_back(Q.getNode());
6680
6681 // Shift right algebraic by shift value.
6682 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6683 Created.push_back(Q.getNode());
6684
6685 // Extract the sign bit, mask it and add it to the quotient.
6686 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6687 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6688 Created.push_back(T.getNode());
6689 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6690 Created.push_back(T.getNode());
6691 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6692}
6693
6694/// Given an ISD::UDIV node expressing a divide by constant,
6695/// return a DAG expression to select that will generate the same value by
6696/// multiplying by a magic number.
6697/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6699 bool IsAfterLegalization,
6700 bool IsAfterLegalTypes,
6701 SmallVectorImpl<SDNode *> &Created) const {
6702 SDLoc dl(N);
6703 EVT VT = N->getValueType(0);
6704 EVT SVT = VT.getScalarType();
6705 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6706 EVT ShSVT = ShVT.getScalarType();
6707 unsigned EltBits = VT.getScalarSizeInBits();
6708 EVT MulVT;
6709
6710 // Check to see if we can do this.
6711 // FIXME: We should be more aggressive here.
6712 if (!isTypeLegal(VT)) {
6713 // Limit this to simple scalars for now.
6714 if (VT.isVector() || !VT.isSimple())
6715 return SDValue();
6716
6717 // If this type will be promoted to a large enough type with a legal
6718 // multiply operation, we can go ahead and do this transform.
 // (The promotion-action check itself is on an elided line.)
6720 return SDValue();
6721
6722 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6723 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6724 !isOperationLegal(ISD::MUL, MulVT))
6725 return SDValue();
6726 }
6727
6728 // If the udiv has an 'exact' bit we can use a simpler lowering.
6729 if (N->getFlags().hasExact())
6730 return BuildExactUDIV(*this, N, dl, DAG, Created);
6731
6732 SDValue N0 = N->getOperand(0);
6733 SDValue N1 = N->getOperand(1);
6734
6735 // Try to use leading zeros of the dividend to reduce the multiplier and
6736 // avoid expensive fixups.
6737 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6738
6739 // If we're after type legalization and SVT is not legal, use the
6740 // promoted type for creating constants to avoid creating nodes with
6741 // illegal types.
6742 if (IsAfterLegalTypes && VT.isVector()) {
6743 SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
6744 if (SVT.bitsLT(VT.getScalarType()))
6745 return SDValue();
6746 ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
6747 if (ShSVT.bitsLT(ShVT.getScalarType()))
6748 return SDValue();
6749 }
6750 const unsigned SVTBits = SVT.getSizeInBits();
6751
6752 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6753 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6754
 // For each constant divisor, compute pre-shift, magic multiplier, NPQ
 // fixup factor, and post-shift.
6755 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6756 if (C->isZero())
6757 return false;
6758 // Truncate the divisor to the target scalar type in case it was promoted
6759 // during type legalization.
6760 APInt Divisor = C->getAPIntValue().trunc(EltBits);
6761
6762 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6763
6764 // Magic algorithm doesn't work for division by 1. We need to emit a select
6765 // at the end.
6766 if (Divisor.isOne()) {
6767 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6768 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6769 } else {
 // 'magics' is initialized on elided lines — presumably
 // UnsignedDivisionByConstantInfo::get(...), taking advantage of the
 // known leading zeros of the dividend.
6772 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
6773
6774 MagicFactor = DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT);
6775
6776 assert(magics.PreShift < Divisor.getBitWidth() &&
6777 "We shouldn't generate an undefined shift!");
6778 assert(magics.PostShift < Divisor.getBitWidth() &&
6779 "We shouldn't generate an undefined shift!");
6780 assert((!magics.IsAdd || magics.PreShift == 0) &&
6781 "Unexpected pre-shift");
6782 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6783 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
6784 NPQFactor = DAG.getConstant(
6785 magics.IsAdd ? APInt::getOneBitSet(SVTBits, EltBits - 1)
6786 : APInt::getZero(SVTBits),
6787 dl, SVT);
6788 UseNPQ |= magics.IsAdd;
6789 UsePreShift |= magics.PreShift != 0;
6790 UsePostShift |= magics.PostShift != 0;
6791 }
6792
6793 PreShifts.push_back(PreShift);
6794 MagicFactors.push_back(MagicFactor);
6795 NPQFactors.push_back(NPQFactor);
6796 PostShifts.push_back(PostShift);
6797 return true;
6798 };
6799
6800 // Collect the shifts/magic values from each element.
6801 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
6802 /*AllowTruncation=*/true))
6803 return SDValue();
6804
 // Materialize the collected constants in a form matching the divisor node
 // (build vector / splat vector / scalar).
6805 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6806 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6807 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6808 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6809 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6810 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6811 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6812 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6813 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6814 "Expected matchUnaryPredicate to return one for scalable vectors");
6815 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6816 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6817 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6818 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6819 } else {
6820 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6821 PreShift = PreShifts[0];
6822 MagicFactor = MagicFactors[0];
 // NPQFactor is intentionally not taken here: the scalar NPQ path below
 // uses an explicit SRL-by-1 instead of a multiply by NPQFactor.
6823 PostShift = PostShifts[0];
6824 }
6825
6826 SDValue Q = N0;
6827 if (UsePreShift) {
6828 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6829 Created.push_back(Q.getNode());
6830 }
6831
6832 // FIXME: We should support doing a MUL in a wider type.
 // GetMULHU returns the high half of an unsigned X*Y, using whichever of
 // MULHU / UMUL_LOHI / a widened MUL+SRL is available, or an empty SDValue
 // if none is.
6833 auto GetMULHU = [&](SDValue X, SDValue Y) {
6834 // If the type isn't legal, use a wider mul of the type calculated
6835 // earlier.
6836 if (!isTypeLegal(VT)) {
6837 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
6838 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
6839 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6840 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6841 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6842 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6843 }
6844
6845 if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
6846 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6847 if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
6848 SDValue LoHi =
6849 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6850 return SDValue(LoHi.getNode(), 1);
6851 }
6852 // If type twice as wide legal, widen and use a mul plus a shift.
6853 unsigned Size = VT.getScalarSizeInBits();
6854 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6855 if (VT.isVector())
6856 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6858 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6859 // custom lowered. This is very expensive so avoid it at all costs for
6860 // constant divisors.
 // (The remaining legality conditions for the widened MUL are on elided
 // lines.)
6861 if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
6864 X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
6865 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
6866 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6867 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6868 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6869 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6870 }
6871 return SDValue(); // No mulhu or equivalent
6872 };
6873
6874 // Multiply the numerator (operand 0) by the magic value.
6875 Q = GetMULHU(Q, MagicFactor);
6876 if (!Q)
6877 return SDValue();
6878
6879 Created.push_back(Q.getNode());
6880
 // NPQ fixup ("add" variant of the magic algorithm):
 // Q = post-shift of (((N - Q) >> 1) + Q).
6881 if (UseNPQ) {
6882 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6883 Created.push_back(NPQ.getNode());
6884
6885 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6886 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6887 if (VT.isVector())
6888 NPQ = GetMULHU(NPQ, NPQFactor);
6889 else
6890 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6891
6892 Created.push_back(NPQ.getNode());
6893
6894 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6895 Created.push_back(Q.getNode());
6896 }
6897
6898 if (UsePostShift) {
6899 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
6900 Created.push_back(Q.getNode());
6901 }
6902
 // The magic algorithm is undefined for divisor == 1; select the dividend
 // directly for those lanes.
6903 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6904
6905 SDValue One = DAG.getConstant(1, dl, VT);
6906 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
6907 return DAG.getSelect(dl, VT, IsOne, N0, Q);
6908}
6909
6910/// If all values in Values that *don't* match the predicate are same 'splat'
6911/// value, then replace all values with that splat value.
6912/// Else, if AlternativeReplacement was provided, then replace all values that
6913/// do match predicate with AlternativeReplacement value.
6914static
6916 std::function<bool(SDValue)> Predicate,
6917 SDValue AlternativeReplacement = SDValue()) {
6918 SDValue Replacement;
6919 // Is there a value for which the Predicate does *NOT* match? What is it?
6920 auto SplatValue = llvm::find_if_not(Values, Predicate);
6921 if (SplatValue != Values.end()) {
6922 // Does Values consist only of SplatValue's and values matching Predicate?
6923 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6924 return Value == *SplatValue || Predicate(Value);
6925 })) // Then we shall replace values matching predicate with SplatValue.
6926 Replacement = *SplatValue;
6927 }
6928 if (!Replacement) {
6929 // Oops, we did not find the "baseline" splat value.
6930 if (!AlternativeReplacement)
6931 return; // Nothing to do.
6932 // Let's replace with provided value then.
6933 Replacement = AlternativeReplacement;
6934 }
 // In-place rewrite of Values: every element matching Predicate becomes
 // Replacement.
6935 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6936}
6937
6938/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6939/// where the divisor is constant and the comparison target is zero,
6940/// return a DAG expression that will generate the same comparison result
6941/// using only multiplications, additions and shifts/rotations.
6942/// Ref: "Hacker's Delight" 10-17.
6943SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6944 SDValue CompTargetNode,
6946 DAGCombinerInfo &DCI,
6947 const SDLoc &DL) const {
 // Thin wrapper around prepareUREMEqFold: on success, register every node
 // it created ('Built' is declared on an elided line) with the DAG
 // combiner's worklist, then return the folded value.
6949 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6950 DCI, DL, Built)) {
6951 for (SDNode *N : Built)
6952 DCI.AddToWorklist(N);
6953 return Folded;
6954 }
6955
6956 return SDValue();
6957}
6958
6959SDValue
6960TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6961 SDValue CompTargetNode, ISD::CondCode Cond,
6962 DAGCombinerInfo &DCI, const SDLoc &DL,
6963 SmallVectorImpl<SDNode *> &Created) const {
6964 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6965 // - D must be constant, with D = D0 * 2^K where D0 is odd
6966 // - P is the multiplicative inverse of D0 modulo 2^W
6967 // - Q = floor(((2^W) - 1) / D)
6968 // where W is the width of the common type of N and D.
6969 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6970 "Only applicable for (in)equality comparisons.");
6971
6972 SelectionDAG &DAG = DCI.DAG;
6973
6974 EVT VT = REMNode.getValueType();
6975 EVT SVT = VT.getScalarType();
6976 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6977 EVT ShSVT = ShVT.getScalarType();
6978
6979 // If MUL is unavailable, we cannot proceed in any case.
6980 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6981 return SDValue();
6982
6983 bool ComparingWithAllZeros = true;
6984 bool AllComparisonsWithNonZerosAreTautological = true;
6985 bool HadTautologicalLanes = false;
6986 bool AllLanesAreTautological = true;
6987 bool HadEvenDivisor = false;
6988 bool AllDivisorsArePowerOfTwo = true;
6989 bool HadTautologicalInvertedLanes = false;
6990 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
6991
6992 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6993 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6994 if (CDiv->isZero())
6995 return false;
6996
6997 const APInt &D = CDiv->getAPIntValue();
6998 const APInt &Cmp = CCmp->getAPIntValue();
6999
7000 ComparingWithAllZeros &= Cmp.isZero();
7001
7002 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7003 // if C2 is not less than C1, the comparison is always false.
7004 // But we will only be able to produce the comparison that will give the
7005 // opposive tautological answer. So this lane would need to be fixed up.
7006 bool TautologicalInvertedLane = D.ule(Cmp);
7007 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
7008
7009 // If all lanes are tautological (either all divisors are ones, or divisor
7010 // is not greater than the constant we are comparing with),
7011 // we will prefer to avoid the fold.
7012 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
7013 HadTautologicalLanes |= TautologicalLane;
7014 AllLanesAreTautological &= TautologicalLane;
7015
7016 // If we are comparing with non-zero, we need'll need to subtract said
7017 // comparison value from the LHS. But there is no point in doing that if
7018 // every lane where we are comparing with non-zero is tautological..
7019 if (!Cmp.isZero())
7020 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
7021
7022 // Decompose D into D0 * 2^K
7023 unsigned K = D.countr_zero();
7024 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7025 APInt D0 = D.lshr(K);
7026
7027 // D is even if it has trailing zeros.
7028 HadEvenDivisor |= (K != 0);
7029 // D is a power-of-two if D0 is one.
7030 // If all divisors are power-of-two, we will prefer to avoid the fold.
7031 AllDivisorsArePowerOfTwo &= D0.isOne();
7032
7033 // P = inv(D0, 2^W)
7034 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7035 unsigned W = D.getBitWidth();
7036 APInt P = D0.multiplicativeInverse();
7037 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7038
7039 // Q = floor((2^W - 1) u/ D)
7040 // R = ((2^W - 1) u% D)
7041 APInt Q, R;
7043
7044 // If we are comparing with zero, then that comparison constant is okay,
7045 // else it may need to be one less than that.
7046 if (Cmp.ugt(R))
7047 Q -= 1;
7048
7050 "We are expecting that K is always less than all-ones for ShSVT");
7051
7052 // If the lane is tautological the result can be constant-folded.
7053 if (TautologicalLane) {
7054 // Set P and K amount to a bogus values so we can try to splat them.
7055 P = 0;
7056 K = -1;
7057 // And ensure that comparison constant is tautological,
7058 // it will always compare true/false.
7059 Q = -1;
7060 }
7061
7062 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7063 KAmts.push_back(
7064 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7065 /*implicitTrunc=*/true),
7066 DL, ShSVT));
7067 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7068 return true;
7069 };
7070
7071 SDValue N = REMNode.getOperand(0);
7072 SDValue D = REMNode.getOperand(1);
7073
7074 // Collect the values from each element.
7075 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
7076 return SDValue();
7077
7078 // If all lanes are tautological, the result can be constant-folded.
7079 if (AllLanesAreTautological)
7080 return SDValue();
7081
7082 // If this is a urem by a powers-of-two, avoid the fold since it can be
7083 // best implemented as a bit test.
7084 if (AllDivisorsArePowerOfTwo)
7085 return SDValue();
7086
7087 SDValue PVal, KVal, QVal;
7088 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7089 if (HadTautologicalLanes) {
7090 // Try to turn PAmts into a splat, since we don't care about the values
7091 // that are currently '0'. If we can't, just keep '0'`s.
7093 // Try to turn KAmts into a splat, since we don't care about the values
7094 // that are currently '-1'. If we can't, change them to '0'`s.
7096 DAG.getConstant(0, DL, ShSVT));
7097 }
7098
7099 PVal = DAG.getBuildVector(VT, DL, PAmts);
7100 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7101 QVal = DAG.getBuildVector(VT, DL, QAmts);
7102 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7103 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
7104 "Expected matchBinaryPredicate to return one element for "
7105 "SPLAT_VECTORs");
7106 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7107 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7108 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7109 } else {
7110 PVal = PAmts[0];
7111 KVal = KAmts[0];
7112 QVal = QAmts[0];
7113 }
7114
7115 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7116 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
7117 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
7118 assert(CompTargetNode.getValueType() == N.getValueType() &&
7119 "Expecting that the types on LHS and RHS of comparisons match.");
7120 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
7121 }
7122
7123 // (mul N, P)
7124 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7125 Created.push_back(Op0.getNode());
7126
7127 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7128 // divisors as a performance improvement, since rotating by 0 is a no-op.
7129 if (HadEvenDivisor) {
7130 // We need ROTR to do this.
7131 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7132 return SDValue();
7133 // UREM: (rotr (mul N, P), K)
7134 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7135 Created.push_back(Op0.getNode());
7136 }
7137
7138 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
7139 SDValue NewCC =
7140 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7142 if (!HadTautologicalInvertedLanes)
7143 return NewCC;
7144
7145 // If any lanes previously compared always-false, the NewCC will give
7146 // always-true result for them, so we need to fixup those lanes.
7147 // Or the other way around for inequality predicate.
7148 assert(VT.isVector() && "Can/should only get here for vectors.");
7149 Created.push_back(NewCC.getNode());
7150
7151 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7152 // if C2 is not less than C1, the comparison is always false.
7153 // But we have produced the comparison that will give the
7154 // opposite tautological answer. So these lanes would need to be fixed up.
7155 SDValue TautologicalInvertedChannels =
7156 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
7157 Created.push_back(TautologicalInvertedChannels.getNode());
7158
7159 // NOTE: we avoid letting illegal types through even if we're before legalize
7160 // ops – legalization has a hard time producing good code for this.
7161 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
7162 // If we have a vector select, let's replace the comparison results in the
7163 // affected lanes with the correct tautological result.
7164 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
7165 DL, SETCCVT, SETCCVT);
7166 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
7167 Replacement, NewCC);
7168 }
7169
7170 // Else, we can just invert the comparison result in the appropriate lanes.
7171 //
7172 // NOTE: see the note above VSELECT above.
7173 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
7174 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
7175 TautologicalInvertedChannels);
7176
7177 return SDValue(); // Don't know how to lower.
7178}
7179
7180/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7181/// where the divisor is constant and the comparison target is zero,
7182/// return a DAG expression that will generate the same comparison result
7183/// using only multiplications, additions and shifts/rotations.
7184/// Ref: "Hacker's Delight" 10-17.
7185SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7186 SDValue CompTargetNode,
// NOTE(review): the `ISD::CondCode Cond` parameter line (7187) appears to
// have been lost in extraction -- `Cond` is passed through below. Confirm
// against upstream TargetLowering.cpp.
7188 DAGCombinerInfo &DCI,
7189 const SDLoc &DL) const {
// NOTE(review): the declaration of `Built` (line 7190, a small vector of
// SDNode pointers collecting the nodes the fold creates) is elided here.
7191 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7192 DCI, DL, Built)) {
 // The assert doubles as a check on prepareSREMEqFold's node budget.
7193 assert(Built.size() <= 7 && "Max size prediction failed.");
 // Queue every freshly created node so DAGCombine revisits them.
7194 for (SDNode *N : Built)
7195 DCI.AddToWorklist(N);
7196 return Folded;
7197 }
7198
 // Fold not possible/profitable; the caller keeps the original SREM+SETCC.
7199 return SDValue();
7200}
7201
/// Rewrite `(srem N, D) ==/!= 0` into a multiply/add/rotate plus an unsigned
/// comparison (Hacker's Delight, 2nd ed., Section 10-17), appending every
/// node created to `Created`. Returns an empty SDValue when the fold is not
/// possible or not profitable.
/// NOTE(review): this listing was extracted from a rendered page and several
/// physical source lines are missing (the embedded line numbers are
/// non-contiguous, e.g. 7309/7311, 7317, 7365-7372, 7425, 7440-7442,
/// 7448-7453); confirm the exact statements against upstream
/// llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp before editing.
7202SDValue
7203TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7204 SDValue CompTargetNode, ISD::CondCode Cond,
7205 DAGCombinerInfo &DCI, const SDLoc &DL,
7206 SmallVectorImpl<SDNode *> &Created) const {
7207 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7208 // Fold:
7209 // (seteq/ne (srem N, D), 0)
7210 // To:
7211 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7212 //
7213 // - D must be constant, with D = D0 * 2^K where D0 is odd
7214 // - P is the multiplicative inverse of D0 modulo 2^W
7215 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7216 // - Q = floor((2 * A) / (2^K))
7217 // where W is the width of the common type of N and D.
7218 //
7219 // When D is a power of two (and thus D0 is 1), the normal
7220 // formula for A and Q don't apply, because the derivation
7221 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7222 // does not apply. This specifically fails when N = INT_MIN.
7223 //
7224 // Instead, for power-of-two D, we use:
7225 // - A = 2^(W-1)
7226 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
7227 // - Q = 2^(W-K) - 1
7228 // |-> Test that the top K bits are zero after rotation
7229 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7230 "Only applicable for (in)equality comparisons.");
7231
7232 SelectionDAG &DAG = DCI.DAG;
7233
7234 EVT VT = REMNode.getValueType();
7235 EVT SVT = VT.getScalarType();
7236 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7237 EVT ShSVT = ShVT.getScalarType();
7238
7239 // If we are after ops legalization, and MUL is unavailable, we can not
7240 // proceed.
7241 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7242 return SDValue();
7243
7244 // TODO: Could support comparing with non-zero too.
7245 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7246 if (!CompTarget || !CompTarget->isZero())
7247 return SDValue();
7248
7249 bool HadIntMinDivisor = false;
7250 bool HadOneDivisor = false;
7251 bool AllDivisorsAreOnes = true;
7252 bool HadEvenDivisor = false;
7253 bool NeedToApplyOffset = false;
7254 bool AllDivisorsArePowerOfTwo = true;
7255 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7256
 // Per-lane visitor: derives the (P, A, K, Q) constants for one divisor and
 // records them; returning false aborts the whole fold.
7257 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7258 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7259 if (C->isZero())
7260 return false;
7261
7262 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7263
7264 // WARNING: this fold is only valid for positive divisors!
7265 APInt D = C->getAPIntValue();
7266 if (D.isNegative())
7267 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
7268
7269 HadIntMinDivisor |= D.isMinSignedValue();
7270
7271 // If all divisors are ones, we will prefer to avoid the fold.
7272 HadOneDivisor |= D.isOne();
7273 AllDivisorsAreOnes &= D.isOne();
7274
7275 // Decompose D into D0 * 2^K
7276 unsigned K = D.countr_zero();
7277 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7278 APInt D0 = D.lshr(K);
7279
7280 if (!D.isMinSignedValue()) {
7281 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7282 // we don't care about this lane in this fold, we'll special-handle it.
7283 HadEvenDivisor |= (K != 0);
7284 }
7285
7286 // D is a power-of-two if D0 is one. This includes INT_MIN.
7287 // If all divisors are power-of-two, we will prefer to avoid the fold.
7288 AllDivisorsArePowerOfTwo &= D0.isOne();
7289
7290 // P = inv(D0, 2^W)
7291 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7292 unsigned W = D.getBitWidth();
7293 APInt P = D0.multiplicativeInverse();
7294 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7295
7296 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7297 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7298 A.clearLowBits(K);
7299
7300 if (!D.isMinSignedValue()) {
7301 // If divisor INT_MIN, then we don't care about this lane in this fold,
7302 // we'll special-handle it.
7303 NeedToApplyOffset |= A != 0;
7304 }
7305
7306 // Q = floor((2 * A) / (2^K))
7307 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7308
 // NOTE(review): the assert heads belonging to the two messages below
 // (lines 7309 and 7311) are elided in this listing.
7310 "We are expecting that A is always less than all-ones for SVT");
7312 "We are expecting that K is always less than all-ones for ShSVT");
7313
7314 // If D was a power of two, apply the alternate constant derivation.
7315 if (D0.isOne()) {
7316 // A = 2^(W-1)
 // NOTE(review): the assignment of A (line 7317) is elided here.
7318 // - Q = 2^(W-K) - 1
7319 Q = APInt::getAllOnes(W - K).zext(W);
7320 }
7321
7322 // If the divisor is 1 the result can be constant-folded. Likewise, we
7323 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7324 if (D.isOne()) {
7325 // Set P, A and K to a bogus values so we can try to splat them.
7326 P = 0;
7327 A = -1;
7328 K = -1;
7329
7330 // x ?% 1 == 0 <--> true <--> x u<= -1
7331 Q = -1;
7332 }
7333
 // Materialize the per-lane constants; K uses the shift-amount scalar type.
7334 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7335 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7336 KAmts.push_back(
7337 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7338 /*implicitTrunc=*/true),
7339 DL, ShSVT));
7340 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7341 return true;
7342 };
7343
7344 SDValue N = REMNode.getOperand(0);
7345 SDValue D = REMNode.getOperand(1);
7346
7347 // Collect the values from each element.
7348 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7349 return SDValue();
7350
7351 // If this is a srem by a one, avoid the fold since it can be constant-folded.
7352 if (AllDivisorsAreOnes)
7353 return SDValue();
7354
7355 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7356 // since it can be best implemented as a bit test.
7357 if (AllDivisorsArePowerOfTwo)
7358 return SDValue();
7359
7360 SDValue PVal, AVal, KVal, QVal;
7361 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7362 if (HadOneDivisor) {
7363 // Try to turn PAmts into a splat, since we don't care about the values
7364 // that are currently '0'. If we can't, just keep '0'`s.
 // NOTE(review): the splat-normalization calls of this branch (lines
 // 7365, 7368 and 7372) are elided in this listing.
7366 // Try to turn AAmts into a splat, since we don't care about the
7367 // values that are currently '-1'. If we can't, change them to '0'`s.
7369 DAG.getConstant(0, DL, SVT));
7370 // Try to turn KAmts into a splat, since we don't care about the values
7371 // that are currently '-1'. If we can't, change them to '0'`s.
7373 DAG.getConstant(0, DL, ShSVT));
7374 }
7375
7376 PVal = DAG.getBuildVector(VT, DL, PAmts);
7377 AVal = DAG.getBuildVector(VT, DL, AAmts);
7378 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7379 QVal = DAG.getBuildVector(VT, DL, QAmts);
7380 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7381 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7382 QAmts.size() == 1 &&
7383 "Expected matchUnaryPredicate to return one element for scalable "
7384 "vectors");
7385 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7386 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7387 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7388 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7389 } else {
7390 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7391 PVal = PAmts[0];
7392 AVal = AAmts[0];
7393 KVal = KAmts[0];
7394 QVal = QAmts[0];
7395 }
7396
7397 // (mul N, P)
7398 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7399 Created.push_back(Op0.getNode());
7400
 // Only emit the ADD when some lane actually carries a non-zero offset A.
7401 if (NeedToApplyOffset) {
7402 // We need ADD to do this.
7403 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7404 return SDValue();
7405
7406 // (add (mul N, P), A)
7407 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7408 Created.push_back(Op0.getNode());
7409 }
7410
7411 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7412 // divisors as a performance improvement, since rotating by 0 is a no-op.
7413 if (HadEvenDivisor) {
7414 // We need ROTR to do this.
7415 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7416 return SDValue();
7417 // SREM: (rotr (add (mul N, P), A), K)
7418 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7419 Created.push_back(Op0.getNode());
7420 }
7421
7422 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
 // NOTE(review): the condition-code argument of this getSetCC (line 7425)
 // is elided; per the comment above it is the unsigned <=/> form of `Cond`.
7423 SDValue Fold =
7424 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7426
7427 // If we didn't have lanes with INT_MIN divisor, then we're done.
7428 if (!HadIntMinDivisor)
7429 return Fold;
7430
7431 // That fold is only valid for positive divisors. Which effectively means,
7432 // it is invalid for INT_MIN divisors. So if we have such a lane,
7433 // we must fix-up results for said lanes.
7434 assert(VT.isVector() && "Can/should only get here for vectors.");
7435
7436 // NOTE: we avoid letting illegal types through even if we're before legalize
7437 // ops – legalization has a hard time producing good code for the code that
7438 // follows.
 // NOTE(review): the remaining legality checks of this condition (lines
 // 7440-7442) are elided in this listing.
7439 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7443 return SDValue();
7444
7445 Created.push_back(Fold.getNode());
7446
 // NOTE(review): the initializer argument lists for IntMin/IntMax/Zero
 // (lines 7448-7453) are elided; presumably the signed min/max values of VT
 // and zero -- confirm upstream.
7447 SDValue IntMin = DAG.getConstant(
7449 SDValue IntMax = DAG.getConstant(
7451 SDValue Zero =
7453
7454 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7455 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7456 Created.push_back(DivisorIsIntMin.getNode());
7457
7458 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7459 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7460 Created.push_back(Masked.getNode());
7461 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7462 Created.push_back(MaskedIsZero.getNode());
7463
7464 // To produce final result we need to blend 2 vectors: 'SetCC' and
7465 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7466 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7467 // constant-folded, select can get lowered to a shuffle with constant mask.
7468 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7469 MaskedIsZero, Fold);
7470
7471 return Blended;
7472}
7473
// Emit the runtime test that guards a sqrt estimate against denormal inputs:
// either `X == 0.0` (when the denormal mode flushes/zeroes inputs) or
// `fabs(X) < smallest-normal` otherwise.
// NOTE(review): the opening signature line (7474) is elided in this listing;
// from the parameter list and body this appears to be
// TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG, ...) --
// confirm against upstream TargetLowering.cpp.
7475 const DenormalMode &Mode) const {
7476 SDLoc DL(Op);
7477 EVT VT = Op.getValueType();
7478 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7479 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7480
7481 // This is specifically a check for the handling of denormal inputs, not the
7482 // result.
7483 if (Mode.Input == DenormalMode::PreserveSign ||
7484 Mode.Input == DenormalMode::PositiveZero) {
7485 // Test = X == 0.0
 // Denormal inputs are treated as zero, so only exact zero needs guarding.
7486 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7487 }
7488
7489 // Testing it with denormal inputs to avoid wrong estimate.
7490 //
7491 // Test = fabs(X) < SmallestNormal
 // Compare |X| against the smallest normalized value of VT's FP semantics;
 // anything smaller (zero or denormal) takes the guarded path.
7492 const fltSemantics &FltSem = VT.getFltSemantics();
7493 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7494 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7495 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7496 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7497}
7498
// Try to build the negated form of Op (the fneg-folding workhorse): returns
// the negated expression and reports through `Cost` whether negation was
// cheaper, neutral, or more expensive than the original; returns an empty
// SDValue when no acceptable negation exists.
// NOTE(review): this listing was extracted from a rendered page and several
// physical source lines are missing (e.g. the opening signature line 7499,
// the recursion-depth condition 7510, `Cost = ...` assignments such as 7505
// and 7559, and the `NegatibleCost CostX/CostY/...` declarations); confirm
// against upstream TargetLowering.cpp before editing.
7500 bool LegalOps, bool OptForSize,
7502 unsigned Depth) const {
7503 // fneg is removable even if it has multiple uses.
7504 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7506 return Op.getOperand(0);
7507 }
7508
7509 // Don't recurse exponentially.
 // NOTE(review): the depth-limit condition guarding this early return
 // (line 7510) is elided in this listing.
7511 return SDValue();
7512
7513 // Pre-increment recursion depth for use in recursive calls.
7514 ++Depth;
7515 const SDNodeFlags Flags = Op->getFlags();
7516 EVT VT = Op.getValueType();
7517 unsigned Opcode = Op.getOpcode();
7518
7519 // Don't allow anything with multiple uses unless we know it is free.
7520 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7521 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7522 isFPExtFree(VT, Op.getOperand(0).getValueType());
7523 if (!IsFreeExtend)
7524 return SDValue();
7525 }
7526
 // Drop a speculatively created negation again if nothing ended up using it.
7527 auto RemoveDeadNode = [&](SDValue N) {
7528 if (N && N.getNode()->use_empty())
7529 DAG.RemoveDeadNode(N.getNode());
7530 };
7531
7532 SDLoc DL(Op);
7533
7534 // Because getNegatedExpression can delete nodes we need a handle to keep
7535 // temporary nodes alive in case the recursion manages to create an identical
7536 // node.
7537 std::list<HandleSDNode> Handles;
7538
7539 switch (Opcode) {
7540 case ISD::ConstantFP: {
7541 // Don't invert constant FP values after legalization unless the target says
7542 // the negated constant is legal.
7543 bool IsOpLegal =
 // NOTE(review): the first operand of this initializer (line 7544) is
 // elided in this listing.
7545 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7546 OptForSize);
7547
7548 if (LegalOps && !IsOpLegal)
7549 break;
7550
7551 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7552 V.changeSign();
7553 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7554
7555 // If we already have the use of the negated floating constant, it is free
7556 // to negate it even it has multiple uses.
7557 if (!Op.hasOneUse() && CFP.use_empty())
7558 break;
7560 return CFP;
7561 }
7562 case ISD::BUILD_VECTOR: {
7563 // Only permit BUILD_VECTOR of constants.
7564 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7565 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7566 }))
7567 break;
7568
7569 bool IsOpLegal =
 // NOTE(review): the leading operands of this initializer (lines
 // 7570-7571) are elided in this listing.
7572 llvm::all_of(Op->op_values(), [&](SDValue N) {
7573 return N.isUndef() ||
7574 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7575 OptForSize);
7576 });
7577
7578 if (LegalOps && !IsOpLegal)
7579 break;
7580
 // NOTE(review): the declaration of `Ops` (line 7581, the rebuilt operand
 // vector) is elided in this listing.
7582 for (SDValue C : Op->op_values()) {
7583 if (C.isUndef()) {
7584 Ops.push_back(C);
7585 continue;
7586 }
7587 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7588 V.changeSign();
7589 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7590 }
7592 return DAG.getBuildVector(VT, DL, Ops);
7593 }
7594 case ISD::FADD: {
 // -(X + Y) is only safe to rewrite when signed zeros don't matter.
7595 if (!Flags.hasNoSignedZeros())
7596 break;
7597
7598 // After operation legalization, it might not be legal to create new FSUBs.
7599 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7600 break;
7601 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7602
7603 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
 // NOTE(review): the `NegatibleCost CostX/CostY` declarations of this case
 // (e.g. lines 7604 and 7612) are elided in this listing.
7605 SDValue NegX =
7606 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7607 // Prevent this node from being deleted by the next call.
7608 if (NegX)
7609 Handles.emplace_back(NegX);
7610
7611 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7613 SDValue NegY =
7614 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7615
7616 // We're done with the handles.
7617 Handles.clear();
7618
7619 // Negate the X if its cost is less or equal than Y.
7620 if (NegX && (CostX <= CostY)) {
7621 Cost = CostX;
7622 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7623 if (NegY != N)
7624 RemoveDeadNode(NegY);
7625 return N;
7626 }
7627
7628 // Negate the Y if it is not expensive.
7629 if (NegY) {
7630 Cost = CostY;
7631 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7632 if (NegX != N)
7633 RemoveDeadNode(NegX);
7634 return N;
7635 }
7636 break;
7637 }
7638 case ISD::FSUB: {
7639 // We can't turn -(A-B) into B-A when we honor signed zeros.
7640 if (!Flags.hasNoSignedZeros())
7641 break;
7642
7643 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7644 // fold (fneg (fsub 0, Y)) -> Y
7645 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7646 if (C->isZero()) {
7648 return Y;
7649 }
7650
7651 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7653 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7654 }
7655 case ISD::FMUL:
7656 case ISD::FDIV: {
 // For mul/div, negating exactly one operand negates the result.
7657 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7658
7659 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7661 SDValue NegX =
7662 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7663 // Prevent this node from being deleted by the next call.
7664 if (NegX)
7665 Handles.emplace_back(NegX);
7666
7667 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7669 SDValue NegY =
7670 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7671
7672 // We're done with the handles.
7673 Handles.clear();
7674
7675 // Negate the X if its cost is less or equal than Y.
7676 if (NegX && (CostX <= CostY)) {
7677 Cost = CostX;
7678 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7679 if (NegY != N)
7680 RemoveDeadNode(NegY);
7681 return N;
7682 }
7683
7684 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7685 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7686 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7687 break;
7688
7689 // Negate the Y if it is not expensive.
7690 if (NegY) {
7691 Cost = CostY;
7692 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7693 if (NegX != N)
7694 RemoveDeadNode(NegX);
7695 return N;
7696 }
7697 break;
7698 }
7699 case ISD::FMA:
7700 case ISD::FMULADD:
7701 case ISD::FMAD: {
7702 if (!Flags.hasNoSignedZeros())
7703 break;
7704
 // -(X*Y+Z) requires negating Z plus exactly one of X or Y.
7705 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7707 SDValue NegZ =
7708 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7709 // Give up if fail to negate the Z.
7710 if (!NegZ)
7711 break;
7712
7713 // Prevent this node from being deleted by the next two calls.
7714 Handles.emplace_back(NegZ);
7715
7716 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7718 SDValue NegX =
7719 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7720 // Prevent this node from being deleted by the next call.
7721 if (NegX)
7722 Handles.emplace_back(NegX);
7723
7724 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7726 SDValue NegY =
7727 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7728
7729 // We're done with the handles.
7730 Handles.clear();
7731
7732 // Negate the X if its cost is less or equal than Y.
7733 if (NegX && (CostX <= CostY)) {
7734 Cost = std::min(CostX, CostZ);
7735 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7736 if (NegY != N)
7737 RemoveDeadNode(NegY);
7738 return N;
7739 }
7740
7741 // Negate the Y if it is not expensive.
7742 if (NegY) {
7743 Cost = std::min(CostY, CostZ);
7744 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7745 if (NegX != N)
7746 RemoveDeadNode(NegX);
7747 return N;
7748 }
7749 break;
7750 }
7751
7752 case ISD::FP_EXTEND:
7753 case ISD::FSIN:
 // These commute with negation: -op(X) == op(-X).
7754 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7755 OptForSize, Cost, Depth))
7756 return DAG.getNode(Opcode, DL, VT, NegV);
7757 break;
7758 case ISD::FP_ROUND:
7759 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7760 OptForSize, Cost, Depth))
7761 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7762 break;
7763 case ISD::SELECT:
7764 case ISD::VSELECT: {
7765 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7766 // iff at least one cost is cheaper and the other is neutral/cheaper
7767 SDValue LHS = Op.getOperand(1);
7769 SDValue NegLHS =
7770 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7771 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7772 RemoveDeadNode(NegLHS);
7773 break;
7774 }
7775
7776 // Prevent this node from being deleted by the next call.
7777 Handles.emplace_back(NegLHS);
7778
7779 SDValue RHS = Op.getOperand(2);
7781 SDValue NegRHS =
7782 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7783
7784 // We're done with the handles.
7785 Handles.clear();
7786
7787 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7788 (CostLHS != NegatibleCost::Cheaper &&
7789 CostRHS != NegatibleCost::Cheaper)) {
7790 RemoveDeadNode(NegLHS);
7791 RemoveDeadNode(NegRHS);
7792 break;
7793 }
7794
7795 Cost = std::min(CostLHS, CostRHS);
7796 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7797 }
7798 }
7799
 // No case produced a profitable negation.
7800 return SDValue();
7801}
7802
7803//===----------------------------------------------------------------------===//
7804// Legalization Utilities
7805//===----------------------------------------------------------------------===//
7806
// Expand a full-width multiply (MUL / UMUL_LOHI / SMUL_LOHI) of VT into
// operations on the half-width type HiLoVT, pushing the partial results into
// `Result` (low half first). Returns false when the target offers none of
// the required half-width multiply nodes.
// NOTE(review): this listing was extracted from a rendered page and several
// continuation lines are missing (e.g. the isOperationLegalOrCustom checks
// completing lines 7816-7823, the shift-legality checks at 7854 and
// 7893-7895, and part of the UseGlue condition at 7942); confirm against
// upstream TargetLowering.cpp before editing.
7807bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7808 SDValue LHS, SDValue RHS,
7810 EVT HiLoVT, SelectionDAG &DAG,
7811 MulExpansionKind Kind, SDValue LL,
7812 SDValue LH, SDValue RL, SDValue RH) const {
7813 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7814 Opcode == ISD::SMUL_LOHI);
7815
 // Which half-width multiply flavors may we emit for this expansion?
7816 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7818 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7820 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7822 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7824
7825 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7826 return false;
7827
7828 unsigned OuterBitSize = VT.getScalarSizeInBits();
7829 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7830
7831 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7832 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7833 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7834
 // Emit one half-width multiply yielding both halves, preferring the fused
 // *MUL_LOHI node and falling back to separate MUL + MULH*.
7835 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7836 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7837 bool Signed) -> bool {
7838 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7839 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7840 Hi = SDValue(Lo.getNode(), 1);
7841 return true;
7842 }
7843 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7844 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7845 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7846 return true;
7847 }
7848 return false;
7849 };
7850
7851 SDValue Lo, Hi;
7852
 // NOTE(review): the tail of this condition (line 7854, a truncate-legality
 // check) is elided in this listing.
7853 if (!LL.getNode() && !RL.getNode() &&
7855 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7856 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7857 }
7858
7859 if (!LL.getNode())
7860 return false;
7861
 // Fast path: if both operands fit in the low half, one multiply suffices.
7862 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7863 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7864 DAG.MaskedValueIsZero(RHS, HighMask)) {
7865 // The inputs are both zero-extended.
7866 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7867 Result.push_back(Lo);
7868 Result.push_back(Hi);
7869 if (Opcode != ISD::MUL) {
7870 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7871 Result.push_back(Zero);
7872 Result.push_back(Zero);
7873 }
7874 return true;
7875 }
7876 }
7877
7878 if (!VT.isVector() && Opcode == ISD::MUL &&
7879 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7880 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7881 // The input values are both sign-extended.
7882 // TODO non-MUL case?
7883 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7884 Result.push_back(Lo);
7885 Result.push_back(Hi);
7886 return true;
7887 }
7888 }
7889
7890 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7891 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7892
 // NOTE(review): the tail of this condition (lines 7894-7895, SRL/TRUNCATE
 // legality checks) is elided in this listing.
7893 if (!LH.getNode() && !RH.getNode() &&
7896 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7897 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7898 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7899 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7900 }
7901
7902 if (!LH.getNode())
7903 return false;
7904
7905 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7906 return false;
7907
7908 Result.push_back(Lo);
7909
 // Plain MUL only needs the low VT bits: fold the cross terms into Hi.
7910 if (Opcode == ISD::MUL) {
7911 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7912 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7913 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7914 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7915 Result.push_back(Hi);
7916 return true;
7917 }
7918
7919 // Compute the full width result.
7920 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7921 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
7922 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7923 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
7924 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
7925 };
7926
7927 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7928 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7929 return false;
7930
7931 // This is effectively the add part of a multiply-add of half-sized operands,
7932 // so it cannot overflow.
7933 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7934
7935 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7936 return false;
7937
7938 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7939 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7940
 // Prefer the legacy glued carry nodes when available; otherwise use the
 // boolean-carry UADDO_CARRY form.
 // NOTE(review): the second half of this condition (line 7942) is elided.
7941 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7943 if (UseGlue)
7944 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7945 Merge(Lo, Hi));
7946 else
7947 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
7948 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
7949
7950 SDValue Carry = Next.getValue(1);
7951 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7952 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7953
7954 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7955 return false;
7956
7957 if (UseGlue)
7958 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7959 Carry);
7960 else
7961 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
7962 Zero, Carry);
7963
7964 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7965
 // Signed high product: correct for negative operands by subtracting the
 // other operand's low half when a sign bit is set.
7966 if (Opcode == ISD::SMUL_LOHI) {
7967 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7968 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
7969 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
7970
7971 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7972 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
7973 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
7974 }
7975
7976 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7977 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7978 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7979 return true;
7980}
7981
7983 SelectionDAG &DAG, MulExpansionKind Kind,
7984 SDValue LL, SDValue LH, SDValue RL,
7985 SDValue RH) const {
7987 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7988 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7989 DAG, Kind, LL, LH, RL, RH);
7990 if (Ok) {
7991 assert(Result.size() == 2);
7992 Lo = Result[0];
7993 Hi = Result[1];
7994 }
7995 return Ok;
7996}
7997
7998// Optimize unsigned division or remainder by constants for types twice as large
7999// as a legal VT.
8000//
8001// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
8002// can be computed
8003// as:
8004// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
8005// Remainder = Sum % Constant
8006// This is based on "Remainder by Summing Digits" from Hacker's Delight.
8007//
8008// For division, we can compute the remainder using the algorithm described
8009// above, subtract it from the dividend to get an exact multiple of Constant.
8010 // Then multiply that exact multiple by the multiplicative inverse modulo
8011// (1 << (BitWidth / 2)) to get the quotient.
8012
8013// If Constant is even, we can shift right the dividend and the divisor by the
8014// number of trailing zeros in Constant before applying the remainder algorithm.
8015// If we're after the quotient, we can subtract this value from the shifted
8016// dividend and multiply by the multiplicative inverse of the shifted divisor.
8017// If we want the remainder, we shift the value left by the number of trailing
8018// zeros and add the bits that were shifted out of the dividend.
8021 EVT HiLoVT, SelectionDAG &DAG,
8022 SDValue LL, SDValue LH) const {
8023 unsigned Opcode = N->getOpcode();
8024 EVT VT = N->getValueType(0);
8025
8026 // TODO: Support signed division/remainder.
8027 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
8028 return false;
8029 assert(
8030 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
8031 "Unexpected opcode");
8032
8033 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
8034 if (!CN)
8035 return false;
8036
8037 APInt Divisor = CN->getAPIntValue();
8038 unsigned BitWidth = Divisor.getBitWidth();
8039 unsigned HBitWidth = BitWidth / 2;
8041 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8042
8043 // Divisor needs to less than (1 << HBitWidth).
8044 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
8045 if (Divisor.uge(HalfMaxPlus1))
8046 return false;
8047
8048 // We depend on the UREM by constant optimization in DAGCombiner that requires
8049 // high multiply.
8050 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
8052 return false;
8053
8054 // Don't expand if optimizing for size.
8055 if (DAG.shouldOptForSize())
8056 return false;
8057
8058 // Early out for 0 or 1 divisors.
8059 if (Divisor.ule(1))
8060 return false;
8061
8062 // If the divisor is even, shift it until it becomes odd.
8063 unsigned TrailingZeros = 0;
8064 if (!Divisor[0]) {
8065 TrailingZeros = Divisor.countr_zero();
8066 Divisor.lshrInPlace(TrailingZeros);
8067 }
8068
8069 SDLoc dl(N);
8070 SDValue Sum;
8071 SDValue PartialRem;
8072
8073 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
8074 // then add in the carry.
8075 // TODO: If we can't split it in half, we might be able to split into 3 or
8076 // more pieces using a smaller bit width.
8077 if (HalfMaxPlus1.urem(Divisor).isOne()) {
8078 assert(!LL == !LH && "Expected both input halves or no input halves!");
8079 if (!LL)
8080 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
8081
8082 // Shift the input by the number of TrailingZeros in the divisor. The
8083 // shifted out bits will be added to the remainder later.
8084 if (TrailingZeros) {
8085 // Save the shifted off bits if we need the remainder.
8086 if (Opcode != ISD::UDIV) {
8087 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
8088 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
8089 DAG.getConstant(Mask, dl, HiLoVT));
8090 }
8091
8092 LL = DAG.getNode(
8093 ISD::OR, dl, HiLoVT,
8094 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
8095 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
8096 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
8097 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
8098 HiLoVT, dl)));
8099 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
8100 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8101 }
8102
8103 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8104 EVT SetCCType =
8105 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
8107 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
8108 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
8109 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
8110 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
8111 } else {
8112 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
8113 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
8114 // If the boolean for the target is 0 or 1, we can add the setcc result
8115 // directly.
8116 if (getBooleanContents(HiLoVT) ==
8118 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
8119 else
8120 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
8121 DAG.getConstant(0, dl, HiLoVT));
8122 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
8123 }
8124 }
8125
8126 // If we didn't find a sum, we can't do the expansion.
8127 if (!Sum)
8128 return false;
8129
8130 // Perform a HiLoVT urem on the Sum using truncated divisor.
8131 SDValue RemL =
8132 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
8133 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
8134 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
8135
8136 if (Opcode != ISD::UREM) {
8137 // Subtract the remainder from the shifted dividend.
8138 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
8139 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
8140
8141 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
8142
8143 // Multiply by the multiplicative inverse of the divisor modulo
8144 // (1 << BitWidth).
8145 APInt MulFactor = Divisor.multiplicativeInverse();
8146
8147 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
8148 DAG.getConstant(MulFactor, dl, VT));
8149
8150 // Split the quotient into low and high parts.
8151 SDValue QuotL, QuotH;
8152 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
8153 Result.push_back(QuotL);
8154 Result.push_back(QuotH);
8155 }
8156
8157 if (Opcode != ISD::UDIV) {
8158 // If we shifted the input, shift the remainder left and add the bits we
8159 // shifted off the input.
8160 if (TrailingZeros) {
8161 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
8162 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8163 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
8164 }
8165 Result.push_back(RemL);
8166 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
8167 }
8168
8169 return true;
8170}
8171
8172 // Check that (every element of) Z is undef or not an exact multiple of BW.
// When this returns true, every defined lane of the shift amount Z satisfies
// Z % BW != 0, so the funnel-shift expansions below may use the form that
// assumes a non-zero amount (avoiding an out-of-range inverse shift).
// A null ConstantSDNode (undef lane, permitted by AllowUndefs) is accepted,
// matching the "undef or" part of the contract above.
// NOTE(review): a leading "return ISD::matchUnaryPredicate(" line appears to
// have been lost in extraction between the signature and "Z," — confirm
// against upstream LLVM before editing.
8173 static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8175 Z,
8176 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
8177 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8178 }
8179
// Body of the vector-predicated funnel-shift expansion (the declaration line
// was lost in extraction — presumably TargetLowering::expandVPFunnelShift;
// confirm against upstream). Lowers ISD::VP_FSHL/VP_FSHR into VP shift, mask
// and or nodes. Operands: X (high input), Y (low input), Z (shift amount),
// Mask (lane predicate), VL (explicit vector length); Mask and VL are
// threaded through every node built here.
8181 EVT VT = Node->getValueType(0);
8182 SDValue ShX, ShY;
8183 SDValue ShAmt, InvShAmt;
8184 SDValue X = Node->getOperand(0);
8185 SDValue Y = Node->getOperand(1);
8186 SDValue Z = Node->getOperand(2);
8187 SDValue Mask = Node->getOperand(3);
8188 SDValue VL = Node->getOperand(4);
8189 
8190 unsigned BW = VT.getScalarSizeInBits();
8191 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8192 SDLoc DL(SDValue(Node, 0));
8193 
8194 EVT ShVT = Z.getValueType();
// Fast path: every defined lane of Z is a non-multiple of BW, so both shift
// amounts (C and BW - C) are in range and no zero-shift guard is needed.
8195 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8196 // fshl: X << C | Y >> (BW - C)
8197 // fshr: X << (BW - C) | Y >> C
8198 // where C = Z % BW is not zero
8199 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8200 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8201 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
8202 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8203 VL);
8204 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8205 VL);
8206 } else {
// General path: Z % BW may be zero, so the inverse shift is split into a
// fixed shift by one plus a shift by (BW - 1 - C), keeping both in range.
8207 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8208 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8209 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8210 if (isPowerOf2_32(BW)) {
8211 // Z % BW -> Z & (BW - 1)
8212 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8213 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8214 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8215 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8216 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8217 } else {
// Non-power-of-two width: fall back to an explicit remainder.
8218 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8219 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8220 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8221 }
8222 
8223 SDValue One = DAG.getConstant(1, DL, ShVT);
8224 if (IsFSHL) {
8225 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8226 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8227 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8228 } else {
8229 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8230 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8231 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8232 }
8233 }
// Combine the two partial shifts into the funnel-shift result.
8234 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8235}
8236
// TargetLowering::expandFunnelShift (first signature line lost in
// extraction): lowers ISD::FSHL/FSHR into shift/and/or arithmetic, or into
// the opposite-direction funnel shift when that one is better supported.
// Returns an empty SDValue when the expansion is not profitable/possible.
8238 SelectionDAG &DAG) const {
// Vector-predicated nodes have their own expansion with mask/VL threading.
8239 if (Node->isVPOpcode())
8240 return expandVPFunnelShift(Node, DAG);
8241 
8242 EVT VT = Node->getValueType(0);
8243 
// Bail out for vectors unless the component shift/or ops are available.
// NOTE(review): the remainder of this condition (lines checking SRL/OR/etc.
// legality) was lost in extraction — confirm against upstream LLVM.
8244 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8248 return SDValue();
8249 
8250 SDValue X = Node->getOperand(0);
8251 SDValue Y = Node->getOperand(1);
8252 SDValue Z = Node->getOperand(2);
8253 
8254 unsigned BW = VT.getScalarSizeInBits();
8255 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8256 SDLoc DL(SDValue(Node, 0));
8257 
8258 EVT ShVT = Z.getValueType();
8259 
8260 // If a funnel shift in the other direction is more supported, use it.
8261 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8262 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8263 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8264 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8265 // fshl X, Y, Z -> fshr X, Y, -Z
8266 // fshr X, Y, Z -> fshl X, Y, -Z
8267 Z = DAG.getNegative(Z, DL, ShVT);
8268 } else {
// A zero amount must produce X (fshl) / Y (fshr); pre-shifting by one and
// using ~Z keeps the reversed opcode's amount in range for that case.
8269 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8270 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8271 SDValue One = DAG.getConstant(1, DL, ShVT);
8272 if (IsFSHL) {
8273 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8274 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8275 } else {
8276 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8277 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8278 }
8279 Z = DAG.getNOT(DL, Z, ShVT);
8280 }
8281 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8282 }
8283 
8284 SDValue ShX, ShY;
8285 SDValue ShAmt, InvShAmt;
// Scalar expansion mirroring expandVPFunnelShift: simple form when the
// amount is provably non-zero mod BW, guarded two-step form otherwise.
8286 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8287 // fshl: X << C | Y >> (BW - C)
8288 // fshr: X << (BW - C) | Y >> C
8289 // where C = Z % BW is not zero
8290 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8291 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8292 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8293 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8294 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8295 } else {
8296 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8297 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8298 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8299 if (isPowerOf2_32(BW)) {
8300 // Z % BW -> Z & (BW - 1)
8301 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8302 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8303 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8304 } else {
8305 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8306 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8307 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8308 }
8309 
8310 SDValue One = DAG.getConstant(1, DL, ShVT);
8311 if (IsFSHL) {
8312 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8313 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8314 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8315 } else {
8316 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8317 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8318 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8319 }
8320 }
8321 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8322}
8323
8324// TODO: Merge with expandFunnelShift.
// TargetLowering::expandROT (first signature line lost in extraction):
// lowers ISD::ROTL/ROTR into shift/and/or arithmetic, or into the
// opposite-direction rotate when that one is better supported. Returns an
// empty SDValue when vector expansion is disallowed/unsupported.
8326 SelectionDAG &DAG) const {
8327 EVT VT = Node->getValueType(0);
8328 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8329 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8330 SDValue Op0 = Node->getOperand(0);
8331 SDValue Op1 = Node->getOperand(1);
8332 SDLoc DL(SDValue(Node, 0));
8333 
8334 EVT ShVT = Op1.getValueType();
8335 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8336 
8337 // If a rotate in the other direction is more supported, use it.
8338 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8339 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8340 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
// rot(x, c) == rev-rot(x, -c) for power-of-two widths.
8341 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8342 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8343 }
8344 
// NOTE(review): the rest of this condition (vector shift/or legality
// checks, lines 8346-8350 upstream) was lost in extraction — confirm.
8345 if (!AllowVectorOps && VT.isVector() &&
8351 return SDValue();
8352 
8353 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8354 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8355 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8356 SDValue ShVal;
8357 SDValue HsVal;
8358 if (isPowerOf2_32(EltSizeInBits)) {
8359 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8360 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8361 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8362 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8363 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8364 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8365 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8366 } else {
// Non-power-of-two width: the (w - 1 - c%w) counter-shift is split into a
// fixed shift by one plus the remainder so it never goes out of range.
8367 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8368 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8369 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8370 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8371 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8372 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8373 SDValue One = DAG.getConstant(1, DL, ShVT);
8374 HsVal =
8375 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8376 }
8377 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8378}
8379
// TargetLowering::expandShiftParts (first signature line lost in
// extraction): lowers ISD::SHL_PARTS/SRL_PARTS/SRA_PARTS — a double-wide
// shift of the (ShOpLo, ShOpHi) pair — into FSHL/FSHR plus plain shifts,
// writing the two result halves into the Lo/Hi out-parameters.
8381 SelectionDAG &DAG) const {
8382 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8383 EVT VT = Node->getValueType(0);
8384 unsigned VTBits = VT.getScalarSizeInBits();
8385 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8386 
8387 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8388 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8389 SDValue ShOpLo = Node->getOperand(0);
8390 SDValue ShOpHi = Node->getOperand(1);
8391 SDValue ShAmt = Node->getOperand(2);
8392 EVT ShAmtVT = ShAmt.getValueType();
8393 EVT ShAmtCCVT =
8394 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8395 SDLoc dl(Node);
8396 
8397 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8398 // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8399 // away during isel.
8400 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8401 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
// Tmp1 is the "overflow half": sign-fill for SRA_PARTS, zero otherwise.
8402 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8403 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8404 : DAG.getConstant(0, dl, VT);
8405 
// Tmp2: result half that mixes both inputs (funnel shift);
// Tmp3: result half produced from a single input.
8406 SDValue Tmp2, Tmp3;
8407 if (IsSHL) {
8408 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8409 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8410 } else {
8411 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8412 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8413 }
8414 
8415 // If the shift amount is larger or equal than the width of a part we don't
8416 // use the result from the FSHL/FSHR. Insert a test and select the appropriate
8417 // values for large shift amounts.
8418 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8419 DAG.getConstant(VTBits, dl, ShAmtVT));
8420 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8421 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8422 
// Cond is true when the amount's VTBits bit is set, i.e. amount >= VTBits
// (the node's amount is assumed < 2*VTBits by the *_PARTS contract).
8423 if (IsSHL) {
8424 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8425 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8426 } else {
8427 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8428 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8429 }
8430}
8431
// TargetLowering::expandFP_TO_SINT (first signature line lost in
// extraction): expands FP_TO_SINT into integer bit manipulation. Currently
// restricted to f32 -> i64 (see the FIXME below); writes the lowered value
// into Result and returns true on success, false when unsupported.
8433 SelectionDAG &DAG) const {
8434 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8435 SDValue Src = Node->getOperand(OpNo);
8436 EVT SrcVT = Src.getValueType();
8437 EVT DstVT = Node->getValueType(0);
8438 SDLoc dl(SDValue(Node, 0));
8439 
8440 // FIXME: Only f32 to i64 conversions are supported.
8441 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8442 return false;
8443 
8444 if (Node->isStrictFPOpcode())
8445 // When a NaN is converted to an integer a trap is allowed. We can't
8446 // use this expansion here because it would eliminate that trap. Other
8447 // traps are also allowed and cannot be eliminated. See
8448 // IEEE 754-2008 sec 5.8.
8449 return false;
8450 
8451 // Expand f32 -> i64 conversion
8452 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8453 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8454 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8455 EVT IntVT = SrcVT.changeTypeToInteger();
8456 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8457 
// f32 layout constants: 8 exponent bits at [30:23] (mask 0x7F800000,
// bias 127) and a 23-bit mantissa (mask 0x007FFFFF). These are f32-specific,
// matching the SrcVT == MVT::f32 guard above.
8458 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8459 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8460 SDValue Bias = DAG.getConstant(127, dl, IntVT);
8461 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8462 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8463 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8464 
8465 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8466 
// Unbiased exponent: ((Bits & ExponentMask) >> 23) - 127.
8467 SDValue ExponentBits = DAG.getNode(
8468 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8469 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8470 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8471 
// Sign is smeared across the whole word via arithmetic shift: 0 or -1.
8472 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8473 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8474 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8475 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8476 
// Significand with the implicit leading 1 (0x00800000) restored.
8477 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8478 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8479 DAG.getConstant(0x00800000, dl, IntVT));
8480 
8481 R = DAG.getZExtOrTrunc(R, dl, DstVT);
8482 
// Scale the significand by 2^(Exponent - 23), shifting left or right
// depending on whether the exponent exceeds the mantissa width.
8483 R = DAG.getSelectCC(
8484 dl, Exponent, ExponentLoBit,
8485 DAG.getNode(ISD::SHL, dl, DstVT, R,
8486 DAG.getZExtOrTrunc(
8487 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8488 dl, IntShVT)),
8489 DAG.getNode(ISD::SRL, dl, DstVT, R,
8490 DAG.getZExtOrTrunc(
8491 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8492 dl, IntShVT)),
8493 ISD::SETGT);
8494 
// Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones.
8495 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8496 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8497 
// A negative unbiased exponent means |Src| < 1, which truncates to 0.
8498 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8499 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8500 return true;
8501}
8502
// TargetLowering::expandFP_TO_UINT (first signature line lost in
// extraction): expands FP_TO_UINT in terms of FP_TO_SINT by offsetting the
// input around the destination sign bit. Writes the lowered value into
// Result (and the token into Chain for strict ops); returns true on success.
8504 SDValue &Chain,
8505 SelectionDAG &DAG) const {
8506 SDLoc dl(SDValue(Node, 0));
8507 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8508 SDValue Src = Node->getOperand(OpNo);
8509 
8510 EVT SrcVT = Src.getValueType();
8511 EVT DstVT = Node->getValueType(0);
8512 EVT SetCCVT =
8513 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8514 EVT DstSetCCVT =
8515 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8516 
8517 // Only expand vector types if we have the appropriate vector bit operations.
// NOTE(review): the tail of this expression and of the vector-legality
// condition below (upstream lines 8519/8521) was lost in extraction —
// confirm against upstream LLVM.
8518 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8520 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8522 return false;
8523 
8524 // If the maximum float value is smaller then the signed integer range,
8525 // the destination signmask can't be represented by the float, so we can
8526 // just use FP_TO_SINT directly.
8527 const fltSemantics &APFSem = SrcVT.getFltSemantics();
8528 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8529 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
// NOTE(review): the "if (APFloat::opOverflow &" line preceding this
// convertFromAPInt call was lost in extraction — confirm upstream.
8531 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
8532 if (Node->isStrictFPOpcode()) {
8533 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8534 { Node->getOperand(0), Src });
8535 Chain = Result.getValue(1);
8536 } else
8537 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8538 return true;
8539 }
8540 
8541 // Don't expand it if there isn't cheap fsub instruction.
// NOTE(review): the "if (!isOperationLegalOrCustom(" line starting this
// condition was lost in extraction — confirm upstream.
8543 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
8544 return false;
8545 
// Cst is the FP value of the destination sign mask (2^(DstBits-1)).
8546 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8547 SDValue Sel;
8548 
8549 if (Node->isStrictFPOpcode()) {
8550 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8551 Node->getOperand(0), /*IsSignaling*/ true);
8552 Chain = Sel.getValue(1);
8553 } else {
8554 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
8555 }
8556 
8557 bool Strict = Node->isStrictFPOpcode() ||
8558 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
8559 
8560 if (Strict) {
8561 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
8562 // signmask then offset (the result of which should be fully representable).
8563 // Sel = Src < 0x8000000000000000
8564 // FltOfs = select Sel, 0, 0x8000000000000000
8565 // IntOfs = select Sel, 0, 0x8000000000000000
8566 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8567 
8568 // TODO: Should any fast-math-flags be set for the FSUB?
8569 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
8570 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8571 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8572 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
8573 DAG.getConstant(0, dl, DstVT),
8574 DAG.getConstant(SignMask, dl, DstVT));
8575 SDValue SInt;
8576 if (Node->isStrictFPOpcode()) {
8577 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8578 { Chain, Src, FltOfs });
8579 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8580 { Val.getValue(1), Val });
8581 Chain = SInt.getValue(1);
8582 } else {
8583 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
8584 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
8585 }
// XOR with the sign mask re-adds the 2^(DstBits-1) offset in integer space.
8586 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8587 } else {
8588 // Expand based on maximum range of FP_TO_SINT:
8589 // True = fp_to_sint(Src)
8590 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8591 // Result = select (Src < 0x8000000000000000), True, False
8592 
8593 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8594 // TODO: Should any fast-math-flags be set for the FSUB?
8595 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
8596 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
8597 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
8598 DAG.getConstant(SignMask, dl, DstVT));
8599 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8600 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
8601 }
8602 return true;
8603}
8604
// TargetLowering::expandUINT_TO_FP (first signature line lost in
// extraction): expands i64 -> f64 UINT_TO_FP using the compiler-rt
// __floatundidf split-halves technique. Writes the lowered value into
// Result; returns true on success, false when unsupported.
8606 SDValue &Chain, SelectionDAG &DAG) const {
8607 // This transform is not correct for converting 0 when rounding mode is set
8608 // to round toward negative infinity which will produce -0.0. So disable
8609 // under strictfp.
8610 if (Node->isStrictFPOpcode())
8611 return false;
8612 
8613 SDValue Src = Node->getOperand(0);
8614 EVT SrcVT = Src.getValueType();
8615 EVT DstVT = Node->getValueType(0);
8616 
8617 // If the input is known to be non-negative and SINT_TO_FP is legal then use
8618 // it.
// NOTE(review): the SINT_TO_FP legality check that completes this condition
// (upstream line 8620) was lost in extraction — confirm upstream.
8619 if (Node->getFlags().hasNonNeg() &&
8621 Result =
8622 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
8623 return true;
8624 }
8625 
8626 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8627 return false;
8628 
8629 // Only expand vector types if we have the appropriate vector bit
8630 // operations.
// NOTE(review): the remaining legality checks of this condition (upstream
// lines 8632-8635) were lost in extraction — confirm upstream.
8631 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8636 return false;
8637 
8638 SDLoc dl(SDValue(Node, 0));
8639 
8640 // Implementation of unsigned i64 to f64 following the algorithm in
8641 // __floatundidf in compiler_rt. This implementation performs rounding
8642 // correctly in all rounding modes with the exception of converting 0
8643 // when rounding toward negative infinity. In that case the fsub will
8644 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
8645 // incorrect.
// Magic bit patterns: 0x433... is 2^52 as an f64, 0x453... is 2^84; OR-ing
// a 32-bit half into the mantissa of these constants makes an exact f64.
8646 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8647 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8648 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8649 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8650 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8651 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
8652 
// Split the 64-bit input into 32-bit halves, embed each in a known-exponent
// f64, cancel the biases with one fsub, then add the halves.
8653 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8654 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8655 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8656 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8657 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8658 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
8659 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8660 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8661 return true;
8662}
8663
8664SDValue
// TargetLowering::createSelectForFMINNUM_FMAXNUM (declaration line lost in
// extraction): when the node carries the no-NaNs flag, lowers
// FMINNUM/FMAXNUM (and their strict variants, per the assert) to a plain
// compare-and-select; otherwise returns an empty SDValue.
8666 SelectionDAG &DAG) const {
8667 unsigned Opcode = Node->getOpcode();
8668 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8669 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8670 "Wrong opcode");
8671 
// Without NaNs, min/max reduces to an ordered compare plus select.
8672 if (Node->getFlags().hasNoNaNs()) {
8673 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8674 EVT VT = Node->getValueType(0);
// NOTE(review): part of this bail-out condition (upstream line 8676) was
// lost in extraction — confirm against upstream LLVM.
8675 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8677 VT.isVector())
8678 return SDValue();
8679 SDValue Op1 = Node->getOperand(0);
8680 SDValue Op2 = Node->getOperand(1);
8681 return DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred,
8682 Node->getFlags());
8683 }
8684 
8685 return SDValue();
8686}
8687
// TargetLowering::expandFMINNUM_FMAXNUM (first signature line lost in
// extraction): lowers FMINNUM/FMAXNUM via, in order of preference, the
// _IEEE variants (with canonicalization to quiet sNaNs), FMINIMUM/FMAXIMUM
// when NaNs/signed zeros can't matter, or a compare-select fallback.
8689 SelectionDAG &DAG) const {
8690 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
8691 return Expanded;
8692 
8693 EVT VT = Node->getValueType(0);
// NOTE(review): the report_fatal_error(...) call line preceding this string
// argument (upstream line 8695) was lost in extraction — confirm upstream.
8694 if (VT.isScalableVector())
8696 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8697 
8698 SDLoc dl(Node);
// NOTE(review): the initializer selecting FMINNUM_IEEE/FMAXNUM_IEEE
// (upstream line 8700) was lost in extraction — confirm upstream.
8699 unsigned NewOp =
8701 
8702 if (isOperationLegalOrCustom(NewOp, VT)) {
8703 SDValue Quiet0 = Node->getOperand(0);
8704 SDValue Quiet1 = Node->getOperand(1);
8705 
8706 if (!Node->getFlags().hasNoNaNs()) {
8707 // Insert canonicalizes if it's possible we need to quiet to get correct
8708 // sNaN behavior.
8709 if (!DAG.isKnownNeverSNaN(Quiet0)) {
8710 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8711 Node->getFlags());
8712 }
8713 if (!DAG.isKnownNeverSNaN(Quiet1)) {
8714 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8715 Node->getFlags());
8716 }
8717 }
8718 
8719 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8720 }
8721 
8722 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8723 // instead if there are no NaNs and there can't be an incompatible zero
8724 // compare: at least one operand isn't +/-0, or there are no signed-zeros.
8725 if ((Node->getFlags().hasNoNaNs() ||
8726 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8727 DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
8728 (Node->getFlags().hasNoSignedZeros() ||
8729 DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
8730 DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
8731 unsigned IEEE2018Op =
8732 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8733 if (isOperationLegalOrCustom(IEEE2018Op, VT))
8734 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8735 Node->getOperand(1), Node->getFlags());
8736 }
8737 
// Last resort: compare-and-select lowering (valid under the no-NaNs flag).
// NOTE(review): the "if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(...)"
// line (upstream 8738) was lost in extraction — confirm upstream.
8739 return SelCC;
8740 
8741 return SDValue();
8742}
8743
// TargetLowering::expandFMINIMUM_FMAXIMUM (first signature line lost in
// extraction): lowers FMINIMUM/FMAXIMUM (NaN-propagating, -0.0 < +0.0) by
// first computing a non-NaN-propagating min/max, then patching in NaN
// propagation and signed-zero ordering where they can still be observed.
8745 SelectionDAG &DAG) const {
8746 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
8747 return Expanded;
8748 
8749 SDLoc DL(N);
8750 SDValue LHS = N->getOperand(0);
8751 SDValue RHS = N->getOperand(1);
8752 unsigned Opc = N->getOpcode();
8753 EVT VT = N->getValueType(0);
8754 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8755 bool IsMax = Opc == ISD::FMAXIMUM;
8756 SDNodeFlags Flags = N->getFlags();
8757 
8758 // First, implement comparison not propagating NaN. If no native fmin or fmax
8759 // available, use plain select with setcc instead.
// NOTE(review): the "SDValue MinMax;" declaration (upstream line 8760) was
// lost in extraction — confirm upstream.
8761 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8762 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8763 
8764 // FIXME: We should probably define fminnum/fmaxnum variants with correct
8765 // signed zero behavior.
8766 bool MinMaxMustRespectOrderedZero = false;
8767 
8768 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
8769 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
// The _IEEE variants already order -0.0 before +0.0, so the signed-zero
// fixup at the bottom can be skipped.
8770 MinMaxMustRespectOrderedZero = true;
8771 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
8772 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
8773 } else {
// NOTE(review): the vector bail-out condition preceding this UnrollVectorOp
// (upstream line 8774) was lost in extraction — confirm upstream.
8775 return DAG.UnrollVectorOp(N);
8776 
8777 // NaN (if exists) will be propagated later, so orderness doesn't matter.
8778 SDValue Compare =
8779 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
8780 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
8781 }
8782 
8783 // Propagate any NaN of both operands
8784 if (!N->getFlags().hasNoNaNs() &&
8785 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
// NOTE(review): the APFloat::getQNaN(...) argument line (upstream 8787) was
// lost in extraction — confirm upstream.
8786 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
8788 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
8789 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
8790 }
8791 
8792 // fminimum/fmaximum requires -0.0 less than +0.0
8793 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8794 !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
// Only when the preliminary result compares equal to 0.0 could the wrong
// signed zero have been chosen; pick the preferred zero via IS_FPCLASS.
8795 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8796 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
8797 SDValue TestZero =
8798 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8799 SDValue LCmp = DAG.getSelect(
8800 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8801 MinMax, Flags);
8802 SDValue RCmp = DAG.getSelect(
8803 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
8804 LCmp, Flags);
8805 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8806 }
8807 
8808 return MinMax;
8809}
8810
// TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM (first signature line lost
// in extraction): lowers FMINIMUMNUM/FMAXIMUMNUM (NaN-discarding, ordered
// signed zeros). Tries progressively weaker native ops whose differences
// are unobservable for the given operands, then falls back to explicit
// NaN overriding, compare-select, and a signed-zero fixup.
8812 SelectionDAG &DAG) const {
8813 SDLoc DL(Node);
8814 SDValue LHS = Node->getOperand(0);
8815 SDValue RHS = Node->getOperand(1);
8816 unsigned Opc = Node->getOpcode();
8817 EVT VT = Node->getValueType(0);
8818 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8819 bool IsMax = Opc == ISD::FMAXIMUMNUM;
8820 SDNodeFlags Flags = Node->getFlags();
8821 
// NOTE(review): the initializer for NewOp (upstream line 8823, presumably
// selecting the _IEEE min/max variant) was lost in extraction — confirm.
8822 unsigned NewOp =
8824 
8825 if (isOperationLegalOrCustom(NewOp, VT)) {
8826 if (!Flags.hasNoNaNs()) {
8827 // Insert canonicalizes if it's possible we need to quiet to get correct
8828 // sNaN behavior.
8829 if (!DAG.isKnownNeverSNaN(LHS)) {
8830 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
8831 }
8832 if (!DAG.isKnownNeverSNaN(RHS)) {
8833 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
8834 }
8835 }
8836 
8837 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
8838 }
8839 
8840 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
8841 // same behaviors for all of other cases: +0.0 vs -0.0 included.
8842 if (Flags.hasNoNaNs() ||
8843 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
// NOTE(review): the initializer selecting FMINIMUM/FMAXIMUM (upstream line
// 8845) was lost in extraction — confirm upstream.
8844 unsigned IEEE2019Op =
8846 if (isOperationLegalOrCustom(IEEE2019Op, VT))
8847 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
8848 }
8849 
8850 // FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
8851 // either one for +0.0 vs -0.0.
8852 if ((Flags.hasNoNaNs() ||
8853 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
8854 (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
8855 DAG.isKnownNeverZeroFloat(RHS))) {
8856 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
8857 if (isOperationLegalOrCustom(IEEE2008Op, VT))
8858 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
8859 }
8860 
// NOTE(review): the rest of this vector-legality condition (upstream line
// 8863) was lost in extraction — confirm upstream.
8861 if (VT.isVector() &&
8864 return DAG.UnrollVectorOp(Node);
8865 
8866 // If only one operand is NaN, override it with another operand.
// SETUO (x unordered x) is true exactly when the operand is NaN.
8867 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
8868 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
8869 }
8870 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
8871 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
8872 }
8873 
8874 // Always prefer RHS if equal.
8875 SDValue MinMax =
8876 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
8877 
8878 // TODO: We need quiet sNaN if strictfp.
8879 
8880 // Fixup signed zero behavior.
8881 if (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
8882 DAG.isKnownNeverZeroFloat(RHS)) {
8883 return MinMax;
8884 }
8885 SDValue TestZero =
8886 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8887 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8888 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
8889 EVT IntVT = VT.changeTypeToInteger();
8890 EVT FloatVT = VT.changeElementType(MVT::f32);
8891 SDValue LHSTrunc = LHS;
// NOTE(review): the condition guarding this FP_ROUND narrowing (upstream
// line 8892) was lost in extraction — confirm upstream.
8893 LHSTrunc = DAG.getNode(ISD::FP_ROUND, DL, FloatVT, LHS,
8894 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8895 }
8896 // It's OK to select from LHS and MinMax, with only one ISD::IS_FPCLASS, as
8897 // we preferred RHS when generate MinMax, if the operands are equal.
8898 SDValue RetZero = DAG.getSelect(
8899 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHSTrunc, TestZero), LHS,
8900 MinMax, Flags);
8901 return DAG.getSelect(DL, VT, IsZero, RetZero, MinMax, Flags);
8902}
8903
8904/// Returns a true value if if this FPClassTest can be performed with an ordered
8905/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8906/// std::nullopt if it cannot be performed as a compare with 0.
8907static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8908 const fltSemantics &Semantics,
8909 const MachineFunction &MF) {
8910 FPClassTest OrderedMask = Test & ~fcNan;
8911 FPClassTest NanTest = Test & fcNan;
8912 bool IsOrdered = NanTest == fcNone;
8913 bool IsUnordered = NanTest == fcNan;
8914
8915 // Skip cases that are testing for only a qnan or snan.
8916 if (!IsOrdered && !IsUnordered)
8917 return std::nullopt;
8918
8919 if (OrderedMask == fcZero &&
8920 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8921 return IsOrdered;
8922 if (OrderedMask == (fcZero | fcSubnormal) &&
8923 MF.getDenormalMode(Semantics).inputsAreZero())
8924 return IsOrdered;
8925 return std::nullopt;
8926}
8927
                                         const FPClassTest OrigTestMask,
                                         SDNodeFlags Flags, const SDLoc &DL,
                                         SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  assert(OperandVT.isFloatingPoint());
  FPClassTest Test = OrigTestMask;

  // Degenerated cases.
  if (Test == fcNone)
    return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
  if (Test == fcAllFlags)
    return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);

  // PPC double double is a pair of doubles, of which the higher part determines
  // the value class.
  if (OperandVT == MVT::ppcf128) {
    Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
                     DAG.getConstant(1, DL, MVT::i32));
    OperandVT = MVT::f64;
  }

  // Floating-point type properties.
  EVT ScalarFloatVT = OperandVT.getScalarType();
  const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
  const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
  // x86_fp80 has an explicit integer bit in the mantissa; several checks
  // below must special-case it.
  bool IsF80 = (ScalarFloatVT == MVT::f80);

  // Some checks can be implemented using float comparisons, if floating point
  // exceptions are ignored.
  if (Flags.hasNoFPExcept() &&
    // Work on a possibly inverted copy of the test; inverting can turn a
    // multi-class test into a single compare (e.g. !nan instead of
    // inf|normal|subnormal|zero).
    FPClassTest FPTestMask = Test;
    bool IsInvertedFP = false;

    if (FPClassTest InvertedFPCheck =
            invertFPClassTestIfSimpler(FPTestMask, true)) {
      FPTestMask = InvertedFPCheck;
      IsInvertedFP = true;
    }

    ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
    ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;

    // See if we can fold an | fcNan into an unordered compare.
    FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;

    // Can't fold the ordered check if we're only testing for snan or qnan
    // individually.
    if ((FPTestMask & fcNan) != fcNan)
      OrderedFPTestMask = FPTestMask;

    const bool IsOrdered = FPTestMask == OrderedFPTestMask;

    if (std::optional<bool> IsCmp0 =
            isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
        IsCmp0 && (isCondCodeLegalOrCustom(
                      *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
                      OperandVT.getScalarType().getSimpleVT()))) {

      // If denormals could be implicitly treated as 0, this is not equivalent
      // to a compare with 0 since it will also be true for denormals.
      return DAG.getSetCC(DL, ResultVT, Op,
                          DAG.getConstantFP(0.0, DL, OperandVT),
                          *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    // isnan(x) --> x != x (unordered self-compare).
    if (FPTestMask == fcNan &&
            OperandVT.getScalarType().getSimpleVT()))
      return DAG.getSetCC(DL, ResultVT, Op, Op,
                          IsInvertedFP ? ISD::SETO : ISD::SETUO);

    bool IsOrderedInf = FPTestMask == fcInf;
    if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
        isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
                                             : UnorderedCmpOpcode,
                                OperandVT.getScalarType().getSimpleVT()) &&
        (OperandVT.isVector() &&
      // isinf(x) --> fabs(x) == inf
      SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
      SDValue Inf =
          DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
      return DAG.getSetCC(DL, ResultVT, Abs, Inf,
                          IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
        isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
                                          : UnorderedCmpOpcode,
                                OperandVT.getSimpleVT())) {
      // isposinf(x) --> x == inf
      // isneginf(x) --> x == -inf
      // isposinf(x) || nan --> x u== inf
      // isneginf(x) || nan --> x u== -inf

      SDValue Inf = DAG.getConstantFP(
          APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
          OperandVT);
      return DAG.getSetCC(DL, ResultVT, Op, Inf,
                          IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
      // TODO: Could handle ordered case, but it produces worse code for
      // x86. Maybe handle ordered if fabs is free?

      ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
      ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;

      if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
                                  OperandVT.getScalarType().getSimpleVT())) {
        // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal

        // TODO: Maybe only makes sense if fabs is free. Integer test of
        // exponent bits seems better for x86.
        SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
        SDValue SmallestNormal = DAG.getConstantFP(
            APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
        return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
                            IsOrdered ? OrderedOp : UnorderedOp);
      }
    }

    if (FPTestMask == fcNormal) {
      // TODO: Handle unordered
      ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
      ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;

      if (isCondCodeLegalOrCustom(IsFiniteOp,
                                  OperandVT.getScalarType().getSimpleVT()) &&
          isCondCodeLegalOrCustom(IsNormalOp,
                                  OperandVT.getScalarType().getSimpleVT()) &&
          isFAbsFree(OperandVT)) {
        // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
        SDValue Inf =
            DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
        SDValue SmallestNormal = DAG.getConstantFP(
            APFloat::getSmallestNormalized(Semantics), DL, OperandVT);

        SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
        SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
        SDValue IsNormal =
            DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
        // When the test was inverted, De Morgan turns the AND of the two
        // sub-tests into an OR.
        unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
        return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
      }
    }
  }

  // Some checks may be represented as inversion of simpler check, for example
  // "inf|normal|subnormal|zero" => !"nan".
  bool IsInverted = false;

  if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
    Test = InvertedCheck;
    IsInverted = true;
  }

  // In the general case use integer operations.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
  if (OperandVT.isVector())
    IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
                             OperandVT.getVectorElementCount());
  SDValue OpAsInt = DAG.getBitcast(IntVT, Op);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
  const unsigned ExplicitIntBitInF80 = 63;
  APInt ExpMask = Inf;
  if (IsF80)
    // For f80 the integer bit is not part of the exponent field.
    ExpMask.clearBit(ExplicitIntBitInF80);
  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
  APInt QNaNBitMask =
      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
  APInt InversionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());

  SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
  SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
  SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
  SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
  SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
  SDValue ResultInversionMask = DAG.getConstant(InversionMask, DL, ResultVT);

  SDValue Res;
  // OR a newly computed partial predicate into the accumulated result.
  const auto appendResult = [&](SDValue PartialRes) {
    if (PartialRes) {
      if (Res)
        Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
      else
        Res = PartialRes;
    }
  };

  SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
  // Lazily materialize the f80 "explicit integer bit is set" predicate so it
  // is computed at most once.
  const auto getIntBitIsSet = [&]() -> SDValue {
    if (!IntBitIsSetV) {
      APInt IntBitMask(BitSize, 0);
      IntBitMask.setBit(ExplicitIntBitInF80);
      SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
      SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
      IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
    }
    return IntBitIsSetV;
  };

  // Split the value into sign bit and absolute value.
  SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
  SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
                               DAG.getConstant(0, DL, IntVT), ISD::SETLT);

  // Tests that involve more than one class should be processed first.
  SDValue PartialRes;

  if (IsF80)
    ; // Detect finite numbers of f80 by checking individual classes because
      // they have different settings of the explicit integer bit.
  else if ((Test & fcFinite) == fcFinite) {
    // finite(V) ==> abs(V) < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    Test &= ~fcFinite;
  } else if ((Test & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
    Test &= ~fcPosFinite;
  } else if ((Test & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    Test &= ~fcNegFinite;
  }
  appendResult(PartialRes);

  if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
    // fcZero | fcSubnormal => test all exponent bits are 0
    // TODO: Handle sign bit specific cases
    if (PartialCheck == (fcZero | fcSubnormal)) {
      SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
      SDValue ExpIsZero =
          DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
      appendResult(ExpIsZero);
      Test &= ~PartialCheck & fcAllFlags;
    }
  }

  // Check for individual classes.

  if (unsigned PartialCheck = Test & fcZero) {
    if (PartialCheck == fcPosZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
    else if (PartialCheck == fcZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
    else // ISD::fcNegZero
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
    // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
    SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
    SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
    SDValue VMinusOneV =
        DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
    PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
    if (PartialCheck == fcNegSubnormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcInf) {
    if (PartialCheck == fcPosInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
    else if (PartialCheck == fcInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
    else { // ISD::fcNegInf
      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
      SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNan) {
    APInt InfWithQnanBit = Inf | QNaNBitMask;
    SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) > int(inf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      if (IsF80) {
        // Recognize unsupported values as NaNs for compatibility with glibc.
        // In them (exp(V)==0) == int_bit.
        SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
        SDValue ExpIsZero =
            DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
        SDValue IsPseudo =
            DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
        PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
      }
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
      PartialRes =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
    } else { // ISD::fcSNan
      // issignaling(V) ==> abs(V) > unsigned(Inf) &&
      //                    abs(V) < (unsigned(Inf) | quiet_bit)
      SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      SDValue IsNotQnan =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNormal) {
    // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
    SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
    SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
    APInt ExpLimit = ExpMask - ExpLSB;
    SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
    PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
    if (PartialCheck == fcNegNormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    else if (PartialCheck == fcPosNormal) {
      // Positive-only: AND with the negation of the sign predicate.
      SDValue PosSignV =
          DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInversionMask);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
    }
    if (IsF80)
      // Normal f80 values additionally require the explicit integer bit.
      PartialRes =
          DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
    appendResult(PartialRes);
  }

  // No class matched: the (possibly inverted) test is a constant.
  if (!Res)
    return DAG.getConstant(IsInverted, DL, ResultVT);
  if (IsInverted)
    Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInversionMask);
  return Res;
}
9275
// Only expand vector types if we have the appropriate vector bit operations.
// Returns true when every leg of the bithack CTPOP expansion (add/shift/and,
// plus a multiply for elements wider than 8 bits) is legal or custom for VT.
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
  assert(VT.isVector() && "Expected vector type");
  unsigned Len = VT.getScalarSizeInBits();
  return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
         (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
}
9286
  // Expand ISD::CTPOP via the parallel bit-count ("bithack") sequence.
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
    return SDValue();

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  // The masks are byte patterns splatted across the whole element width.
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);

  // At this point each byte holds its own popcount; for 8-bit elements the
  // result is already complete.
  if (Len <= 8)
    return Op;

  // Avoid the multiply if we only have 2 bytes to add.
  // TODO: Only doing this for scalars because vectors weren't as obviously
  // improved.
  if (Len == 16 && !VT.isVector()) {
    // v = (v + (v >> 8)) & 0x00FF;
    return DAG.getNode(ISD::AND, dl, VT,
                       DAG.getNode(ISD::ADD, dl, VT, Op,
                                   DAG.getNode(ISD::SRL, dl, VT, Op,
                                               DAG.getConstant(8, dl, ShVT))),
                       DAG.getConstant(0xFF, dl, VT));
  }

  // v = (v * 0x01010101...) >> (Len - 8)
  // The multiply sums all byte counts into the top byte; if a multiply is not
  // available, the shift-add chain below performs the same horizontal sum.
  SDValue V;
    SDValue Mask01 =
        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
    V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
  } else {
    V = Op;
    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
      SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
      V = DAG.getNode(ISD::ADD, dl, VT, V,
                      DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
    }
  }
  return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
}
9363
  // Expand ISD::VP_CTPOP: the masked/EVL variant of the parallel bit-count.
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  SDValue Mask = Node->getOperand(1);
  SDValue VL = Node->getOperand(2);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // This is the same algorithm as in expandCTPOP, from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);

  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;

  // v = v - ((v >> 1) & 0x55555555...)
  Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
                     DAG.getNode(ISD::VP_SRL, dl, VT, Op,
                                 DAG.getConstant(1, dl, ShVT), Mask, VL),
                     Mask55, Mask, VL);
  Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);

  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
  Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
                     DAG.getNode(ISD::VP_SRL, dl, VT, Op,
                                 DAG.getConstant(2, dl, ShVT), Mask, VL),
                     Mask33, Mask, VL);
  Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);

  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  // NOTE(review): a comma operator (not ';') joins the Tmp4 and Tmp5
  // assignments; Tmp4 is fully assigned before Tmp5 uses it, so behavior
  // matches two separate statements.
  Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
                     Mask, VL),
  Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
  Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);

  // Each byte now holds its own popcount; done for 8-bit elements.
  if (Len <= 8)
    return Op;

  // v = (v * 0x01010101...) >> (Len - 8)
  // Horizontally sum the per-byte counts, by multiply if available, else by
  // a shift-add chain.
  SDValue V;
      ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
    SDValue Mask01 =
        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
    V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
  } else {
    V = Op;
    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
      SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
      V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
                      DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
                      Mask, VL);
    }
  }
  return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
                     Mask, VL);
}
9432
  // Expand ISD::CTLZ / ISD::CTLZ_ZERO_UNDEF.
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
    return DAG.getNode(ISD::CTLZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    // CTLZ(0) is defined as the element bit width.
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        !canExpandVectorCTPOP(*this, VT)) ||
    return SDValue();

  // for now, we do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  //
  // Ref: "Hacker's Delight" by Henry Warren
  // The OR cascade smears the highest set bit downward so that ~x has
  // exactly the leading-zero bits of the original value set.
  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
    Op = DAG.getNode(ISD::OR, dl, VT, Op,
                     DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
  }
  Op = DAG.getNOT(dl, Op, VT);
  return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
9482
  // Expand ISD::VP_CTLZ: the masked/EVL variant of the CTLZ bit-smearing
  // expansion.
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  SDValue Mask = Node->getOperand(1);
  SDValue VL = Node->getOperand(2);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
    Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
                     DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
                     VL);
  }
  // ~x via XOR with all-ones, then count the (former) leading zeros.
  Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
                   Mask, VL);
  return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
}
9509
                                        const SDLoc &DL, EVT VT, SDValue Op,
                                        unsigned BitWidth) const {
  // De Bruijn multiply + constant-pool table lookup expansion of CTTZ; only
  // 32- and 64-bit sequences are provided.
  if (BitWidth != 32 && BitWidth != 64)
    return SDValue();
  APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
                                  : APInt(64, 0x0218A392CD3D5DBFULL);
  const DataLayout &TD = DAG.getDataLayout();
  MachinePointerInfo PtrInfo =
  unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
  // Isolate the lowest set bit (x & -x), multiply by the De Bruijn constant,
  // and use the top log2(BitWidth) bits as the table index.
  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
  SDValue Lookup = DAG.getNode(
      ISD::SRL, DL, VT,
      DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
                  DAG.getConstant(DeBruijn, DL, VT)),
      DAG.getShiftAmountConstant(ShiftAmt, VT, DL));

  // Populate the table: index produced by bit i maps back to i.
  for (unsigned i = 0; i < BitWidth; i++) {
    APInt Shl = DeBruijn.shl(i);
    APInt Lshr = Shl.lshr(ShiftAmt);
    Table[Lshr.getZExtValue()] = i;
  }

  // Create a ConstantArray in Constant Pool
  auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
  SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
                                      TD.getPrefTypeAlign(CA->getType()));
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
                                   DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
                                   PtrInfo, MVT::i8);
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return ExtLoad;

  // CTTZ(0) is defined as BitWidth, so select that when the source is zero.
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
  return DAG.getSelect(DL, VT, SrcIsZero,
                       DAG.getConstant(BitWidth, DL, VT), ExtLoad);
}
9553
  // Expand ISD::CTTZ / ISD::CTTZ_ZERO_UNDEF.
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
    return DAG.getNode(ISD::CTTZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    // CTTZ(0) is defined as the element bit width.
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        !canExpandVectorCTPOP(*this, VT)) ||
    return SDValue();

  // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
  // to be expanded or converted to a libcall.
    if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
      return V;

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
  // ~x & (x - 1) sets exactly the bits below the lowest set bit of x.
  SDValue Tmp = DAG.getNode(
      ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
      DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
    return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
                       DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
  }

  return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
9610
  // Expand ISD::VP_CTTZ: the masked/EVL variant of the CTTZ expansion.
  SDValue Op = Node->getOperand(0);
  SDValue Mask = Node->getOperand(1);
  SDValue VL = Node->getOperand(2);
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);

  // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
  SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
                            DAG.getAllOnesConstant(dl, VT), Mask, VL);
  SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
                                 DAG.getConstant(1, dl, VT), Mask, VL);
  // ~x & (x - 1) sets exactly the bits below the lowest set bit of x.
  SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
  return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
}
9626
                                             SelectionDAG &DAG) const {
  // Expansion plan:
  // %cond = to_bool_vec %source
  // %splat = splat /*val=*/VL
  // %tz = step_vector
  // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
  // %r = vp.reduce.umin %v
  SDLoc DL(N);
  SDValue Source = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);
  EVT SrcVT = Source.getValueType();
  EVT ResVT = N->getValueType(0);
  EVT ResVecVT =
      EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());

  // Convert to boolean vector.
  if (SrcVT.getScalarType() != MVT::i1) {
    SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
    SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                             SrcVT.getVectorElementCount());
    Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
                         DAG.getCondCode(ISD::SETNE), Mask, EVL);
  }

  // False lanes get the sentinel EVL, so the umin reduction yields the first
  // true lane's index, or EVL when no lane is set.
  SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
  SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
  SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
  SDValue Select =
      DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
  return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
}
9659
                                                  SelectionDAG &DAG) const {
  // Computes the index of the highest active (set) lane of a mask vector by
  // selecting a step-vector through the mask and umax-reducing the result.
  SDLoc DL(N);
  SDValue Mask = N->getOperand(0);
  EVT MaskVT = Mask.getValueType();
  EVT BoolVT = MaskVT.getScalarType();

  // Find a suitable type for a stepvector.
  ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
  if (MaskVT.isScalableVector())
    VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned EltWidth = TLI.getBitWidthForCttzElements(
      BoolVT.getTypeForEVT(*DAG.getContext()), MaskVT.getVectorElementCount(),
      /*ZeroIsPoison=*/true, &VScaleRange);
  EVT StepVT = MVT::getIntegerVT(EltWidth);
  EVT StepVecVT = MaskVT.changeVectorElementType(StepVT);

  // If promotion is required to make the type legal, do it here; promotion
  // of integers within LegalizeVectorOps is looking for types of the same
  // size but with a smaller number of larger elements, not the usual larger
  // size with the same number of larger elements.
  if (TLI.getTypeAction(StepVecVT.getSimpleVT()) ==
    StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
    StepVT = StepVecVT.getVectorElementType();
  }

  // Zero out lanes with inactive elements, then find the highest remaining
  // value from the stepvector.
  SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
  SDValue StepVec = DAG.getStepVector(DL, StepVecVT);
  SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
  SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
  return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
}
9696
                               bool IsNegative) const {
  // Expand ISD::ABS (IsNegative == false) or 0 - abs(x) (IsNegative == true).
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);

  // abs(x) -> smax(x,sub(0,x))
  if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
    SDValue Zero = DAG.getConstant(0, dl, VT);
    // Freeze so both uses of Op observe the same (non-poison) value.
    Op = DAG.getFreeze(Op);
    return DAG.getNode(ISD::SMAX, dl, VT, Op,
                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
  }

  // abs(x) -> umin(x,sub(0,x))
  if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
    SDValue Zero = DAG.getConstant(0, dl, VT);
    Op = DAG.getFreeze(Op);
    return DAG.getNode(ISD::UMIN, dl, VT, Op,
                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
  }

  // 0 - abs(x) -> smin(x, sub(0,x))
  if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
    SDValue Zero = DAG.getConstant(0, dl, VT);
    Op = DAG.getFreeze(Op);
    return DAG.getNode(ISD::SMIN, dl, VT, Op,
                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
  }

  // Only expand vector types if we have the appropriate vector operations.
  if (VT.isVector() &&
      (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
      (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
    return SDValue();

  Op = DAG.getFreeze(Op);
  // Shift is all-ones for negative inputs and zero for non-negative ones.
  SDValue Shift = DAG.getNode(
      ISD::SRA, dl, VT, Op,
      DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);

  // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
  if (!IsNegative)
    return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);

  // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
  return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
}
9751
// Expand an ISD::ABDS/ISD::ABDU (signed/unsigned absolute-difference) node
// into operations the target supports. Strategies, in decreasing preference:
//   1. sub(max(l,r), min(l,r)) when max/min are legal for VT.
//   2. or(usubsat(l,r), usubsat(r,l)) for the unsigned case.
//   3. abs(sub(l,r)) when the subtraction provably cannot overflow.
//   4. A branchless setcc/xor/sub form when setcc yields all-ones booleans.
//   5. A usubo-overflow-based form for illegal scalar integer types.
//   6. A generic select between the two subtraction orders.
 9753  SDLoc dl(N);
 9754  EVT VT = N->getValueType(0);
 9755  SDValue LHS = N->getOperand(0);
 9756  SDValue RHS = N->getOperand(1);
 9757  bool IsSigned = N->getOpcode() == ISD::ABDS;
 9758
 9759  // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
 9760  // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
 9761  unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
 9762  unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
 9763  if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
// Freeze both operands: each is used twice below, so a poison/undef input
// must be pinned to a single consistent value.
 9764    LHS = DAG.getFreeze(LHS);
 9765    RHS = DAG.getFreeze(RHS);
 9766    SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
 9767    SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
 9768    return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
 9769  }
 9770
 9771  // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
// One of the two saturating subtractions is zero, so OR merges them.
 9772  if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) {
 9773    LHS = DAG.getFreeze(LHS);
 9774    RHS = DAG.getFreeze(RHS);
 9775    return DAG.getNode(ISD::OR, dl, VT,
 9776                       DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
 9777                       DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
 9778  }
 9779
 9780  // If the subtract doesn't overflow then just use abs(sub())
// Operands with known-zero sign bits cannot overflow a signed subtract either.
 9781  bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
 9782
 9783  if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
 9784    return DAG.getNode(ISD::ABS, dl, VT,
 9785                       DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
 9786
 9787  if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
 9788    return DAG.getNode(ISD::ABS, dl, VT,
 9789                       DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
 9790
// Compare the (frozen) operands with the signedness-appropriate greater-than
// predicate (CC); the comparison feeds the remaining expansions.
 9791  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 9793  LHS = DAG.getFreeze(LHS);
 9794  RHS = DAG.getFreeze(RHS);
 9795  SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
 9796
 9797  // Branchless expansion iff cmp result is allbits:
 9798  // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
 9799  // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
 9800  if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
// When Cmp is all-ones, xor acts as bitwise NOT and the sub computes the
// two's-complement negation; when Cmp is zero, the ops are identity.
 9801    SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
 9802    SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
 9803    return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
 9804  }
 9805
 9806  // Similar to the branchless expansion, if we don't prefer selects, use the
 9807  // (sign-extended) usubo overflow flag if the (scalar) type is illegal as this
 9808  // is more likely to legalize cleanly: abdu(lhs, rhs) -> sub(xor(sub(lhs,
 9809  // rhs), uof(lhs, rhs)), uof(lhs, rhs))
 9810  if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
 9812    SDValue USubO =
 9813        DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
 9814    SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
 9815    SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
 9816    return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
 9817  }
 9818
 9819  // FIXME: Should really try to split the vector in case it's legal on a
 9820  // subvector.
 9822    return DAG.UnrollVectorOp(N);
 9823
 9824  // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
 9825  // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
 9826  return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
 9827                       DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
 9828}
9829
// Expand an averaging node (ISD::AVGFLOORS/AVGFLOORU/AVGCEILS/AVGCEILU) —
// (l + r) >> 1, rounding down (floor) or up (ceil) — without computing the
// full-width sum where possible, since l + r may overflow VT.
 9831  SDLoc dl(N);
 9832  EVT VT = N->getValueType(0);
 9833  SDValue LHS = N->getOperand(0);
 9834  SDValue RHS = N->getOperand(1);
 9835
// Decode the four opcode variants into the knobs used by the expansions:
// floor uses add/and, ceil uses sub/or; signed variants shift arithmetically.
 9836  unsigned Opc = N->getOpcode();
 9837  bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
 9838  bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
 9839  unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
 9840  unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
 9841  unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
 9842  unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
 9844                                   Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
 9845         "Unknown AVG node");
 9846
 9847  // If the operands are already extended, we can add+shift.
// "Extended" = at least one spare high bit on each operand (two sign bits for
// signed, one leading zero for unsigned), so the add cannot overflow VT.
 9848  bool IsExt =
 9849      (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
 9850       DAG.ComputeNumSignBits(RHS) >= 2) ||
 9851      (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
 9852       DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
 9853  if (IsExt) {
 9854    SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
// Ceil variants round up by adding 1 before the shift.
 9855    if (!IsFloor)
 9856      Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
 9857    return DAG.getNode(ShiftOpc, dl, VT, Sum,
 9858                       DAG.getShiftAmountConstant(1, VT, dl));
 9859  }
 9860
 9861  // For scalars, see if we can efficiently extend/truncate to use add+shift.
// Widening to 2*BW makes the intermediate sum overflow-free.
 9862  if (VT.isScalarInteger()) {
 9863    unsigned BW = VT.getScalarSizeInBits();
 9864    EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
 9865    if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
 9866      LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
 9867      RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
 9868      SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
 9869      if (!IsFloor)
 9870        Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
 9871                          DAG.getConstant(1, dl, ExtVT));
 9872      // Just use SRL as we will be truncating away the extended sign bits.
 9873      Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
 9874                        DAG.getShiftAmountConstant(1, ExtVT, dl));
 9875      return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
 9876    }
 9877  }
 9878
 9879  // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
// For illegal scalar types, recover the lost carry via UADDO's overflow bit
// and reinsert it as the top bit of the shifted sum.
 9880  if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
 9881    SDValue UAddWithOverflow =
 9882        DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
 9883
 9884    SDValue Sum = UAddWithOverflow.getValue(0);
 9885    SDValue Overflow = UAddWithOverflow.getValue(1);
 9886
 9887    // Right shift the sum by 1
 9888    SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
 9889                                  DAG.getShiftAmountConstant(1, VT, dl));
 9890
// ANY_EXTEND is fine: all bits except bit 0 are discarded by the SHL below.
 9891    SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
 9892    SDValue OverflowShl = DAG.getNode(
 9893        ISD::SHL, dl, VT, ZeroExtOverflow,
 9894        DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
 9895
 9896    return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
 9897  }
 9898
 9899  // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
 9900  // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
 9901  // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
 9902  // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
// Overflow-free bitwise identity; freeze since both operands are used twice.
 9903  LHS = DAG.getFreeze(LHS);
 9904  RHS = DAG.getFreeze(RHS);
 9905  SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
 9906  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
 9907  SDValue Shift =
 9908      DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
 9909  return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
 9910}
9911
// Expand an ISD::BSWAP (byte-order reversal) node into shifts/masks/ors,
// with special cheaper forms for i16 (single rotate) and a rotate-based i32
// path. Returns SDValue() for scalar types it does not handle.
 9913  SDLoc dl(N);
 9914  EVT VT = N->getValueType(0);
 9915  SDValue Op = N->getOperand(0);
 9916
 9917  if (!VT.isSimple())
 9918    return SDValue();
 9919
 9920  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
 9921  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
 9922  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
 9923  default:
 9924    return SDValue();
 9925  case MVT::i16:
 9926    // Use a rotate by 8. This can be further expanded if necessary.
 9927    return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
 9928  case MVT::i32:
 9929    // This is meant for ARM specifically, which has ROTR but no ROTL.
 9931      SDValue Mask = DAG.getConstant(0x00FF00FF, dl, VT);
 9932      // (x & 0x00FF00FF) rotr 8 | (x rotl 8) & 0x00FF00FF
// Note: "rotl 8" is expressed as ROTR by 24, which is equivalent on i32.
 9933      SDValue And = DAG.getNode(ISD::AND, dl, VT, Op, Mask);
 9934      SDValue Rotr =
 9935          DAG.getNode(ISD::ROTR, dl, VT, And, DAG.getConstant(8, dl, SHVT));
 9936      SDValue Rotl =
 9937          DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
 9938      SDValue And2 = DAG.getNode(ISD::AND, dl, VT, Rotl, Mask);
 9939      return DAG.getNode(ISD::OR, dl, VT, Rotr, And2);
 9940    }
// Generic i32 fallback: isolate each byte with shift+mask and OR them
// together in reversed positions.
 9941    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
 9942    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
 9943                       DAG.getConstant(0xFF00, dl, VT));
 9944    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
 9945    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
 9946    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
 9947    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
 9948    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
 9949    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
 9950    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
 9951  case MVT::i64:
// Same byte-isolation scheme for all 8 bytes: Tmp8..Tmp5 place the low half's
// bytes into the high half, Tmp4..Tmp1 place the high half's bytes low, and
// the OR tree merges the results pairwise.
 9952    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
 9953    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
 9954                       DAG.getConstant(255ULL<<8, dl, VT));
 9955    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
 9956    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
 9957                       DAG.getConstant(255ULL<<16, dl, VT));
 9958    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
 9959    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
 9960                       DAG.getConstant(255ULL<<24, dl, VT));
 9961    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
 9962    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
 9963    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
 9964                       DAG.getConstant(255ULL<<24, dl, VT));
 9965    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
 9966    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
 9967                       DAG.getConstant(255ULL<<16, dl, VT));
 9968    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
 9969    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
 9970                       DAG.getConstant(255ULL<<8, dl, VT));
 9971    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
 9972    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
 9973    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
 9974    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
 9975    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
 9976    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
 9977    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
 9978    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
 9979  }
 9980}
9981
// Expand a vector-predicated BSWAP (ISD::VP_BSWAP) using the same
// shift/mask/or scheme as the non-VP expansion, but threading the
// per-lane Mask and explicit-vector-length (EVL) operands through
// every intermediate VP node.
 9983  SDLoc dl(N);
 9984  EVT VT = N->getValueType(0);
 9985  SDValue Op = N->getOperand(0);
 9986  SDValue Mask = N->getOperand(1);
 9987  SDValue EVL = N->getOperand(2);
 9988
 9989  if (!VT.isSimple())
 9990    return SDValue();
 9991
 9992  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
 9993  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
 9994  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
 9995  default:
 9996    return SDValue();
 9997  case MVT::i16:
// Swap the two bytes: (x << 8) | (x >> 8).
 9998    Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
 9999                       Mask, EVL);
10000    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10001                       Mask, EVL);
10002    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
10003  case MVT::i32:
// Isolate each of the four bytes and OR them back in reversed order.
10004    Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10005                       Mask, EVL);
10006    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
10007                       Mask, EVL);
10008    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
10009                       Mask, EVL);
10010    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10011                       Mask, EVL);
10012    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10013                       DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
10014    Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10015                       Mask, EVL);
10016    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10017    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10018    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10019  case MVT::i64:
// Eight-byte variant: shifts/masks place every byte in its mirrored
// position, then a pairwise OR tree merges the pieces.
10020    Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10021                       Mask, EVL);
10022    Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10023                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10024    Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
10025                       Mask, EVL);
10026    Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10027                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10028    Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
10029                       Mask, EVL);
10030    Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10031                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10032    Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
10033                       Mask, EVL);
10034    Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10035                       Mask, EVL);
10036    Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
10037                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10038    Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10039                       Mask, EVL);
10040    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
10041                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10042    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
10043                       Mask, EVL);
10044    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10045                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10046    Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10047                       Mask, EVL);
10048    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
10049    Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
10050    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10051    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10052    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
10053    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10054    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
10055  }
10056}
10057
// Expand an ISD::BITREVERSE node. For power-of-two sizes >= 8 bits this uses
// the classic O(log n) scheme: byte-reverse with BSWAP, then swap nibbles,
// bit-pairs, and finally individual bit pairs with repeating byte masks.
// Otherwise it falls back to an O(n) bit-by-bit shift/mask/or loop.
10059  SDLoc dl(N);
10060  EVT VT = N->getValueType(0);
10061  SDValue Op = N->getOperand(0);
10062  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10063  unsigned Sz = VT.getScalarSizeInBits();
10064
10065  SDValue Tmp, Tmp2, Tmp3;
10066
10067  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10068  // and finally the i1 pairs.
10069  // TODO: We can easily support i4/i2 legal types if any target ever does.
10070  if (Sz >= 8 && isPowerOf2_32(Sz)) {
10071    // Create the masks - repeating the pattern every byte.
10072    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10073    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10074    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10075
10076    // BSWAP if the type is wider than a single byte.
10077    Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
10078
10079    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10080    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
10081    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
10082    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
10083    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
10084    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10085
10086    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10087    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
10088    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
10089    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
10090    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
10091    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10092
10093    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10094    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
10095    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
10096    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
10097    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
10098    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10099    return Tmp;
10100  }
10101
// Fallback for non-power-of-two / sub-byte sizes: move bit I to bit J
// (J = Sz-1-I) one bit at a time, masking to the destination bit and
// accumulating with OR. Quadratic in node count but fully general.
10102  Tmp = DAG.getConstant(0, dl, VT);
10103  for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
10104    if (I < J)
10105      Tmp2 =
10106          DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
10107    else
10108      Tmp2 =
10109          DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
10110
10111    APInt Shift = APInt::getOneBitSet(Sz, J);
10112    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
10113    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
10114  }
10115
10116  return Tmp;
10117}
10118
// Expand a vector-predicated BITREVERSE (ISD::VP_BITREVERSE) using the same
// BSWAP + nibble/pair/bit swap scheme as the non-VP expansion, threading the
// Mask and EVL operands through every VP node. Unlike the non-VP version
// there is no bit-by-bit fallback: non-power-of-two or sub-byte element
// sizes return SDValue() (i.e. "not expanded here").
10120  assert(N->getOpcode() == ISD::VP_BITREVERSE);
10121
10122  SDLoc dl(N);
10123  EVT VT = N->getValueType(0);
10124  SDValue Op = N->getOperand(0);
10125  SDValue Mask = N->getOperand(1);
10126  SDValue EVL = N->getOperand(2);
10127  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10128  unsigned Sz = VT.getScalarSizeInBits();
10129
10130  SDValue Tmp, Tmp2, Tmp3;
10131
10132  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10133  // and finally the i1 pairs.
10134  // TODO: We can easily support i4/i2 legal types if any target ever does.
10135  if (Sz >= 8 && isPowerOf2_32(Sz)) {
10136    // Create the masks - repeating the pattern every byte.
10137    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10138    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10139    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10140
10141    // BSWAP if the type is wider than a single byte.
10142    Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
10143
10144    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10145    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
10146                       Mask, EVL);
10147    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10148                       DAG.getConstant(Mask4, dl, VT), Mask, EVL);
10149    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
10150                       Mask, EVL);
10151    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
10152                       Mask, EVL);
10153    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10154
10155    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10156    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
10157                       Mask, EVL);
10158    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10159                       DAG.getConstant(Mask2, dl, VT), Mask, EVL);
10160    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
10161                       Mask, EVL);
10162    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
10163                       Mask, EVL);
10164    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10165
10166    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10167    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
10168                       Mask, EVL);
10169    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10170                       DAG.getConstant(Mask1, dl, VT), Mask, EVL);
10171    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
10172                       Mask, EVL);
10173    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
10174                       Mask, EVL);
10175    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10176    return Tmp;
10177  }
10178  return SDValue();
10179}
10180
// Scalarize a vector load into per-element scalar loads (or, for
// non-byte-sized elements, one wide integer load plus shift/mask element
// extraction). Returns the loaded vector value and the output chain.
10181std::pair<SDValue, SDValue>
10183                                    SelectionDAG &DAG) const {
10184  SDLoc SL(LD);
10185  SDValue Chain = LD->getChain();
10186  SDValue BasePTR = LD->getBasePtr();
10187  EVT SrcVT = LD->getMemoryVT();
10188  EVT DstVT = LD->getValueType(0);
10189  ISD::LoadExtType ExtType = LD->getExtensionType();
10190
// Scalable vectors have no compile-time-known element count to loop over.
10191  if (SrcVT.isScalableVector())
10192    report_fatal_error("Cannot scalarize scalable vector loads");
10193
10194  unsigned NumElem = SrcVT.getVectorNumElements();
10195
10196  EVT SrcEltVT = SrcVT.getScalarType();
10197  EVT DstEltVT = DstVT.getScalarType();
10198
10199  // A vector must always be stored in memory as-is, i.e. without any padding
10200  // between the elements, since various code depend on it, e.g. in the
10201  // handling of a bitcast of a vector type to int, which may be done with a
10202  // vector store followed by an integer load. A vector that does not have
10203  // elements that are byte-sized must therefore be stored as an integer
10204  // built out of the extracted vector elements.
10205  if (!SrcEltVT.isByteSized()) {
10206    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
10207    EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
10208
10209    unsigned NumSrcBits = SrcVT.getSizeInBits();
10210    EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
10211
10212    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
10213    SDValue SrcEltBitMask = DAG.getConstant(
10214        APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
10215
10216    // Load the whole vector and avoid masking off the top bits as it makes
10217    // the codegen worse.
10218    SDValue Load =
10219        DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
10220                       LD->getPointerInfo(), SrcIntVT, LD->getBaseAlign(),
10221                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
10222
// Extract each element from the wide integer: shift it down to bit 0
// (endian-aware index), mask to the element width, truncate, then apply
// the original load's extension kind to reach the destination element type.
10224    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10225      unsigned ShiftIntoIdx =
10226          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10227      SDValue ShiftAmount = DAG.getShiftAmountConstant(
10228          ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
10229      SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
10230      SDValue Elt =
10231          DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
10232      SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
10233
10234      if (ExtType != ISD::NON_EXTLOAD) {
10235        unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
10236        Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
10237      }
10238
10239      Vals.push_back(Scalar);
10240    }
10241
10242    SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10243    return std::make_pair(Value, Load.getValue(1));
10244  }
10245
// Byte-sized elements: emit one (possibly extending) scalar load per
// element at increasing offsets, then rebuild the vector.
10246  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
10247  assert(SrcEltVT.isByteSized());
10248
10250  SmallVector<SDValue, 8> LoadChains;
10251
10252  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10253    SDValue ScalarLoad = DAG.getExtLoad(
10254        ExtType, SL, DstEltVT, Chain, BasePTR,
10255        LD->getPointerInfo().getWithOffset(Idx * Stride), SrcEltVT,
10256        LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10257
10258    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
10259
10260    Vals.push_back(ScalarLoad.getValue(0));
10261    LoadChains.push_back(ScalarLoad.getValue(1));
10262  }
10263
// Join all the element-load chains so later memory ops depend on them all.
10264  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
10265  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10266
10267  return std::make_pair(Value, NewChain);
10268}
10269
// Scalarize a vector store into per-element scalar (trunc)stores, or, for
// non-byte-sized elements, pack all elements into one integer and emit a
// single store. Returns the new chain.
10271                                             SelectionDAG &DAG) const {
10272  SDLoc SL(ST);
10273
10274  SDValue Chain = ST->getChain();
10275  SDValue BasePtr = ST->getBasePtr();
10276  SDValue Value = ST->getValue();
10277  EVT StVT = ST->getMemoryVT();
10278
// Scalable vectors have no compile-time-known element count to loop over.
10279  if (StVT.isScalableVector())
10280    report_fatal_error("Cannot scalarize scalable vector stores");
10281
10282  // The type of the data we want to save
10283  EVT RegVT = Value.getValueType();
10284  EVT RegSclVT = RegVT.getScalarType();
10285
10286  // The type of data as saved in memory.
10287  EVT MemSclVT = StVT.getScalarType();
10288
10289  unsigned NumElem = StVT.getVectorNumElements();
10290
10291  // A vector must always be stored in memory as-is, i.e. without any padding
10292  // between the elements, since various code depend on it, e.g. in the
10293  // handling of a bitcast of a vector type to int, which may be done with a
10294  // vector store followed by an integer load. A vector that does not have
10295  // elements that are byte-sized must therefore be stored as an integer
10296  // built out of the extracted vector elements.
10297  if (!MemSclVT.isByteSized()) {
10298    unsigned NumBits = StVT.getSizeInBits();
10299    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
10300
10301    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
10302
// Pack each element: truncate to the memory element width, zero-extend to
// the full integer, shift into its (endian-aware) bit position, and OR in.
10303    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10304      SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10305      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
10306      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
10307      unsigned ShiftIntoIdx =
10308          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10309      SDValue ShiftAmount =
10310          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
10311      SDValue ShiftedElt =
10312          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
10313      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
10314    }
10315
// One store of the packed integer preserves the in-memory vector layout.
10316    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
10317                        ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10318                        ST->getAAInfo());
10319  }
10320
10321  // Store Stride in bytes
10322  unsigned Stride = MemSclVT.getSizeInBits() / 8;
10323  assert(Stride && "Zero stride!");
10324  // Extract each of the elements from the original vector and save them into
10325  // memory individually.
10327  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10328    SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10329
10330    SDValue Ptr =
10331        DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
10332
10333    // This scalar TruncStore may be illegal, but we legalize it later.
10334    SDValue Store = DAG.getTruncStore(
10335        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
10336        MemSclVT, ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10337        ST->getAAInfo());
10338
10339    Stores.push_back(Store);
10340  }
10341
// The per-element stores are independent; join them with a TokenFactor.
10342  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
10343}
10344
// Expand a load whose natural alignment the target cannot perform.
// Three strategies:
//   - FP/vector with a legal same-size integer type: do a misaligned integer
//     load and bitcast back (or scalarize a vector load when integer loads
//     of that width aren't legal/custom).
//   - Otherwise for FP/vector: copy the bytes register-by-register through
//     an aligned stack slot, then do the original load from the slot.
//   - Plain integer: split into two half-width loads (Lo zext, Hi keeping
//     the original extension kind) and combine with shl+or.
// Returns {loaded value, output chain}.
10345std::pair<SDValue, SDValue>
10347  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
10348         "unaligned indexed loads not implemented!");
10349  SDValue Chain = LD->getChain();
10350  SDValue Ptr = LD->getBasePtr();
10351  EVT VT = LD->getValueType(0);
10352  EVT LoadedVT = LD->getMemoryVT();
10353  SDLoc dl(LD);
10354  auto &MF = DAG.getMachineFunction();
10355
10356  if (VT.isFloatingPoint() || VT.isVector()) {
10357    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
10358    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
10359      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
10360          LoadedVT.isVector()) {
10361        // Scalarize the load and let the individual components be handled.
10362        return scalarizeVectorLoad(LD, DAG);
10363      }
10364
10365      // Expand to a (misaligned) integer load of the same size,
10366      // then bitconvert to floating point or vector.
10367      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
10368                                    LD->getMemOperand());
10369      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
// If this was an extending load, widen the bitcast value to the result type.
10370      if (LoadedVT != VT)
10371        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
10372                             ISD::ANY_EXTEND, dl, VT, Result);
10373
10374      return std::make_pair(Result, newLoad.getValue(1));
10375    }
10376
10377    // Copy the value to a (aligned) stack slot using (unaligned) integer
10378    // loads and stores, then do a (aligned) load from the stack slot.
10379    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
10380    unsigned LoadedBytes = LoadedVT.getStoreSize();
10381    unsigned RegBytes = RegVT.getSizeInBits() / 8;
// Round up so a final partial register covers the tail bytes.
10382    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
10383
10384    // Make sure the stack slot is also aligned for the register type.
10385    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
10386    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
10388    SDValue StackPtr = StackBase;
10389    unsigned Offset = 0;
10390
10391    EVT PtrVT = Ptr.getValueType();
10392    EVT StackPtrVT = StackPtr.getValueType();
10393
10394    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10395    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10396
10397    // Do all but one copies using the full register width.
10398    for (unsigned i = 1; i < NumRegs; i++) {
10399      // Load one integer register's worth from the original location.
10400      SDValue Load = DAG.getLoad(
10401          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
10402          LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10403      // Follow the load with a store to the stack slot. Remember the store.
10404      Stores.push_back(DAG.getStore(
10405          Load.getValue(1), dl, Load, StackPtr,
10406          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
10407      // Increment the pointers.
10408      Offset += RegBytes;
10409
10410      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10411      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10412    }
10413
10414    // The last copy may be partial.  Do an extending load.
10415    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
10416                                  8 * (LoadedBytes - Offset));
10417    SDValue Load = DAG.getExtLoad(
10418        ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
10419        LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->getBaseAlign(),
10420        LD->getMemOperand()->getFlags(), LD->getAAInfo());
10421    // Follow the load with a store to the stack slot.  Remember the store.
10422    // On big-endian machines this requires a truncating store to ensure
10423    // that the bits end up in the right place.
10424    Stores.push_back(DAG.getTruncStore(
10425        Load.getValue(1), dl, Load, StackPtr,
10426        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
10427
10428    // The order of the stores doesn't matter - say it with a TokenFactor.
10429    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10430
10431    // Finally, perform the original load only redirected to the stack slot.
10432    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
10433                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
10434                          LoadedVT);
10435
10436    // Callers expect a MERGE_VALUES node.
10437    return std::make_pair(Load, TF);
10438  }
10439
10440  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
10441         "Unaligned load of unsupported type.");
10442
10443  // Compute the new VT that is half the size of the old one.  This is an
10444  // integer MVT.
10445  unsigned NumBits = LoadedVT.getSizeInBits();
10446  EVT NewLoadedVT;
10447  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
10448  NumBits >>= 1;
10449
10450  Align Alignment = LD->getBaseAlign();
10451  unsigned IncrementSize = NumBits / 8;
10452  ISD::LoadExtType HiExtType = LD->getExtensionType();
10453
10454  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
10455  if (HiExtType == ISD::NON_EXTLOAD)
10456    HiExtType = ISD::ZEXTLOAD;
10457
10458  // Load the value in two parts
// Which half lives at the lower address depends on endianness; the low half
// is always zero-extended so the OR below doesn't clobber the high half.
10459  SDValue Lo, Hi;
10460  if (DAG.getDataLayout().isLittleEndian()) {
10461    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10462                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10463                        LD->getAAInfo());
10464
10465    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10466    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
10467                        LD->getPointerInfo().getWithOffset(IncrementSize),
10468                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10469                        LD->getAAInfo());
10470  } else {
10471    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10472                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10473                        LD->getAAInfo());
10474
10475    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10476    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
10477                        LD->getPointerInfo().getWithOffset(IncrementSize),
10478                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10479                        LD->getAAInfo());
10480  }
10481
10482  // aggregate the two parts
10483  SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
10484  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
10485  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
10486
10487  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
10488                           Hi.getValue(1));
10489
10490  return std::make_pair(Result, TF);
10491}
10492
10494 SelectionDAG &DAG) const {
10495 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
10496 "unaligned indexed stores not implemented!");
10497 SDValue Chain = ST->getChain();
10498 SDValue Ptr = ST->getBasePtr();
10499 SDValue Val = ST->getValue();
10500 EVT VT = Val.getValueType();
10501 Align Alignment = ST->getBaseAlign();
10502 auto &MF = DAG.getMachineFunction();
10503 EVT StoreMemVT = ST->getMemoryVT();
10504
10505 SDLoc dl(ST);
10506 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
10507 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
10508 if (isTypeLegal(intVT)) {
10509 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
10510 StoreMemVT.isVector()) {
10511 // Scalarize the store and let the individual components be handled.
10512 SDValue Result = scalarizeVectorStore(ST, DAG);
10513 return Result;
10514 }
10515 // Expand to a bitconvert of the value to the integer type of the
10516 // same size, then a (misaligned) int store.
10517 // FIXME: Does not handle truncating floating point stores!
10518 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
10519 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
10520 Alignment, ST->getMemOperand()->getFlags());
10521 return Result;
10522 }
10523 // Do a (aligned) store to a stack slot, then copy from the stack slot
10524 // to the final destination using (unaligned) integer loads and stores.
10525 MVT RegVT = getRegisterType(
10526 *DAG.getContext(),
10527 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
10528 EVT PtrVT = Ptr.getValueType();
10529 unsigned StoredBytes = StoreMemVT.getStoreSize();
10530 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10531 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
10532
10533 // Make sure the stack slot is also aligned for the register type.
10534 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
10535 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
10536
10537 // Perform the original store, only redirected to the stack slot.
10538 SDValue Store = DAG.getTruncStore(
10539 Chain, dl, Val, StackPtr,
10540 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
10541
10542 EVT StackPtrVT = StackPtr.getValueType();
10543
10544 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10545 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10547 unsigned Offset = 0;
10548
10549 // Do all but one copies using the full register width.
10550 for (unsigned i = 1; i < NumRegs; i++) {
10551 // Load one integer register's worth from the stack slot.
10552 SDValue Load = DAG.getLoad(
10553 RegVT, dl, Store, StackPtr,
10554 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
10555 // Store it to the final location. Remember the store.
10556 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
10557 ST->getPointerInfo().getWithOffset(Offset),
10558 ST->getBaseAlign(),
10559 ST->getMemOperand()->getFlags()));
10560 // Increment the pointers.
10561 Offset += RegBytes;
10562 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10563 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10564 }
10565
10566 // The last store may be partial. Do a truncating store. On big-endian
10567 // machines this requires an extending load from the stack slot to ensure
10568 // that the bits are in the right place.
10569 EVT LoadMemVT =
10570 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
10571
10572 // Load from the stack slot.
10573 SDValue Load = DAG.getExtLoad(
10574 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
10575 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
10576
10577 Stores.push_back(DAG.getTruncStore(
10578 Load.getValue(1), dl, Load, Ptr,
10579 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
10580 ST->getBaseAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()));
10581 // The order of the stores doesn't matter - say it with a TokenFactor.
10582 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10583 return Result;
10584 }
10585
10586 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
10587 "Unaligned store of unknown type.");
10588 // Get the half-size VT
10589 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
10590 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
10591 unsigned IncrementSize = NumBits / 8;
10592
10593 // Divide the stored value in two parts.
10594 SDValue ShiftAmount =
10595 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
10596 SDValue Lo = Val;
10597 // If Val is a constant, replace the upper bits with 0. The SRL will constant
10598 // fold and not use the upper bits. A smaller constant may be easier to
10599 // materialize.
10600 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
10601 Lo = DAG.getNode(
10602 ISD::AND, dl, VT, Lo,
10603 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
10604 VT));
10605 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
10606
10607 // Store the two parts
10608 SDValue Store1, Store2;
10609 Store1 = DAG.getTruncStore(Chain, dl,
10610 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10611 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
10612 ST->getMemOperand()->getFlags());
10613
10614 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10615 Store2 = DAG.getTruncStore(
10616 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10617 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
10618 ST->getMemOperand()->getFlags(), ST->getAAInfo());
10619
10620 SDValue Result =
10621 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
10622 return Result;
10623}
10624
10625SDValue
10627 const SDLoc &DL, EVT DataVT,
10628 SelectionDAG &DAG,
10629 bool IsCompressedMemory) const {
10631 EVT AddrVT = Addr.getValueType();
10632 EVT MaskVT = Mask.getValueType();
10633 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10634 "Incompatible types of Data and Mask");
10635 if (IsCompressedMemory) {
10636 // Incrementing the pointer according to number of '1's in the mask.
10637 if (DataVT.isScalableVector()) {
10638 EVT MaskExtVT = MaskVT.changeElementType(MVT::i32);
10639 SDValue MaskExt = DAG.getNode(ISD::ZERO_EXTEND, DL, MaskExtVT, Mask);
10640 Increment = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, MaskExt);
10641 } else {
10642 EVT MaskIntVT =
10643 EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10644 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
10645 if (MaskIntVT.getSizeInBits() < 32) {
10646 MaskInIntReg =
10647 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10648 MaskIntVT = MVT::i32;
10649 }
10650 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10651 }
10652 // Scale is an element size in bytes.
10653 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10654 AddrVT);
10655 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10656 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10657 } else
10658 Increment = DAG.getTypeSize(DL, AddrVT, DataVT.getStoreSize());
10659
10660 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10661}
10662
10664 EVT VecVT, const SDLoc &dl,
10665 ElementCount SubEC) {
10666 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10667 "Cannot index a scalable vector within a fixed-width vector");
10668
10669 unsigned NElts = VecVT.getVectorMinNumElements();
10670 unsigned NumSubElts = SubEC.getKnownMinValue();
10671 EVT IdxVT = Idx.getValueType();
10672
10673 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10674 // If this is a constant index and we know the value plus the number of the
10675 // elements in the subvector minus one is less than the minimum number of
10676 // elements then it's safe to return Idx.
10677 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
10678 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10679 return Idx;
10680 SDValue VS =
10681 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
10682 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10683 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
10684 DAG.getConstant(NumSubElts, dl, IdxVT));
10685 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
10686 }
10687 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
10688 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
10689 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
10690 DAG.getConstant(Imm, dl, IdxVT));
10691 }
10692 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10693 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
10694 DAG.getConstant(MaxIndex, dl, IdxVT));
10695}
10696
10697SDValue
10699 EVT VecVT, SDValue Index,
10700 const SDNodeFlags PtrArithFlags) const {
10702 DAG, VecPtr, VecVT,
10704 Index, PtrArithFlags);
10705}
10706
10707SDValue
10709 EVT VecVT, EVT SubVecVT, SDValue Index,
10710 const SDNodeFlags PtrArithFlags) const {
10711 SDLoc dl(Index);
10712 // Make sure the index type is big enough to compute in.
10713 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
10714
10715 EVT EltVT = VecVT.getVectorElementType();
10716
10717 // Calculate the element offset and add it to the pointer.
10718 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10719 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10720 "Converting bits to bytes lost precision");
10721 assert(SubVecVT.getVectorElementType() == EltVT &&
10722 "Sub-vector must be a vector with matching element type");
10723 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
10724 SubVecVT.getVectorElementCount());
10725
10726 EVT IdxVT = Index.getValueType();
10727 if (SubVecVT.isScalableVector())
10728 Index =
10729 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10730 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
10731
10732 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10733 DAG.getConstant(EltSize, dl, IdxVT));
10734 return DAG.getMemBasePlusOffset(VecPtr, Index, dl, PtrArithFlags);
10735}
10736
10737//===----------------------------------------------------------------------===//
10738// Implementation of Emulated TLS Model
10739//===----------------------------------------------------------------------===//
10740
10742 SelectionDAG &DAG) const {
10743 // Access to address of TLS varialbe xyz is lowered to a function call:
10744 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10745 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10746 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
10747 SDLoc dl(GA);
10748
10749 ArgListTy Args;
10750 const GlobalValue *GV =
10752 SmallString<32> NameString("__emutls_v.");
10753 NameString += GV->getName();
10754 StringRef EmuTlsVarName(NameString);
10755 const GlobalVariable *EmuTlsVar =
10756 GV->getParent()->getNamedGlobal(EmuTlsVarName);
10757 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
10758 Args.emplace_back(DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT), VoidPtrType);
10759
10760 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
10761
10763 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10764 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
10765 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10766
10767 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
10768 // At last for X86 targets, maybe good for other targets too?
10770 MFI.setAdjustsStack(true); // Is this only for X86 target?
10771 MFI.setHasCalls(true);
10772
10773 assert((GA->getOffset() == 0) &&
10774 "Emulated TLS must have zero offset in GlobalAddressSDNode");
10775 return CallResult.first;
10776}
10777
10779 SelectionDAG &DAG) const {
10780 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10781 if (!isCtlzFast())
10782 return SDValue();
10783 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10784 SDLoc dl(Op);
10785 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10786 EVT VT = Op.getOperand(0).getValueType();
10787 SDValue Zext = Op.getOperand(0);
10788 if (VT.bitsLT(MVT::i32)) {
10789 VT = MVT::i32;
10790 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10791 }
10792 unsigned Log2b = Log2_32(VT.getSizeInBits());
10793 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10794 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10795 DAG.getConstant(Log2b, dl, MVT::i32));
10796 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10797 }
10798 return SDValue();
10799}
10800
10802 SDValue Op0 = Node->getOperand(0);
10803 SDValue Op1 = Node->getOperand(1);
10804 EVT VT = Op0.getValueType();
10805 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10806 unsigned Opcode = Node->getOpcode();
10807 SDLoc DL(Node);
10808
10809 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
10810 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
10812 Op0 = DAG.getFreeze(Op0);
10813 SDValue Zero = DAG.getConstant(0, DL, VT);
10814 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10815 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
10816 }
10817
10818 // umin(x,y) -> sub(x,usubsat(x,y))
10819 // TODO: Missing freeze(Op0)?
10820 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
10822 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10823 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
10824 }
10825
10826 // umax(x,y) -> add(x,usubsat(y,x))
10827 // TODO: Missing freeze(Op0)?
10828 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
10830 return DAG.getNode(ISD::ADD, DL, VT, Op0,
10831 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
10832 }
10833
10834 // FIXME: Should really try to split the vector in case it's legal on a
10835 // subvector.
10837 return DAG.UnrollVectorOp(Node);
10838
10839 // Attempt to find an existing SETCC node that we can reuse.
10840 // TODO: Do we need a generic doesSETCCNodeExist?
10841 // TODO: Missing freeze(Op0)/freeze(Op1)?
10842 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10843 ISD::CondCode PrefCommuteCC,
10844 ISD::CondCode AltCommuteCC) {
10845 SDVTList BoolVTList = DAG.getVTList(BoolVT);
10846 for (ISD::CondCode CC : {PrefCC, AltCC}) {
10847 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10848 {Op0, Op1, DAG.getCondCode(CC)})) {
10849 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10850 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10851 }
10852 }
10853 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10854 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10855 {Op0, Op1, DAG.getCondCode(CC)})) {
10856 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10857 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
10858 }
10859 }
10860 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
10861 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10862 };
10863
10864 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10865 // -> Y = (A < B) ? B : A
10866 // -> Y = (A >= B) ? A : B
10867 // -> Y = (A <= B) ? B : A
10868 switch (Opcode) {
10869 case ISD::SMAX:
10870 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10871 case ISD::SMIN:
10872 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10873 case ISD::UMAX:
10874 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10875 case ISD::UMIN:
10876 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10877 }
10878
10879 llvm_unreachable("How did we get here?");
10880}
10881
10883 unsigned Opcode = Node->getOpcode();
10884 SDValue LHS = Node->getOperand(0);
10885 SDValue RHS = Node->getOperand(1);
10886 EVT VT = LHS.getValueType();
10887 SDLoc dl(Node);
10888
10889 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10890 assert(VT.isInteger() && "Expected operands to be integers");
10891
10892 // usub.sat(a, b) -> umax(a, b) - b
10893 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
10894 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
10895 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
10896 }
10897
10898 // uadd.sat(a, b) -> umin(a, ~b) + b
10899 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
10900 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
10901 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
10902 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
10903 }
10904
10905 unsigned OverflowOp;
10906 switch (Opcode) {
10907 case ISD::SADDSAT:
10908 OverflowOp = ISD::SADDO;
10909 break;
10910 case ISD::UADDSAT:
10911 OverflowOp = ISD::UADDO;
10912 break;
10913 case ISD::SSUBSAT:
10914 OverflowOp = ISD::SSUBO;
10915 break;
10916 case ISD::USUBSAT:
10917 OverflowOp = ISD::USUBO;
10918 break;
10919 default:
10920 llvm_unreachable("Expected method to receive signed or unsigned saturation "
10921 "addition or subtraction node.");
10922 }
10923
10924 // FIXME: Should really try to split the vector in case it's legal on a
10925 // subvector.
10927 return DAG.UnrollVectorOp(Node);
10928
10929 unsigned BitWidth = LHS.getScalarValueSizeInBits();
10930 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10931 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10932 SDValue SumDiff = Result.getValue(0);
10933 SDValue Overflow = Result.getValue(1);
10934 SDValue Zero = DAG.getConstant(0, dl, VT);
10935 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
10936
10937 if (Opcode == ISD::UADDSAT) {
10939 // (LHS + RHS) | OverflowMask
10940 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10941 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
10942 }
10943 // Overflow ? 0xffff.... : (LHS + RHS)
10944 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
10945 }
10946
10947 if (Opcode == ISD::USUBSAT) {
10949 // (LHS - RHS) & ~OverflowMask
10950 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10951 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
10952 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
10953 }
10954 // Overflow ? 0 : (LHS - RHS)
10955 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
10956 }
10957
10958 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
10961
10962 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
10963 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
10964
10965 // If either of the operand signs are known, then they are guaranteed to
10966 // only saturate in one direction. If non-negative they will saturate
10967 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10968 //
10969 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10970 // sign of 'y' has to be flipped.
10971
10972 bool LHSIsNonNegative = KnownLHS.isNonNegative();
10973 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10974 : KnownRHS.isNegative();
10975 if (LHSIsNonNegative || RHSIsNonNegative) {
10976 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10977 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
10978 }
10979
10980 bool LHSIsNegative = KnownLHS.isNegative();
10981 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10982 : KnownRHS.isNonNegative();
10983 if (LHSIsNegative || RHSIsNegative) {
10984 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10985 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
10986 }
10987 }
10988
10989 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
10991 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10992 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
10993 DAG.getConstant(BitWidth - 1, dl, VT));
10994 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
10995 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
10996}
10997
10999 unsigned Opcode = Node->getOpcode();
11000 SDValue LHS = Node->getOperand(0);
11001 SDValue RHS = Node->getOperand(1);
11002 EVT VT = LHS.getValueType();
11003 EVT ResVT = Node->getValueType(0);
11004 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11005 SDLoc dl(Node);
11006
11007 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
11008 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
11009 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
11010 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
11011
11012 // We can't perform arithmetic on i1 values. Extending them would
11013 // probably result in worse codegen, so let's just use two selects instead.
11014 // Some targets are also just better off using selects rather than subtraction
11015 // because one of the conditions can be merged with one of the selects.
11016 // And finally, if we don't know the contents of high bits of a boolean value
11017 // we can't perform any arithmetic either.
11019 BoolVT.getScalarSizeInBits() == 1 ||
11021 SDValue SelectZeroOrOne =
11022 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
11023 DAG.getConstant(0, dl, ResVT));
11024 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
11025 SelectZeroOrOne);
11026 }
11027
11029 std::swap(IsGT, IsLT);
11030 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
11031 ResVT);
11032}
11033
11035 unsigned Opcode = Node->getOpcode();
11036 bool IsSigned = Opcode == ISD::SSHLSAT;
11037 SDValue LHS = Node->getOperand(0);
11038 SDValue RHS = Node->getOperand(1);
11039 EVT VT = LHS.getValueType();
11040 SDLoc dl(Node);
11041
11042 assert((Node->getOpcode() == ISD::SSHLSAT ||
11043 Node->getOpcode() == ISD::USHLSAT) &&
11044 "Expected a SHLSAT opcode");
11045 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
11046 assert(VT.isInteger() && "Expected operands to be integers");
11047
11049 return DAG.UnrollVectorOp(Node);
11050
11051 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
11052
11053 unsigned BW = VT.getScalarSizeInBits();
11054 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11055 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
11056 SDValue Orig =
11057 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
11058
11059 SDValue SatVal;
11060 if (IsSigned) {
11061 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
11062 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
11063 SDValue Cond =
11064 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
11065 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
11066 } else {
11067 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
11068 }
11069 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
11070 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
11071}
11072
11074 bool Signed, SDValue &Lo, SDValue &Hi,
11075 SDValue LHS, SDValue RHS,
11076 SDValue HiLHS, SDValue HiRHS) const {
11077 EVT VT = LHS.getValueType();
11078 assert(RHS.getValueType() == VT && "Mismatching operand types");
11079
11080 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
11081 assert((!Signed || !HiLHS) &&
11082 "Signed flag should only be set when HiLHS and RiRHS are null");
11083
11084 // We'll expand the multiplication by brute force because we have no other
11085 // options. This is a trivially-generalized version of the code from
11086 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
11087 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
11088 // sign bits while calculating the Hi half.
11089 unsigned Bits = VT.getSizeInBits();
11090 unsigned HalfBits = Bits / 2;
11091 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
11092 SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
11093 SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
11094
11095 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
11096 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
11097
11098 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
11099 // This is always an unsigned shift.
11100 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
11101
11102 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
11103 SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
11104 SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
11105
11106 SDValue U =
11107 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
11108 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
11109 SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
11110
11111 SDValue V =
11112 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
11113 SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
11114
11115 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
11116 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
11117
11118 Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
11119 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
11120
11121 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
11122 // the products to Hi.
11123 if (HiLHS) {
11124 Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
11125 DAG.getNode(ISD::ADD, dl, VT,
11126 DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS),
11127 DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS)));
11128 }
11129}
11130
11132 bool Signed, const SDValue LHS,
11133 const SDValue RHS, SDValue &Lo,
11134 SDValue &Hi) const {
11135 EVT VT = LHS.getValueType();
11136 assert(RHS.getValueType() == VT && "Mismatching operand types");
11137 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
11138 // We can fall back to a libcall with an illegal type for the MUL if we
11139 // have a libcall big enough.
11140 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
11141 if (WideVT == MVT::i16)
11142 LC = RTLIB::MUL_I16;
11143 else if (WideVT == MVT::i32)
11144 LC = RTLIB::MUL_I32;
11145 else if (WideVT == MVT::i64)
11146 LC = RTLIB::MUL_I64;
11147 else if (WideVT == MVT::i128)
11148 LC = RTLIB::MUL_I128;
11149
11150 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
11151 if (LibcallImpl == RTLIB::Unsupported) {
11152 forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
11153 return;
11154 }
11155
11156 SDValue HiLHS, HiRHS;
11157 if (Signed) {
11158 // The high part is obtained by SRA'ing all but one of the bits of low
11159 // part.
11160 unsigned LoSize = VT.getFixedSizeInBits();
11161 SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
11162 HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
11163 HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
11164 } else {
11165 HiLHS = DAG.getConstant(0, dl, VT);
11166 HiRHS = DAG.getConstant(0, dl, VT);
11167 }
11168
11169 // Attempt a libcall.
11170 SDValue Ret;
11172 CallOptions.setIsSigned(Signed);
11173 CallOptions.setIsPostTypeLegalization(true);
11175 // Halves of WideVT are packed into registers in different order
11176 // depending on platform endianness. This is usually handled by
11177 // the C calling convention, but we can't defer to it in
11178 // the legalizer.
11179 SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
11180 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11181 } else {
11182 SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
11183 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11184 }
11186 "Ret value is a collection of constituent nodes holding result.");
11187 if (DAG.getDataLayout().isLittleEndian()) {
11188 // Same as above.
11189 Lo = Ret.getOperand(0);
11190 Hi = Ret.getOperand(1);
11191 } else {
11192 Lo = Ret.getOperand(1);
11193 Hi = Ret.getOperand(0);
11194 }
11195}
11196
11197SDValue
11199 assert((Node->getOpcode() == ISD::SMULFIX ||
11200 Node->getOpcode() == ISD::UMULFIX ||
11201 Node->getOpcode() == ISD::SMULFIXSAT ||
11202 Node->getOpcode() == ISD::UMULFIXSAT) &&
11203 "Expected a fixed point multiplication opcode");
11204
11205 SDLoc dl(Node);
11206 SDValue LHS = Node->getOperand(0);
11207 SDValue RHS = Node->getOperand(1);
11208 EVT VT = LHS.getValueType();
11209 unsigned Scale = Node->getConstantOperandVal(2);
11210 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
11211 Node->getOpcode() == ISD::UMULFIXSAT);
11212 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
11213 Node->getOpcode() == ISD::SMULFIXSAT);
11214 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11215 unsigned VTSize = VT.getScalarSizeInBits();
11216
11217 if (!Scale) {
11218 // [us]mul.fix(a, b, 0) -> mul(a, b)
11219 if (!Saturating) {
11221 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11222 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
11223 SDValue Result =
11224 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11225 SDValue Product = Result.getValue(0);
11226 SDValue Overflow = Result.getValue(1);
11227 SDValue Zero = DAG.getConstant(0, dl, VT);
11228
11229 APInt MinVal = APInt::getSignedMinValue(VTSize);
11230 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
11231 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11232 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11233 // Xor the inputs, if resulting sign bit is 0 the product will be
11234 // positive, else negative.
11235 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
11236 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
11237 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
11238 return DAG.getSelect(dl, VT, Overflow, Result, Product);
11239 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
11240 SDValue Result =
11241 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11242 SDValue Product = Result.getValue(0);
11243 SDValue Overflow = Result.getValue(1);
11244
11245 APInt MaxVal = APInt::getMaxValue(VTSize);
11246 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11247 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
11248 }
11249 }
11250
11251 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
11252 "Expected scale to be less than the number of bits if signed or at "
11253 "most the number of bits if unsigned.");
11254 assert(LHS.getValueType() == RHS.getValueType() &&
11255 "Expected both operands to be the same type");
11256
11257 // Get the upper and lower bits of the result.
11258 SDValue Lo, Hi;
11259 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11260 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
11261 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
11262 if (VT.isVector())
11263 WideVT =
11265 if (isOperationLegalOrCustom(LoHiOp, VT)) {
11266 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
11267 Lo = Result.getValue(0);
11268 Hi = Result.getValue(1);
11269 } else if (isOperationLegalOrCustom(HiOp, VT)) {
11270 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11271 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
11272 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
11273 // Try for a multiplication using a wider type.
11274 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11275 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
11276 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
11277 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
11278 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
11279 SDValue Shifted =
11280 DAG.getNode(ISD::SRA, dl, WideVT, Res,
11281 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
11282 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
11283 } else if (VT.isVector()) {
11284 return SDValue();
11285 } else {
11286 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
11287 }
11288
11289 if (Scale == VTSize)
11290 // Result is just the top half since we'd be shifting by the width of the
11291 // operand. Overflow impossible so this works for both UMULFIX and
11292 // UMULFIXSAT.
11293 return Hi;
11294
11295 // The result will need to be shifted right by the scale since both operands
11296 // are scaled. The result is given to us in 2 halves, so we only want part of
11297 // both in the result.
11298 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
11299 DAG.getShiftAmountConstant(Scale, VT, dl));
11300 if (!Saturating)
11301 return Result;
11302
11303 if (!Signed) {
11304 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
11305 // widened multiplication) aren't all zeroes.
11306
11307 // Saturate to max if ((Hi >> Scale) != 0),
11308 // which is the same as if (Hi > ((1 << Scale) - 1))
11309 APInt MaxVal = APInt::getMaxValue(VTSize);
11310 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
11311 dl, VT);
11312 Result = DAG.getSelectCC(dl, Hi, LowMask,
11313 DAG.getConstant(MaxVal, dl, VT), Result,
11314 ISD::SETUGT);
11315
11316 return Result;
11317 }
11318
11319 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
11320 // widened multiplication) aren't all ones or all zeroes.
11321
11322 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
11323 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
11324
11325 if (Scale == 0) {
11326 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
11327 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
11328 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
11329 // Saturated to SatMin if wide product is negative, and SatMax if wide
11330 // product is positive ...
11331 SDValue Zero = DAG.getConstant(0, dl, VT);
11332 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
11333 ISD::SETLT);
11334 // ... but only if we overflowed.
11335 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
11336 }
11337
11338 // We handled Scale==0 above so all the bits to examine is in Hi.
11339
11340 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
11341 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
11342 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
11343 dl, VT);
11344 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
11345 // Saturate to min if (Hi >> (Scale - 1)) < -1),
11346 // which is the same as if (HI < (-1 << (Scale - 1))
11347 SDValue HighMask =
11348 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
11349 dl, VT);
11350 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
11351 return Result;
11352}
11353
// NOTE(review): this is a doxygen source listing — each line begins with its
// original file line number, and hyperlinked lines were dropped by the
// extraction, including this function's signature line (original 11355,
// presumably `TargetLowering::expandFixedPointDiv(unsigned Opcode, const
// SDLoc &dl,` — confirm against upstream TargetLowering.cpp).
//
// Expands a fixed-point division (SDIVFIX/SDIVFIXSAT/UDIVFIX/UDIVFIXSAT):
// if the operands have enough headroom (sign/leading-zero bits on the LHS,
// trailing zeros on the RHS) to absorb the scale, pre-shift them and emit a
// plain SDIV/UDIV (with round-toward-negative-infinity correction for the
// signed case). Returns a null SDValue when the scale cannot be absorbed.
11354SDValue
11356 SDValue LHS, SDValue RHS,
11357 unsigned Scale, SelectionDAG &DAG) const {
11358 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
11359 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
11360 "Expected a fixed point division opcode");
11361
11362 EVT VT = LHS.getValueType();
11363 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
11364 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
11365 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11366
11367 // If there is enough room in the type to upscale the LHS or downscale the
11368 // RHS before the division, we can perform it in this type without having to
11369 // resize. For signed operations, the LHS headroom is the number of
11370 // redundant sign bits, and for unsigned ones it is the number of zeroes.
11371 // The headroom for the RHS is the number of trailing zeroes.
// NOTE(review): original line 11373 (the unsigned arm of this conditional,
// presumably a countMinLeadingZeros of LHS) was dropped by the extraction.
11372 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
11374 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11375
11376 // For signed saturating operations, we need to be able to detect true integer
11377 // division overflow; that is, when you have MIN / -EPS. However, this
11378 // is undefined behavior and if we emit divisions that could take such
11379 // values it may cause undesired behavior (arithmetic exceptions on x86, for
11380 // example).
11381 // Avoid this by requiring an extra bit so that we never get this case.
11382 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11383 // signed saturating division, we need to emit a whopping 32-bit division.
11384 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11385 return SDValue();
11386
11387 unsigned LHSShift = std::min(LHSLead, Scale);
11388 unsigned RHSShift = Scale - LHSShift;
11389
11390 // At this point, we know that if we shift the LHS up by LHSShift and the
11391 // RHS down by RHSShift, we can emit a regular division with a final scaling
11392 // factor of Scale.
11393
11394 if (LHSShift)
11395 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
11396 DAG.getShiftAmountConstant(LHSShift, VT, dl));
11397 if (RHSShift)
11398 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
11399 DAG.getShiftAmountConstant(RHSShift, VT, dl));
11400
11401 SDValue Quot;
11402 if (Signed) {
11403 // For signed operations, if the resulting quotient is negative and the
11404 // remainder is nonzero, subtract 1 from the quotient to round towards
11405 // negative infinity.
11406 SDValue Rem;
11407 // FIXME: Ideally we would always produce an SDIVREM here, but if the
11408 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
11409 // we couldn't just form a libcall, but the type legalizer doesn't do it.
// NOTE(review): original line 11411 (the second operand of this `&&`,
// presumably an isOperationLegalOrCustom(ISD::SDIVREM, VT) check ending in
// `) {`) was dropped by the extraction.
11410 if (isTypeLegal(VT) &&
11412 Quot = DAG.getNode(ISD::SDIVREM, dl,
11413 DAG.getVTList(VT, VT),
11414 LHS, RHS);
11415 Rem = Quot.getValue(1);
11416 Quot = Quot.getValue(0);
11417 } else {
11418 Quot = DAG.getNode(ISD::SDIV, dl, VT,
11419 LHS, RHS);
11420 Rem = DAG.getNode(ISD::SREM, dl, VT,
11421 LHS, RHS);
11422 }
11423 SDValue Zero = DAG.getConstant(0, dl, VT);
11424 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
11425 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
11426 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
11427 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
11428 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
11429 DAG.getConstant(1, dl, VT));
11430 Quot = DAG.getSelect(dl, VT,
11431 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
11432 Sub1, Quot);
11433 } else
11434 Quot = DAG.getNode(ISD::UDIV, dl, VT,
11435 LHS, RHS);
11436
11437 return Quot;
11438}
11439
// NOTE(review): doxygen listing; the function's first signature line
// (original 11440, presumably `void TargetLowering::expandUADDSUBO(`) was
// dropped by the extraction — restore from upstream before compiling.
//
// Expands UADDO/USUBO into (Result, Overflow). Prefers UADDO_CARRY /
// USUBO_CARRY with a constant-zero carry-in when legal/custom; otherwise
// emits ADD/SUB and derives the overflow bit with a SETCC, with cheap
// special cases for `uaddo X, 1` (overflow iff result == 0) and
// `uaddo X, -1` (overflow iff X != 0).
11441 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11442 SDLoc dl(Node);
11443 SDValue LHS = Node->getOperand(0);
11444 SDValue RHS = Node->getOperand(1);
11445 bool IsAdd = Node->getOpcode() == ISD::UADDO;
11446
11447 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11448 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11449 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
11450 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11451 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11452 { LHS, RHS, CarryIn });
11453 Result = SDValue(NodeCarry.getNode(), 0);
11454 Overflow = SDValue(NodeCarry.getNode(), 1);
11455 return;
11456 }
11457
11458 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11459 LHS.getValueType(), LHS, RHS);
11460
11461 EVT ResultType = Node->getValueType(1);
11462 EVT SetCCType = getSetCCResultType(
11463 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11464 SDValue SetCC;
11465 if (IsAdd && isOneConstant(RHS)) {
11466 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
11467 // the live range of X. We assume comparing with 0 is cheap.
11468 // The general case (X + C) < C is not necessarily beneficial. Although we
11469 // reduce the live range of X, we may introduce the materialization of
11470 // constant C.
11471 SetCC =
11472 DAG.getSetCC(dl, SetCCType, Result,
11473 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11474 } else if (IsAdd && isAllOnesConstant(RHS)) {
11475 // Special case: uaddo X, -1 overflows if X != 0.
11476 SetCC =
11477 DAG.getSetCC(dl, SetCCType, LHS,
11478 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11479 } else {
11480 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11481 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11482 }
11483 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11484}
11485
// NOTE(review): doxygen listing; the function's first signature line
// (original 11486, presumably `void TargetLowering::expandSADDSUBO(`) was
// dropped by the extraction — restore from upstream before compiling.
//
// Expands SADDO/SSUBO into (Result, Overflow). If the matching saturating
// opcode (SADDSAT/SSUBSAT) is legal, overflow is detected by comparing the
// wrapped result against the saturated result; otherwise it falls back to
// the classic sign-based check: for ADD, overflow iff (Result < LHS) XOR
// (RHS < 0); for SUB, overflow iff (Result < LHS) XOR (RHS > 0).
11487 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11488 SDLoc dl(Node);
11489 SDValue LHS = Node->getOperand(0);
11490 SDValue RHS = Node->getOperand(1);
11491 bool IsAdd = Node->getOpcode() == ISD::SADDO;
11492
11493 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11494 LHS.getValueType(), LHS, RHS);
11495
11496 EVT ResultType = Node->getValueType(1);
11497 EVT OType = getSetCCResultType(
11498 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11499
11500 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11501 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11502 if (isOperationLegal(OpcSat, LHS.getValueType())) {
11503 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11504 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11505 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11506 return;
11507 }
11508
11509 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11510
11511 // For an addition, the result should be less than one of the operands (LHS)
11512 // if and only if the other operand (RHS) is negative, otherwise there will
11513 // be overflow.
11514 // For a subtraction, the result should be less than one of the operands
11515 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11516 // otherwise there will be overflow.
11517 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11518 SDValue ConditionRHS =
11519 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11520
11521 Overflow = DAG.getBoolExtOrTrunc(
11522 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11523 ResultType, ResultType);
11524}
11525
// NOTE(review): doxygen listing; the function's first signature line
// (original 11526, presumably `bool TargetLowering::expandMULO(SDNode *Node,
// SDValue &Result,`) was dropped by the extraction, as were several interior
// lines flagged below — restore from upstream before compiling.
//
// Expands SMULO/UMULO into (Result, Overflow). Strategies, in order:
// power-of-two constant RHS becomes a shift plus a shift-back comparison;
// then MULH[SU], then [SU]MUL_LOHI, then a widened MUL in a double-width
// type, and finally forceExpandWideMUL as the scalar fallback (vectors give
// up and return false at that point).
11527 SDValue &Overflow, SelectionDAG &DAG) const {
11528 SDLoc dl(Node);
11529 EVT VT = Node->getValueType(0);
11530 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11531 SDValue LHS = Node->getOperand(0);
11532 SDValue RHS = Node->getOperand(1);
11533 bool isSigned = Node->getOpcode() == ISD::SMULO;
11534
11535 // For power-of-two multiplications we can use a simpler shift expansion.
11536 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
11537 const APInt &C = RHSC->getAPIntValue();
11538 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
11539 if (C.isPowerOf2()) {
11540 // smulo(x, signed_min) is same as umulo(x, signed_min).
11541 bool UseArithShift = isSigned && !C.isMinSignedValue();
11542 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
11543 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
11544 Overflow = DAG.getSetCC(dl, SetCCVT,
11545 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
11546 dl, VT, Result, ShiftAmt),
11547 LHS, ISD::SETNE);
11548 return true;
11549 }
11550 }
11551
11552 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
11553 if (VT.isVector())
// NOTE(review): original line 11555 (the right-hand side of this assignment,
// presumably an EVT::getVectorVT(...) of the widened element type) was
// dropped by the extraction.
11554 WideVT =
11556
11557 SDValue BottomHalf;
11558 SDValue TopHalf;
// NOTE(review): original lines 11560-11561 (the Ops[2][3] initializer,
// presumably the {MULH*, *MUL_LOHI, *_EXTEND} rows for unsigned/signed)
// were dropped by the extraction.
11559 static const unsigned Ops[2][3] =
11562 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
11563 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11564 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
11565 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
11566 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
11567 RHS);
11568 TopHalf = BottomHalf.getValue(1);
11569 } else if (isTypeLegal(WideVT)) {
11570 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
11571 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
11572 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
11573 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
11574 SDValue ShiftAmt =
11575 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
11576 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
11577 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
11578 } else {
11579 if (VT.isVector())
11580 return false;
11581
11582 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
11583 }
11584
11585 Result = BottomHalf;
11586 if (isSigned) {
// Signed overflow: the top half must equal the sign-extension of the
// bottom half, i.e. BottomHalf >> (width-1).
11587 SDValue ShiftAmt = DAG.getShiftAmountConstant(
11588 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
11589 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
11590 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
11591 } else {
11592 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
11593 DAG.getConstant(0, dl, VT), ISD::SETNE);
11594 }
11595
11596 // Truncate the result if SetCC returns a larger type than needed.
11597 EVT RType = Node->getValueType(1);
11598 if (RType.bitsLT(Overflow.getValueType()))
11599 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
11600
11601 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
11602 "Unexpected result type for S/UMULO legalization");
11603 return true;
11604}
11605
// NOTE(review): doxygen listing; the function signature (original 11606,
// presumably `SDValue TargetLowering::expandVecReduce(SDNode *Node,
// SelectionDAG &DAG) const {`) was dropped by the extraction, along with
// interior lines flagged below — restore from upstream before compiling.
//
// Expands a VECREDUCE_* node: for power-of-two vectors it repeatedly splits
// the vector in half and combines halves with the base (binary) opcode,
// stopping early if the half-width reduction becomes legal; otherwise it
// extracts all elements and folds them sequentially, any-extending if the
// scalar result type is wider than the element type.
11607 SDLoc dl(Node);
11608 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11609 SDValue Op = Node->getOperand(0);
11610 EVT VT = Op.getValueType();
11611
11612 // Try to use a shuffle reduction for power of two vectors.
11613 if (VT.isPow2VectorType()) {
// NOTE(review): original line 11614 (the loop header this `break` and the
// extra closing brace at 11629 belong to — presumably a `while` over the
// element count) was dropped by the extraction.
11615 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11616 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11617 break;
11618
11619 SDValue Lo, Hi;
11620 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11621 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11622 VT = HalfVT;
11623
11624 // Stop if splitting is enough to make the reduction legal.
11625 if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
11626 return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
11627 Node->getFlags());
11628 }
11629 }
11630
// NOTE(review): original line 11632 (presumably a `report_fatal_error(`
// call taking the string below) was dropped by the extraction.
11631 if (VT.isScalableVector())
11633 "Expanding reductions for scalable vectors is undefined.");
11634
11635 EVT EltVT = VT.getVectorElementType();
11636 unsigned NumElts = VT.getVectorNumElements();
// NOTE(review): the declaration of `Ops` (presumably a
// SmallVector<SDValue, 8>) was dropped by the extraction here.
11638
11639 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11640
11641 SDValue Res = Ops[0];
11642 for (unsigned i = 1; i < NumElts; i++)
11643 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11644
11645 // Result type may be wider than element type.
11646 if (EltVT != Node->getValueType(0))
11647 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11648 return Res;
11649}
11650
// NOTE(review): doxygen listing; the function signature (original 11651,
// presumably `SDValue TargetLowering::expandVecReduceSeq(SDNode *Node,
// SelectionDAG &DAG) const {`) was dropped by the extraction, along with
// interior lines flagged below — restore from upstream before compiling.
//
// Expands a sequential (ordered) vector reduction: starting from the
// accumulator operand, folds every extracted element in order with the base
// binary opcode, preserving the node's flags. Sequential ordering matters
// for strict FP semantics, so no tree-splitting is attempted here.
11652 SDLoc dl(Node);
11653 SDValue AccOp = Node->getOperand(0);
11654 SDValue VecOp = Node->getOperand(1);
11655 SDNodeFlags Flags = Node->getFlags();
11656
11657 EVT VT = VecOp.getValueType();
11658 EVT EltVT = VT.getVectorElementType();
11659
// NOTE(review): original line 11661 (presumably a `report_fatal_error(`
// call taking the string below) was dropped by the extraction.
11660 if (VT.isScalableVector())
11662 "Expanding reductions for scalable vectors is undefined.");
11663
11664 unsigned NumElts = VT.getVectorNumElements();
// NOTE(review): the declaration of `Ops` (presumably a
// SmallVector<SDValue, 8>) was dropped by the extraction here.
11666
11667 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11668
11669 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11670
11671 SDValue Res = AccOp;
11672 for (unsigned i = 0; i < NumElts; i++)
11673 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11674
11675 return Res;
11676}
11677
// NOTE(review): doxygen listing; the function's first signature line
// (original 11678, presumably `bool TargetLowering::expandREM(SDNode *Node,
// SDValue &Result,`) was dropped by the extraction — restore from upstream.
//
// Expands SREM/UREM: uses the remainder output of [SU]DIVREM when that is
// legal/custom, otherwise rewrites X % Y as X - (X/Y)*Y when plain division
// is available. Returns false when neither strategy applies.
11679 SelectionDAG &DAG) const {
11680 EVT VT = Node->getValueType(0);
11681 SDLoc dl(Node);
11682 bool isSigned = Node->getOpcode() == ISD::SREM;
11683 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11684 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11685 SDValue Dividend = Node->getOperand(0);
11686 SDValue Divisor = Node->getOperand(1);
11687 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11688 SDVTList VTs = DAG.getVTList(VT, VT);
11689 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11690 return true;
11691 }
11692 if (isOperationLegalOrCustom(DivOpc, VT)) {
11693 // X % Y -> X-X/Y*Y
11694 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11695 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11696 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11697 return true;
11698 }
11699 return false;
11700}
11701
// NOTE(review): doxygen listing; the function's first signature line
// (original 11702, presumably `SDValue TargetLowering::expandFP_TO_INT_SAT(
// SDNode *Node,`) was dropped by the extraction, plus one interior line
// flagged below — restore from upstream before compiling.
//
// Expands FP_TO_SINT_SAT/FP_TO_UINT_SAT: computes the integer saturation
// bounds for SatVT (extended to DstVT), promotes [b]f16 sources to f32
// (libcalls cannot handle them), then either clamps with FMINNUM/FMAXNUM
// before converting (when the bounds are exactly representable and min/max
// are legal) or converts first and fixes out-of-range/NaN lanes with
// compare+select. NaN always maps to zero in the signed case.
11703 SelectionDAG &DAG) const {
11704 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
11705 SDLoc dl(SDValue(Node, 0));
11706 SDValue Src = Node->getOperand(0);
11707
11708 // DstVT is the result type, while SatVT is the size to which we saturate
11709 EVT SrcVT = Src.getValueType();
11710 EVT DstVT = Node->getValueType(0);
11711
11712 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
11713 unsigned SatWidth = SatVT.getScalarSizeInBits();
11714 unsigned DstWidth = DstVT.getScalarSizeInBits();
11715 assert(SatWidth <= DstWidth &&
11716 "Expected saturation width smaller than result width");
11717
11718 // Determine minimum and maximum integer values and their corresponding
11719 // floating-point values.
11720 APInt MinInt, MaxInt;
11721 if (IsSigned) {
11722 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
11723 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
11724 } else {
11725 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
11726 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
11727 }
11728
11729 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
11730 // libcall emission cannot handle this. Large result types will fail.
11731 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
11732 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
11733 SrcVT = Src.getValueType();
11734 }
11735
11736 const fltSemantics &Sem = SrcVT.getFltSemantics();
11737 APFloat MinFloat(Sem);
11738 APFloat MaxFloat(Sem);
11739
11740 APFloat::opStatus MinStatus =
11741 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
11742 APFloat::opStatus MaxStatus =
11743 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
11744 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
11745 !(MaxStatus & APFloat::opStatus::opInexact);
11746
11747 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
11748 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
11749
11750 // If the integer bounds are exactly representable as floats and min/max are
11751 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
11752 // of comparisons and selects.
// NOTE(review): original line 11754 (the second operand of this `&&`,
// presumably an isOperationLegal(ISD::FMAXNUM, SrcVT) check) was dropped
// by the extraction.
11753 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
11755 if (AreExactFloatBounds && MinMaxLegal) {
11756 SDValue Clamped = Src;
11757
11758 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11759 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
11760 // Clamp by MaxFloat from above. NaN cannot occur.
11761 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
11762 // Convert clamped value to integer.
11763 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
11764 dl, DstVT, Clamped);
11765
11766 // In the unsigned case we're done, because we mapped NaN to MinFloat,
11767 // which will cast to zero.
11768 if (!IsSigned)
11769 return FpToInt;
11770
11771 // Otherwise, select 0 if Src is NaN.
11772 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11773 EVT SetCCVT =
11774 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11775 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11776 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
11777 }
11778
11779 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
11780 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
11781
11782 // Result of direct conversion. The assumption here is that the operation is
11783 // non-trapping and it's fine to apply it to an out-of-range value if we
11784 // select it away later.
11785 SDValue FpToInt =
11786 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
11787
11788 SDValue Select = FpToInt;
11789
11790 EVT SetCCVT =
11791 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11792
11793 // If Src ULT MinFloat, select MinInt. In particular, this also selects
11794 // MinInt if Src is NaN.
11795 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
11796 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
11797 // If Src OGT MaxFloat, select MaxInt.
11798 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
11799 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
11800
11801 // In the unsigned case we are done, because we mapped NaN to MinInt, which
11802 // is already zero.
11803 if (!IsSigned)
11804 return Select;
11805
11806 // Otherwise, select 0 if Src is NaN.
11807 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11808 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11809 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
11810}
11811
// NOTE(review): doxygen listing; the function's first signature line
// (original 11812, presumably `SDValue TargetLowering::
// expandRoundInexactToOdd(EVT ResultVT, SDValue Op,`) was dropped by the
// extraction — restore from upstream before compiling.
//
// Narrows Op to ResultVT using round-to-odd: performs the FP round, rounds
// back up to the wide type to check exactness, and when the narrowing was
// inexact and landed on an even value, nudges the result's integer bits by
// +/-1 so it becomes odd. This prevents double-rounding errors when a
// second rounding step follows (see the Boldo/Melquiond reference below).
11813 const SDLoc &dl,
11814 SelectionDAG &DAG) const {
11815 EVT OperandVT = Op.getValueType();
11816 if (OperandVT.getScalarType() == ResultVT.getScalarType())
11817 return Op;
11818 EVT ResultIntVT = ResultVT.changeTypeToInteger();
11819 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11820 // can induce double-rounding which may alter the results. We can
11821 // correct for this using a trick explained in: Boldo, Sylvie, and
11822 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11823 // World Congress. 2005.
11824 SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
11825 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);
11826
11827 // We can keep the narrow value as-is if narrowing was exact (no
11828 // rounding error), the wide value was NaN (the narrow value is also
11829 // NaN and should be preserved) or if we rounded to the odd value.
11830 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
11831 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
11832 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
11833 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
11834 EVT ResultIntVTCCVT = getSetCCResultType(
11835 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
11836 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
11837 // The result is already odd so we don't need to do anything.
11838 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
11839
11840 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11841 Op.getValueType());
11842 // We keep results which are exact, odd or NaN.
// SETUEQ is true for equal-or-unordered, so NaN inputs also keep Narrow.
11843 SDValue KeepNarrow =
11844 DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
11845 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11846 // We morally performed a round-down if AbsNarrow is smaller than
11847 // AbsWide.
11848 SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
11849 SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
11850 SDValue NarrowIsRd =
11851 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
11852 // If the narrow value is odd or exact, pick it.
11853 // Otherwise, narrow is even and corresponds to either the rounded-up
11854 // or rounded-down value. If narrow is the rounded-down value, we want
11855 // the rounded-up value as it will be odd.
11856 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
11857 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
11858 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
11859 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
11860}
11861
// NOTE(review): doxygen listing; the function's signature line (original
// 11862, presumably `SDValue TargetLowering::expandFP_ROUND(SDNode *Node,
// SelectionDAG &DAG) const {`) was dropped by the extraction — restore from
// upstream before compiling.
//
// Expands FP_ROUND to bf16: an already-rounded input (trunc flag == 1)
// becomes FP_TO_BF16; otherwise the source is narrowed to f32 with
// round-to-odd (expandRoundInexactToOdd) and the final bf16 is produced by
// integer round-to-nearest-even on the top 16 bits, with NaNs quieted and
// protected from being rounded into infinity. Non-bf16 results return a
// null SDValue (not handled here).
11863 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11864 SDValue Op = Node->getOperand(0);
11865 EVT VT = Node->getValueType(0);
11866 SDLoc dl(Node);
11867 if (VT.getScalarType() == MVT::bf16) {
11868 if (Node->getConstantOperandVal(1) == 1) {
11869 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
11870 }
11871 EVT OperandVT = Op.getValueType();
11872 SDValue IsNaN = DAG.getSetCC(
11873 dl,
11874 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
11875 Op, Op, ISD::SETUO);
11876
11877 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11878 // can induce double-rounding which may alter the results. We can
11879 // correct for this using a trick explained in: Boldo, Sylvie, and
11880 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11881 // World Congress. 2005.
11882 EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
11883 EVT I32 = F32.changeTypeToInteger();
11884 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
11885 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11886
11887 // Conversions should set NaN's quiet bit. This also prevents NaNs from
11888 // turning into infinities.
11889 SDValue NaN =
11890 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
11891
11892 // Factor in the contribution of the low 16 bits.
11893 SDValue One = DAG.getConstant(1, dl, I32);
11894 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
11895 DAG.getShiftAmountConstant(16, I32, dl));
11896 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
11897 SDValue RoundingBias =
11898 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
11899 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
11900
11901 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
11902 // 0x80000000.
11903 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
11904
11905 // Now that we have rounded, shift the bits into position.
11906 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
11907 DAG.getShiftAmountConstant(16, I32, dl));
11908 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11909 EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11910 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
11911 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
11912 }
11913 return SDValue();
11914}
11915
// NOTE(review): doxygen listing; the function's first signature line
// (original 11916, presumably `SDValue TargetLowering::expandVectorSplice(
// SDNode *Node,`) was dropped by the extraction, plus interior lines
// flagged below — restore from upstream before compiling.
//
// Expands VECTOR_SPLICE for scalable vectors by going through a stack slot:
// stores V1 then V2 contiguously (forming CONCAT(V1, V2) in memory) and
// loads the result back from a byte offset derived from Imm — from the
// front for Imm >= 0, or sizeof(V1) minus the (clamped) trailing-element
// bytes for Imm < 0.
11917 SelectionDAG &DAG) const {
11918 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11919 assert(Node->getValueType(0).isScalableVector() &&
11920 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11921
11922 EVT VT = Node->getValueType(0);
11923 SDValue V1 = Node->getOperand(0);
11924 SDValue V2 = Node->getOperand(1);
11925 int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
11926 SDLoc DL(Node);
11927
11928 // Expand through memory thusly:
11929 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11930 // Store V1, Ptr
11931 // Store V2, Ptr + sizeof(V1)
11932 // If (Imm < 0)
11933 // TrailingElts = -Imm
11934 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11935 // else
11936 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
11937 // Res = Load Ptr
11938
11939 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
// NOTE(review): the declaration of `MemVT` (presumably an
// EVT::getVectorVT(...) taking the continuation line below) was dropped by
// the extraction here.
11941
11942 VT.getVectorElementCount() * 2);
11943 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11944 EVT PtrVT = StackPtr.getValueType();
11945 auto &MF = DAG.getMachineFunction();
11946 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11947 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11948
11949 // Store the lo part of CONCAT_VECTORS(V1, V2)
11950 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
11951 // Store the hi part of CONCAT_VECTORS(V1, V2)
11952 SDValue VTBytes = DAG.getTypeSize(DL, PtrVT, VT.getStoreSize());
11953 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, VTBytes);
11954 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
11955
11956 if (Imm >= 0) {
11957 // Load back the required element. getVectorElementPointer takes care of
11958 // clamping the index if it's out-of-bounds.
11959 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
11960 // Load the spliced result
// NOTE(review): original line 11962 (the final MachinePointerInfo argument
// of this getLoad call) was dropped by the extraction.
11961 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
11963 }
11964
11965 uint64_t TrailingElts = -Imm;
11966
11967 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11968 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11969 SDValue TrailingBytes =
11970 DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
11971
11972 if (TrailingElts > VT.getVectorMinNumElements())
11973 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VTBytes);
11974
11975 // Calculate the start address of the spliced result.
11976 StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
11977
11978 // Load the spliced result
// NOTE(review): original line 11980 (the final MachinePointerInfo argument
// of this getLoad call) was dropped by the extraction.
11979 return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
11981}
11982
// NOTE(review): doxygen listing; the function's first signature line
// (original 11983, presumably `SDValue TargetLowering::expandVECTOR_COMPRESS(
// SDNode *Node,`) was dropped by the extraction, plus interior lines flagged
// below — restore from upstream before compiling.
//
// Expands a masked vector compress through a stack slot: optionally
// pre-stores the passthru vector, then walks every lane, storing each
// element at the current output position and advancing the position by the
// (frozen, truncated-to-i1) mask bit. After the last lane it patches the
// slot at position popcount(mask) with the correct passthru element unless
// every lane was selected, then reloads the whole vector. Scalable vectors
// are rejected with a fatal error — targets must handle those themselves.
11984 SelectionDAG &DAG) const {
11985 SDLoc DL(Node);
11986 SDValue Vec = Node->getOperand(0);
11987 SDValue Mask = Node->getOperand(1);
11988 SDValue Passthru = Node->getOperand(2);
11989
11990 EVT VecVT = Vec.getValueType();
11991 EVT ScalarVT = VecVT.getScalarType();
11992 EVT MaskVT = Mask.getValueType();
11993 EVT MaskScalarVT = MaskVT.getScalarType();
11994
11995 // Needs to be handled by targets that have scalable vector types.
11996 if (VecVT.isScalableVector())
11997 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11998
11999 SDValue StackPtr = DAG.CreateStackTemporary(
12000 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
12001 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
// NOTE(review): original line 12003 (the right-hand side of this
// initializer, presumably MachinePointerInfo::getFixedStack(...)) was
// dropped by the extraction.
12002 MachinePointerInfo PtrInfo =
12004
12005 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
12006 SDValue Chain = DAG.getEntryNode();
12007 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
12008
12009 bool HasPassthru = !Passthru.isUndef();
12010
12011 // If we have a passthru vector, store it on the stack, overwrite the matching
12012 // positions and then re-write the last element that was potentially
12013 // overwritten even though mask[i] = false.
12014 if (HasPassthru)
12015 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
12016
12017 SDValue LastWriteVal;
12018 APInt PassthruSplatVal;
12019 bool IsSplatPassthru =
12020 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
12021
12022 if (IsSplatPassthru) {
12023 // As we do not know which position we wrote to last, we cannot simply
12024 // access that index from the passthru vector. So we first check if passthru
12025 // is a splat vector, to use any element ...
12026 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
12027 } else if (HasPassthru) {
12028 // ... if it is not a splat vector, we need to get the passthru value at
12029 // position = popcount(mask) and re-load it from the stack before it is
12030 // overwritten in the loop below.
12031 EVT PopcountVT = ScalarVT.changeTypeToInteger();
12032 SDValue Popcount = DAG.getNode(
12033 ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
// NOTE(review): original line 12035 (the start of this getNode call's
// argument list, presumably a ZERO_EXTEND) was dropped by the extraction.
12034 Popcount =
12036 MaskVT.changeVectorElementType(PopcountVT), Popcount);
12037 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
12038 SDValue LastElmtPtr =
12039 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
// NOTE(review): original line 12042 (the final MachinePointerInfo argument
// of this getLoad call) was dropped by the extraction.
12040 LastWriteVal = DAG.getLoad(
12041 ScalarVT, DL, Chain, LastElmtPtr,
12043 Chain = LastWriteVal.getValue(1);
12044 }
12045
12046 unsigned NumElms = VecVT.getVectorNumElements();
12047 for (unsigned I = 0; I < NumElms; I++) {
12048 SDValue ValI = DAG.getExtractVectorElt(DL, ScalarVT, Vec, I);
12049 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
// NOTE(review): original line 12052 (the final MachinePointerInfo argument
// of this getStore call) was dropped by the extraction.
12050 Chain = DAG.getStore(
12051 Chain, DL, ValI, OutPtr,
12053
12054 // Get the mask value and add it to the current output position. This
12055 // either increments by 1 if MaskI is true or adds 0 otherwise.
12056 // Freeze in case we have poison/undef mask entries.
12057 SDValue MaskI = DAG.getExtractVectorElt(DL, MaskScalarVT, Mask, I);
12058 MaskI = DAG.getFreeze(MaskI);
12059 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
12060 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
12061 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
12062
12063 if (HasPassthru && I == NumElms - 1) {
12064 SDValue EndOfVector =
12065 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
12066 SDValue AllLanesSelected =
12067 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
12068 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
12069 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12070
12071 // Re-write the last ValI if all lanes were selected. Otherwise,
12072 // overwrite the last write it with the passthru value.
12073 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
12074 LastWriteVal, SDNodeFlags::Unpredictable);
// NOTE(review): original line 12077 (the final MachinePointerInfo argument
// of this getStore call) was dropped by the extraction.
12075 Chain = DAG.getStore(
12076 Chain, DL, LastWriteVal, OutPtr,
12078 }
12079 }
12080
12081 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12082}
12083
// Expands a PARTIAL_REDUCE_{U,S,F}MLA node: (optionally) extend and multiply
// the two vector operands, then fold the wide product into the narrower
// accumulator by adding together accumulator-sized subvector slices.
// NOTE(review): this is a Doxygen text dump — the signature line (source line
// 12084, presumably `SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,`)
// and several case-label lines were dropped by extraction; confirm upstream.
12085 SelectionDAG &DAG) const {
12086 SDLoc DL(N);
// Operand 0 is the accumulator; operands 1/2 are the multiplicands.
12087 SDValue Acc = N->getOperand(0);
12088 SDValue MulLHS = N->getOperand(1);
12089 SDValue MulRHS = N->getOperand(2);
12090 EVT AccVT = Acc.getValueType();
12091 EVT MulOpVT = MulLHS.getValueType();
12092
// Wider type to extend the multiplicands to before multiplying.
// NOTE(review): source line 12094 (the first half of this initializer) was
// dropped in extraction — confirm against upstream.
12093 EVT ExtMulOpVT =
12095 MulOpVT.getVectorElementCount());
12096
// Pick the extension opcode matching the partial-reduce flavor.
12097 unsigned ExtOpcLHS, ExtOpcRHS;
12098 switch (N->getOpcode()) {
12099 default:
12100 llvm_unreachable("Unexpected opcode");
// NOTE(review): the case labels for the three arms below (source lines 12101,
// 12104, 12107 — presumably ISD::PARTIAL_REDUCE_UMLA / SMLA / FMLA) were
// dropped in extraction; confirm upstream.
12102 ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
12103 break;
12105 ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
12106 break;
12108 ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
12109 break;
12110 }
12111
// Extend both multiplicands only if widening is actually required.
12112 if (ExtMulOpVT != MulOpVT) {
12113 MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
12114 MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
12115 }
// Skip the multiply when RHS is a splat of one (identity for MUL/FMUL).
12116 SDValue Input = MulLHS;
12117 if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
12118 if (!llvm::isOneOrOneSplatFP(MulRHS))
12119 Input = DAG.getNode(ISD::FMUL, DL, ExtMulOpVT, MulLHS, MulRHS);
12120 } else if (!llvm::isOneOrOneSplat(MulRHS)) {
12121 Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
12122 }
12123
// The product has ScaleFactor x as many elements as the accumulator; slice it
// into accumulator-sized subvectors at multiples of Stride.
12124 unsigned Stride = AccVT.getVectorMinNumElements();
12125 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
12126
12127 // Collect all of the subvectors
12128 std::deque<SDValue> Subvectors = {Acc};
12129 for (unsigned I = 0; I < ScaleFactor; I++)
12130 Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
12131
// FADD for the floating-point flavor, integer ADD otherwise.
12132 unsigned FlatNode =
12133 N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;
12134
12135 // Flatten the subvector tree
// Pairwise-reduce: pop two slices, push their sum, until one value remains.
12136 while (Subvectors.size() > 1) {
12137 Subvectors.push_back(
12138 DAG.getNode(FlatNode, DL, AccVT, {Subvectors[0], Subvectors[1]}));
12139 Subvectors.pop_front();
12140 Subvectors.pop_front();
12141 }
12142
12143 assert(Subvectors.size() == 1 &&
12144 "There should only be one subvector after tree flattening");
12145
12146 return Subvectors[0];
12147}
12148
12149/// Given a store node \p StoreNode, return true if it is safe to fold that node
12150/// into \p FPNode, which expands to a library call with output pointers.
// Determine whether StoreNode can be folded into FPNode's expansion (a libcall
// with output pointers) without creating a cycle or a nested call sequence.
// Walks StoreNode's operand chains (DFS), then runs a bounded predecessor
// check. Returns false conservatively when the MaxSteps budget is exhausted.
// NOTE(review): Doxygen dump — the declaration line (source 12151) and the
// Worklist/Visited/MaxSteps declarations (source 12153/12155/12162) were
// dropped in extraction; confirm upstream.
12152 SDNode *FPNode) {
12154 SmallVector<const SDNode *, 8> DeferredNodes;
12156
12157 // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
12158 for (SDValue Op : StoreNode->ops())
12159 if (Op.getNode() != FPNode)
12160 Worklist.push_back(Op.getNode());
12161
12163 while (!Worklist.empty()) {
12164 const SDNode *Node = Worklist.pop_back_val();
12165 auto [_, Inserted] = Visited.insert(Node);
12166 if (!Inserted)
// Already visited this node — don't revisit.
12167 continue;
12168
// Give up conservatively once the search budget is spent.
12169 if (MaxSteps > 0 && Visited.size() >= MaxSteps)
12170 return false;
12171
12172 // Reached the FPNode (would result in a cycle).
12173 // OR Reached CALLSEQ_START (would result in nested call sequences).
12174 if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
12175 return false;
12176
12177 if (Node->getOpcode() == ISD::CALLSEQ_END) {
12178 // Defer looking into call sequences (so we can check we're outside one).
12179 // We still need to look through these for the predecessor check.
12180 DeferredNodes.push_back(Node);
12181 continue;
12182 }
12183
// Continue the DFS through this node's operands.
12184 for (SDValue Op : Node->ops())
12185 Worklist.push_back(Op.getNode());
12186 }
12187
12188 // True if we're outside a call sequence and don't have the FPNode as a
12189 // predecessor. No cycles or nested call sequences possible.
12190 return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes,
12191 MaxSteps);
12192}
12193
// Expand a multi-result FP node into a library call that returns its results
// through output pointers (e.g. sincos-style calls). Where a user store can be
// safely folded, its destination pointer is passed directly to the call;
// otherwise a stack temporary is created and reloaded afterwards. Returns true
// on success, false if the libcall is unknown/unsupported.
// NOTE(review): Doxygen dump — the signature line (source 12194) and several
// body lines (source 12214, 12236, 12253, 12265, 12278, 12299) were dropped in
// extraction; confirm against upstream before relying on exact control flow.
12195 SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
12197 std::optional<unsigned> CallRetResNo) const {
12198 if (LC == RTLIB::UNKNOWN_LIBCALL)
12199 return false;
12200
12201 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
12202 if (LibcallImpl == RTLIB::Unsupported)
12203 return false;
12204
12205 LLVMContext &Ctx = *DAG.getContext();
12206 EVT VT = Node->getValueType(0);
12207 unsigned NumResults = Node->getNumValues();
12208
12209 // Find users of the node that store the results (and share input chains). The
12210 // destination pointers can be used instead of creating stack allocations.
12211 SDValue StoresInChain;
12212 SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
12213 for (SDNode *User : Node->users()) {
// NOTE(review): the guard on this `continue` (source line 12214, presumably a
// check that User is a store of this node's value) was dropped in extraction.
12215 continue;
12216 auto *ST = cast<StoreSDNode>(User);
12217 SDValue StoreValue = ST->getValue();
12218 unsigned ResNo = StoreValue.getResNo();
12219 // Ensure the store corresponds to an output pointer.
12220 if (CallRetResNo == ResNo)
12221 continue;
12222 // Ensure the store to the default address space and not atomic or volatile.
12223 if (!ST->isSimple() || ST->getAddressSpace() != 0)
12224 continue;
12225 // Ensure all store chains are the same (so they don't alias).
12226 if (StoresInChain && ST->getChain() != StoresInChain)
12227 continue;
12228 // Ensure the store is properly aligned.
12229 Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx);
12230 if (ST->getAlign() <
12231 DAG.getDataLayout().getABITypeAlign(StoreType->getScalarType()))
12232 continue;
12233 // Avoid:
12234 // 1. Creating cyclic dependencies.
12235 // 2. Expanding the node to a call within a call sequence.
// NOTE(review): the condition here (source line 12236, presumably
// `if (!canFoldStoreIntoLibCallOutputPointers(ST, Node))`) was dropped.
12237 continue;
12238 ResultStores[ResNo] = ST;
12239 StoresInChain = ST->getChain();
12240 }
12241
12242 ArgListTy Args;
12243
12244 // Pass the arguments.
12245 for (const SDValue &Op : Node->op_values()) {
12246 EVT ArgVT = Op.getValueType();
12247 Type *ArgTy = ArgVT.getTypeForEVT(Ctx);
12248 Args.emplace_back(Op, ArgTy);
12249 }
12250
12251 // Pass the output pointers.
// NOTE(review): the PointerTy declaration (source line 12253) was dropped.
12252 SmallVector<SDValue, 2> ResultPtrs(NumResults);
12254 for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) {
12255 if (ResNo == CallRetResNo)
12256 continue;
12257 EVT ResVT = Node->getValueType(ResNo);
// Prefer a folded store's destination; otherwise spill to a stack slot.
12258 SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(ResVT);
12259 ResultPtrs[ResNo] = ResultPtr;
12260 Args.emplace_back(ResultPtr, PointerTy);
12261 }
12262
12263 SDLoc DL(Node);
12264
// NOTE(review): the enclosing condition of this brace-block (source line
// 12265, presumably a vector-masked-libcall check) was dropped in extraction.
12266 // Pass the vector mask (if required).
12267 EVT MaskVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
12268 SDValue Mask = DAG.getBoolConstant(true, DL, MaskVT, VT);
12269 Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
12270 }
12271
// Void return unless one result is returned directly by the call.
12272 Type *RetType = CallRetResNo.has_value()
12273 ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
12274 : Type::getVoidTy(Ctx);
12275 SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
12276 SDValue Callee =
12277 DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
12279 CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
12280 getLibcallImplCallingConv(LibcallImpl), RetType, Callee, std::move(Args));
12281
12282 auto [Call, CallChain] = LowerCallTo(CLI);
12283
// Reload each pointer-returned result after the call.
12284 for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
12285 if (ResNo == CallRetResNo) {
12286 Results.push_back(Call);
12287 continue;
12288 }
12289 MachinePointerInfo PtrInfo;
12290 SDValue LoadResult = DAG.getLoad(Node->getValueType(ResNo), DL, CallChain,
12291 ResultPtr, PtrInfo);
12292 SDValue OutChain = LoadResult.getValue(1);
12293
12294 if (StoreSDNode *ST = ResultStores[ResNo]) {
12295 // Replace store with the library call.
12296 DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
12297 PtrInfo = ST->getPointerInfo();
12298 } else {
// Stack-temporary case: build pointer info from the frame index.
12300 DAG.getMachineFunction(),
12301 cast<FrameIndexSDNode>(ResultPtr)->getIndex());
12302 }
12303
12304 Results.push_back(LoadResult);
12305 }
12306
12307 return true;
12308}
12309
// Legalize an illegal SETCC condition code, rewriting LHS/RHS/CC in place.
// Strategies tried in order: swap operands, invert (setting NeedInvert),
// invert+swap, i1 logical expansion, and finally expanding into two setccs
// combined with AND/OR (with VP variants when Mask/EVL are present).
// Returns true if anything changed. When the comparison collapses to a single
// value, RHS and CC are cleared and LHS holds the result.
// NOTE(review): Doxygen dump — the signature line (source 12310) and a few
// body lines (source 12324, 12327-12328, 12392, 12404, 12409 — apparently the
// Legal/Expand case labels, InvCC's declaration, the CC1/CC2 declaration, and
// two assert condition lines) were dropped in extraction; confirm upstream.
12311 SDValue &LHS, SDValue &RHS,
12312 SDValue &CC, SDValue Mask,
12313 SDValue EVL, bool &NeedInvert,
12314 const SDLoc &dl, SDValue &Chain,
12315 bool IsSignaling) const {
12316 MVT OpVT = LHS.getSimpleValueType();
12317 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
12318 NeedInvert = false;
// Mask/EVL come as a pair: both set (VP path) or both unset (non-VP path).
12319 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
12320 bool IsNonVP = !EVL;
12321 switch (getCondCodeAction(CCCode, OpVT)) {
12322 default:
12323 llvm_unreachable("Unknown condition code action!");
// Legal case: leave the comparison untouched.
12325 // Nothing to do.
12326 break;
// Expand case: first try the swapped condition code.
12329 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12330 std::swap(LHS, RHS);
12331 CC = DAG.getCondCode(InvCC);
12332 return true;
12333 }
12334 // Swapping operands didn't work. Try inverting the condition.
12335 bool NeedSwap = false;
12336 InvCC = getSetCCInverse(CCCode, OpVT);
12337 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
12338 // If inverting the condition is not enough, try swapping operands
12339 // on top of it.
12340 InvCC = ISD::getSetCCSwappedOperands(InvCC);
12341 NeedSwap = true;
12342 }
12343 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12344 CC = DAG.getCondCode(InvCC);
12345 NeedInvert = true;
12346 if (NeedSwap)
12347 std::swap(LHS, RHS);
12348 return true;
12349 }
12350
12351 // Special case: expand i1 comparisons using logical operations.
12352 if (OpVT == MVT::i1) {
12353 SDValue Ret;
12354 switch (CCCode) {
12355 default:
12356 llvm_unreachable("Unknown integer setcc!");
12357 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
12358 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
12359 MVT::i1);
12360 break;
12361 case ISD::SETNE: // X != Y --> (X ^ Y)
12362 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
12363 break;
12364 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
12365 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
12366 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
12367 DAG.getNOT(dl, LHS, MVT::i1));
12368 break;
12369 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12370 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12371 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
12372 DAG.getNOT(dl, RHS, MVT::i1));
12373 break;
12374 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
12375 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
12376 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
12377 DAG.getNOT(dl, LHS, MVT::i1));
12378 break;
12379 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
12380 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
12381 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
12382 DAG.getNOT(dl, RHS, MVT::i1));
12383 break;
12384 }
12385
// Result is the logical expression; clear RHS/CC to signal a bare value.
12386 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
12387 RHS = SDValue();
12388 CC = SDValue();
12389 return true;
12390 }
12391
// General expansion: split into two comparisons CC1/CC2 joined by Opc.
// NOTE(review): the CC1/CC2 declaration line (source 12392) was dropped.
12393 unsigned Opc = 0;
12394 switch (CCCode) {
12395 default:
12396 llvm_unreachable("Don't know how to expand this condition!");
12397 case ISD::SETUO:
12398 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
12399 CC1 = ISD::SETUNE;
12400 CC2 = ISD::SETUNE;
12401 Opc = ISD::OR;
12402 break;
12403 }
12405 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
12406 NeedInvert = true;
12407 [[fallthrough]];
12408 case ISD::SETO:
12410 "If SETO is expanded, SETOEQ must be legal!");
12411 CC1 = ISD::SETOEQ;
12412 CC2 = ISD::SETOEQ;
12413 Opc = ISD::AND;
12414 break;
12415 case ISD::SETONE:
12416 case ISD::SETUEQ:
12417 // If the SETUO or SETO CC isn't legal, we might be able to use
12418 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
12419 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
12420 // the operands.
12421 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12422 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
12423 isCondCodeLegal(ISD::SETOLT, OpVT))) {
12424 CC1 = ISD::SETOGT;
12425 CC2 = ISD::SETOLT;
12426 Opc = ISD::OR;
12427 NeedInvert = ((unsigned)CCCode & 0x8U);
12428 break;
12429 }
12430 [[fallthrough]];
12431 case ISD::SETOEQ:
12432 case ISD::SETOGT:
12433 case ISD::SETOGE:
12434 case ISD::SETOLT:
12435 case ISD::SETOLE:
12436 case ISD::SETUNE:
12437 case ISD::SETUGT:
12438 case ISD::SETUGE:
12439 case ISD::SETULT:
12440 case ISD::SETULE:
12441 // If we are floating point, assign and break, otherwise fall through.
12442 if (!OpVT.isInteger()) {
12443 // We can use the 4th bit to tell if we are the unordered
12444 // or ordered version of the opcode.
12445 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12446 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
12447 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
12448 break;
12449 }
12450 // Fallthrough if we are unsigned integer.
12451 [[fallthrough]];
12452 case ISD::SETLE:
12453 case ISD::SETGT:
12454 case ISD::SETGE:
12455 case ISD::SETLT:
12456 case ISD::SETNE:
12457 case ISD::SETEQ:
12458 // If all combinations of inverting the condition and swapping operands
12459 // didn't work then we have no means to expand the condition.
12460 llvm_unreachable("Don't know how to expand this condition!");
12461 }
12462
12463 SDValue SetCC1, SetCC2;
12464 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
12465 // If we aren't the ordered or unorder operation,
12466 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
12467 if (IsNonVP) {
12468 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
12469 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
12470 } else {
12471 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
12472 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
12473 }
12474 } else {
12475 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
12476 if (IsNonVP) {
12477 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
12478 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
12479 } else {
12480 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
12481 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
12482 }
12483 }
// For chained (strict FP) comparisons, merge the two output chains.
12484 if (Chain)
12485 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
12486 SetCC2.getValue(1));
12487 if (IsNonVP)
12488 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
12489 else {
12490 // Transform the binary opcode to the VP equivalent.
12491 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
12492 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
12493 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
12494 }
12495 RHS = SDValue();
12496 CC = SDValue();
12497 return true;
12498 }
12499 }
12500 return false;
12501}
12502
// Expand a vector n-ary operation by splitting every operand into low/high
// halves, performing the operation on each half, and concatenating the
// results. Returns SDValue() (no expansion) when the type cannot be split
// evenly, the halves differ or are illegal, or the half-width op is
// unsupported. NOTE(review): Doxygen dump — the signature line (source 12503)
// was dropped in extraction; confirm the function name upstream.
12504 SelectionDAG &DAG) const {
12505 EVT VT = Node->getValueType(0);
12506 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12507 // split into two equal parts.
12508 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
12509 return SDValue();
12510
12511 // Restrict expansion to cases where both parts can be concatenated.
12512 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12513 if (LoVT != HiVT || !isTypeLegal(LoVT))
12514 return SDValue();
12515
12516 SDLoc DL(Node);
12517 unsigned Opcode = Node->getOpcode();
12518
12519 // Don't expand if the result is likely to be unrolled anyway.
12520 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
12521 return SDValue();
12522
// Split every operand into its low/high halves.
12523 SmallVector<SDValue, 4> LoOps, HiOps;
12524 for (const SDValue &V : Node->op_values()) {
12525 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
12526 LoOps.push_back(Lo);
12527 HiOps.push_back(Hi);
12528 }
12529
// Perform the op per half, then reassemble the full-width result.
12530 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
12531 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
12532 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
12533}
12534
// Replace (extract_elt (load vector), idx) with a narrow scalar load of just
// the extracted element, when the element address is computable, the target
// allows narrowing the load, and the access is legal and fast. Returns
// SDValue() when the transform is not profitable/possible.
// NOTE(review): Doxygen dump — the signature line (source 12535) and a few
// body lines (source 12557 `MachinePointerInfo MPI;`-style declaration, 12582
// the NewPtr computation head, 12592 the ZEXTLOAD arm of the ternary) were
// dropped in extraction; confirm upstream.
12536 const SDLoc &DL,
12537 EVT InVecVT, SDValue EltNo,
12538 LoadSDNode *OriginalLoad,
12539 SelectionDAG &DAG) const {
// Only simple (non-volatile, non-atomic) loads are eligible.
12540 assert(OriginalLoad->isSimple());
12541
12542 EVT VecEltVT = InVecVT.getVectorElementType();
12543
12544 // If the vector element type is not a multiple of a byte then we are unable
12545 // to correctly compute an address to load only the extracted element as a
12546 // scalar.
12547 if (!VecEltVT.isByteSized())
12548 return SDValue();
12549
12550 ISD::LoadExtType ExtTy =
12551 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
12552 if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
12553 return SDValue();
12554
// Constant index: precise offset and pointer info; variable index: keep only
// the address space and assume element alignment.
12555 std::optional<unsigned> ByteOffset;
12556 Align Alignment = OriginalLoad->getAlign();
12558 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
12559 int Elt = ConstEltNo->getZExtValue();
12560 ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
12561 MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
12562 Alignment = commonAlignment(Alignment, *ByteOffset);
12563 } else {
12564 // Discard the pointer info except the address space because the memory
12565 // operand can't represent this new access since the offset is variable.
12566 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
12567 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
12568 }
12569
// Let the target veto narrowing this load.
12570 if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
12571 return SDValue();
12572
// Only proceed if the narrow access is both legal and fast.
12573 unsigned IsFast = 0;
12574 if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
12575 OriginalLoad->getAddressSpace(), Alignment,
12576 OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
12577 !IsFast)
12578 return SDValue();
12579
12580 // The original DAG loaded the entire vector from memory, so arithmetic
12581 // within it must be inbounds.
12583 DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
12584
12585 // We are replacing a vector load with a scalar load. The new load must have
12586 // identical memory op ordering to the original.
12587 SDValue Load;
12588 if (ResultVT.bitsGT(VecEltVT)) {
12589 // If the result type of vextract is wider than the load, then issue an
12590 // extending load instead.
12591 ISD::LoadExtType ExtType = isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT)
12593 : ISD::EXTLOAD;
12594 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
12595 NewPtr, MPI, VecEltVT, Alignment,
12596 OriginalLoad->getMemOperand()->getFlags(),
12597 OriginalLoad->getAAInfo());
12598 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
12599 } else {
12600 // The result type is narrower or the same width as the vector element
12601 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
12602 Alignment, OriginalLoad->getMemOperand()->getFlags(),
12603 OriginalLoad->getAAInfo());
12604 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
// Narrower result: truncate; same width: bitcast to the requested type.
12605 if (ResultVT.bitsLT(VecEltVT))
12606 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
12607 else
12608 Load = DAG.getBitcast(ResultVT, Load);
12609 }
12610
12611 return Load;
12612}
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
#define _
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
#define T
#define T1
#define P(N)
Function const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for choosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if this FPClassTest can be performed with an ordered fcmp to 0,...
static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, SDNode *FPNode)
Given a store node StoreNode, return true if it is safe to fold that node into FPNode,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1329
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.h:1140
APInt bitcastToAPInt() const
Definition APFloat.h:1335
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1120
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1080
void changeSign()
Definition APFloat.h:1279
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1091
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1573
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1758
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1407
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:424
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1392
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1386
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1513
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1331
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1183
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
void setSignBit()
Set the sign bit to 1.
Definition APInt.h:1341
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1250
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1397
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:835
void negate()
Negate this APInt in place.
Definition APInt.h:1469
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1599
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1532
unsigned countLeadingZeros() const
Definition APInt.h:1607
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
Definition APInt.h:1436
unsigned logBase2() const
Definition APInt.h:1762
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:828
void setAllBits()
Set every bit to 1.
Definition APInt.h:1320
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1274
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:406
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1151
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1368
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:874
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
void clearBits(unsigned LoBit, unsigned HiBit)
Clear the bits from LoBit (inclusive) to HiBit (exclusive) to 0.
Definition APInt.h:1418
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1389
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition APInt.h:1443
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:859
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1657
bool uge(const APInt &RHS) const
Unsigned greater-than-or-equal comparison.
Definition APInt.h:1222
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition APInt.h:1344
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:720
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:282
This class represents a range of values.
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isLittleEndian() const
Returns true if the target data layout is little-endian.
Definition DataLayout.h:214
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
std::vector< std::string > ConstraintCodeVector
Definition InlineAsm.h:104
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Flags getFlags() const
Return the raw flags of the source value,.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition Module.h:445
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
iterator end() const
Definition ArrayRef.h:343
iterator begin() const
Definition ArrayRef.h:342
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI std::optional< unsigned > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the subtraction of 2 nodes can never overflow.
LLVM_ABI bool shouldOptForSize() const
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS)
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:573
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
iterator end() const
Definition StringRef.h:114
Class to represent struct types.
LLVM_ABI void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool preferSelectsOverBooleanArithmetic(EVT VT) const
Should we prefer selects to doing arithmetic on boolean types.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const
Get the CallingConv that should be used for the specified libcall implementation.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
ISD::CondCode getSoftFloatCmpLibcallPredicate(RTLIB::LibcallImpl Call) const
Get the comparison predicate that's to be used to test the result of the comparison libcall against z...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
TargetLoweringBase(const TargetMachine &TM, const TargetSubtargetInfo &STI)
NOTE: The TargetMachine owns TLOF.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
virtual EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
bool expandMultipleResultFPLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl< SDValue > &Results, std::optional< unsigned > CallRetResNo={}) const
Expands a node with multiple results to an FP or vector libcall.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
SmallVector< ConstraintPair > ConstraintGroup
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
virtual unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue unwrapAddress(SDValue N) const
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product twice the width of LHS and RHS.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
~TargetLowering() override
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
virtual bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const
For most targets, an LLVM type must be broken down into multiple smaller types.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis, Register R, KnownFPClass &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false,.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
Primary interface to the complete machine description for the target machine.
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:791
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition Type.h:296
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:705
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
Definition APInt.cpp:3009
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:780
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:531
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:387
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:289
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:515
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:393
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:898
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:521
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:981
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:400
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:712
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:779
@ PARTIAL_REDUCE_FMLA
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ BRIND
BRIND - Indirect branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:534
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:541
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:351
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:887
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:406
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:966
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:174
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:707
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:299
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition ISDOpcodes.h:654
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:909
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:933
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:527
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:719
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true when the two operands to the condition are equal.
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
void stable_sort(R &&Range)
Definition STLExtras.h:2079
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2503
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
LLVM_ABI bool isOneOrOneSplatFP(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant floating-point value, or a splatted vector of a constant float...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
Definition MathExtras.h:546
void * PointerTy
constexpr bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:177
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1784
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
Definition ModRef.h:68
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
constexpr bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1748
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1551
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
EVT changeElementType(EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:113
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:430
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:470
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:412
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:301
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:186
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition KnownBits.h:255
static LLVM_ABI KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
bool isZero() const
Returns true if value is all zero.
Definition KnownBits.h:80
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:242
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
void setAllConflict()
Make all bits known to be both zero and one.
Definition KnownBits.h:99
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:161
KnownBits byteSwap() const
Definition KnownBits.h:514
static LLVM_ABI std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:289
KnownBits reverseBits() const
Definition KnownBits.h:518
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition KnownBits.h:233
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
static LLVM_ABI KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:172
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:321
bool isSignUnknown() const
Returns true if we don't know the sign bit.
Definition KnownBits.h:69
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:311
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:180
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:248
static LLVM_ABI KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static LLVM_ABI std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
static LLVM_ABI std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static LLVM_ABI KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition KnownBits.cpp:60
static LLVM_ABI std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
static LLVM_ABI std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
Definition KnownBits.h:167
static LLVM_ABI std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static LLVM_ABI std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:286
static LLVM_ABI std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
static LLVM_ABI KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static bool hasVectorMaskArgument(RTLIB::LibcallImpl Impl)
Returns true if the function has a vector mask argument, which is assumed to be the last argument.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
void setNoSignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This contains information for each constraint that we are lowering.
std::string ConstraintCode
This contains the actual string for the code, like "m".
LLVM_ABI unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
LLVM_ABI bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
MakeLibCallOptions & setIsSigned(bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...