LLVM 23.0.0git
TargetLowering.cpp
Go to the documentation of this file.
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
27#include "llvm/IR/DataLayout.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/MC/MCAsmInfo.h"
32#include "llvm/MC/MCExpr.h"
38#include <cctype>
39#include <deque>
40using namespace llvm;
41using namespace llvm::SDPatternMatch;
42
43/// NOTE: The TargetMachine owns TLOF.
47
48// Define the virtual destructor out-of-line for build efficiency.
50
51const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
52 return nullptr;
53}
54
58
/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
// NOTE(review): the signature's opening line (and the binding of the local
// Function reference F) is elided in this extract of the source.
                                          SDValue &Chain) const {

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef,
                           Attribute::Range, Attribute::NoFPClass})
    CallerAttrs.removeAttribute(Attr);

  // Any attribute left after stripping the ignorable ones means the call and
  // return contracts may differ, so refuse the tail-call position.
  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  // NOTE(review): as written this check looks unreachable — hasAttributes()
  // above already returned false if ZExt/SExt (or anything else) remained.
  // Confirm whether this redundancy is intentional before relying on it.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}
90
// NOTE(review): the signature's opening line is elided in this extract;
// presumably bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, ...)
// — returns true when every register-assigned outgoing argument merely passes
// through the caller's own live-in value for that callee-saved register.
                                   const uint32_t *CallerPreservedMask,
                                   const SmallVectorImpl<CCValAssign> &ArgLocs,
                                   const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    // Stack-assigned arguments cannot clobber callee-saved registers.
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    //  for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    // An AssertZext wrapper does not change the underlying value; look through.
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    // The virtual register must be the live-in copy of exactly this phys reg.
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}
117
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
// NOTE(review): the signature's opening line is elided in this extract;
// presumably TargetLowering::ArgListEntry::setAttributes(const CallBase *Call, ...).
                                             unsigned ArgIdx) {
  // Mirror each relevant parameter attribute of the call site onto this
  // argument's lowering flags.
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  // NOTE(review): the assert's first line is elided here; the message below
  // suggests it checks that at most one ABI-indirection attribute is set.
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    // An explicit stack alignment takes precedence; otherwise fall back to
    // the parameter's own alignment.
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}
151
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
// NOTE(review): several lines are elided in this extract (the RetVT/Ops
// parameters, the Args declaration, two condition lines, the CLI declaration,
// and two builder calls); the visible code is documented as-is.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl,
                            MakeLibCallOptions CallOptions, const SDLoc &dl,
                            SDValue InChain) const {
  // A libcall for an operation the target cannot support is a compiler bug.
  if (LibcallImpl == RTLIB::Unsupported)
    reportFatalInternalError("unsupported library call operation");

  // Default to hanging the call off the DAG entry node when no chain given.
  if (!InChain)
    InChain = DAG.getEntryNode();

  Args.reserve(Ops.size());

  // Build one ArgListEntry per operand, honoring per-operand type overrides.
  ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
                   ? OpsTypeOverrides[i]
                   : NewOp.getValueType().getTypeForEVT(*DAG.getContext());
    TargetLowering::ArgListEntry Entry(NewOp, Ty);
    // When softening FP, remember the pre-soften type for ABI purposes.
    if (CallOptions.IsSoften)
      Entry.OrigTy =
          CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext());

    Entry.IsSExt =
        shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
    Entry.IsZExt = !Entry.IsSExt;

    if (CallOptions.IsSoften &&
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  SDValue Callee =
      DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  Type *OrigRetTy = RetTy;
  bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften) {
    OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext());
      signExtend = zeroExtend = false;
  }

  // Assemble the lowering descriptor and hand off to the common call path.
  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallImplCallingConv(LibcallImpl), RetTy, OrigRetTy,
                    Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}
216
// NOTE(review): the signature's opening line is elided in this extract;
// presumably bool TargetLowering::findOptimalMemOpLowering(...). The function
// greedily chooses a sequence of value types (appended to MemOps) that covers
// a memory operation of Op.size() bytes, failing if more than Limit ops are
// needed. A few interior lines are also elided; visible code documented as-is.
    LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
    const MemOp &Op, unsigned DstAS, unsigned SrcAS,
    const AttributeList &FuncAttributes, EVT *LargestVT) const {
  // A memcpy whose source is less aligned than its fixed destination cannot
  // be lowered optimally here.
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick its preferred type first.
  EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        // NOTE(review): the loop-body line is elided here (it presumably steps
        // VT down to the next smaller integer type).
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Walk down the integer types until one is safe for mem ops
        // (bottoming out at i8, which is always usable).
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    // Too many ops: let the caller fall back to a libcall.
    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
307
/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
// NOTE(review): the signature's opening line is elided in this extract.
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  // Convenience overload: delegate to the chained variant with no incoming
  // chain value.
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}
319
// NOTE(review): the signature's opening line is elided in this extract, as are
// a few declarations in the body (the RetVT and CallOptions locals and two
// error-report call heads); the visible code is documented as-is. The function
// replaces a floating-point comparison with one or two soft-float libcalls,
// rewriting NewLHS/NewRHS/CCCode into an equivalent integer comparison.
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  // Pick the libcall(s) matching the predicate; unordered predicates are
  // handled by calling the complementary ordered libcall and inverting.
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    // These need two libcalls: an ordered/unordered test plus an equality test.
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  SDValue Ops[2] = {NewLHS, NewRHS};
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
  // First libcall: result compared against zero with the libcall's predicate.
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  RTLIB::LibcallImpl LC1Impl = getLibcallImpl(LC1);
  if (LC1Impl == RTLIB::Unsupported) {
        "no libcall available to soften floating-point compare");
  }

  CCCode = getSoftFloatCmpLibcallPredicate(LC1Impl);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    RTLIB::LibcallImpl LC2Impl = getLibcallImpl(LC2);
    if (LC2Impl == RTLIB::Unsupported) {
        "no libcall available to soften floating-point compare");
    }

    assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
           "unordered call should be simple boolean");

    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
      NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
                           DAG.getValueType(MVT::i1));
    }

    // Combine the two libcall results: AND for SETONE-style, OR for SETUEQ.
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getSoftFloatCmpLibcallPredicate(LC2Impl);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    // Merge the chains of both libcalls when the caller tracks a chain.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}
479
/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
// NOTE(review): all executable lines of this function are elided in this
// extract — only its comments and closing brace are visible. The comments
// indicate: non-PIC uses a block address, PIC uses a label difference.
  // In non-pic modes, just use the address of a block.

  // Otherwise, use a label difference.
}
490
// NOTE(review): the signature's opening line is elided in this extract;
// presumably SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, ...).
// Base implementation: the relocation base for a PIC jump table is the table
// address itself.
                                               SelectionDAG &DAG) const {
  return Table;
}
495
/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
// NOTE(review): the signature's middle line is elided in this extract.
const MCExpr *
                                             unsigned JTI,MCContext &Ctx) const{
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}
504
// NOTE(review): the signature's opening line and the guarding `if` condition
// are elided in this extract; the comment below indicates the guard tests for
// CodeView debug info. Lowers an indirect jump-table branch to BRIND,
// optionally threading jump-table debug info onto the chain first.
                                               SDValue Addr, int JTI,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Value;
  // Jump table debug info is only needed if CodeView is enabled.
    Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
  }
  return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
}
515
// NOTE(review): the line carrying the function name and parameter, and the
// position-independence `if` condition, are elided in this extract;
// presumably TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA).
// Returns true when a constant offset may be folded into a global address.
bool
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a got and then add the offset.
  if (!TM.shouldAssumeDSOLocal(GV))
    return false;

  // If the code is position independent we will have to add a base register.
    return false;

  // Otherwise we can do it.
  return true;
}
533
534//===----------------------------------------------------------------------===//
535// Optimization Methods
536//===----------------------------------------------------------------------===//
537
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
// NOTE(review): the signature's opening line is elided in this extract;
// presumably bool TargetLowering::ShrinkDemandedConstant(SDValue Op, ...).
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node, leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    // Constant has bits outside the demanded set: rebuild the node with the
    // constant masked down to only the demanded bits.
    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
                                      Op->getFlags());
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}
587
// NOTE(review): the signature's opening line and one arm of the ternary are
// elided in this extract. Convenience overload: demand all elements of a
// vector type (or the single lane of a scalar) and delegate.
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}
597
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
// NOTE(review): the signature's opening line and the second arm of the
// SDNodeFlags ternary are elided in this extract; presumably
// bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, ...).
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (isTruncateFree(Op, SmallVT) && isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.

      // If the operation has the 'disjoint' flag, then the
      // operands on the new node are also disjoint.
      SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
      unsigned Opcode = Op.getOpcode();
      if (Opcode == ISD::PTRADD) {
        // It isn't a ptradd anymore if it doesn't operate on the entire
        // pointer.
        Opcode = ISD::ADD;
      }
      // Rebuild the op at the narrow width, then widen with ANY_EXTEND since
      // the high bits were never demanded.
      SDValue X = DAG.getNode(
          Opcode, dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
657
// NOTE(review): the signature's opening line and one statement inside the
// `if` (presumably committing the TLO changes back to the combiner) are
// elided in this extract. DAG-combiner wrapper around SimplifyDemandedBits.
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    // Revisit the simplified node on the combiner worklist.
    DCI.AddToWorklist(Op.getNode());
  }
  return Simplified;
}
672
// NOTE(review): the signature's opening line and one statement inside the
// `if` are elided in this extract. DAG-combiner wrapper that also threads an
// explicit demanded-elements mask.
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    // Revisit the simplified node on the combiner worklist.
    DCI.AddToWorklist(Op.getNode());
  }
  return Simplified;
}
689
// NOTE(review): the signature's opening lines and one arm of the ternary are
// elided in this extract. Overload that derives the demanded-elements mask
// from the value type and delegates.
                                          KnownBits &Known,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}
706
// TODO: Under what circumstances can we create nodes? Constant folding?
// NOTE(review): the signature's opening line, several `if` heads, and several
// `case` labels (sign_extend_inreg, the *_EXTEND_VECTOR_INREG group,
// insert_vector_elt, insert_subvector) are elided in this extract; the visible
// code is documented as-is. The function looks through Op for an existing
// value that is equivalent on the demanded bits/elements, without creating
// new nodes, and returns it (or an empty SDValue).
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Limit search depth.
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(VT);

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    // A no-op bitcast: the source is directly usable.
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    // Same element width: the demanded masks carry over unchanged.
    if (NumSrcEltBits == NumDstEltBits)
            Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    // Wider destination elements: scale the demanded masks down to the
    // narrower source elements.
    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        DemandedSrcBits |= DemandedBits.extractBits(NumSrcEltBits, BitOffset);
      }
      // Recursive calls below may turn not demanded elements into poison, so we
      // need to demand all smaller source elements that maps to a demanded
      // destination element.
      APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);

              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::ADD: {
    // Adding a known-zero operand is a no-op on the demanded elements.
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    if (RHSKnown.isZero())
      return Op.getOperand(0);

    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (LHSKnown.isZero())
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<unsigned> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SRL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<unsigned> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      // Must already be signbits in DemandedBits bounds, and can't demand any
      // shifted in zeroes.
      if (DemandedBits.countl_zero() >= ShAmt) {
        unsigned NumSignBits =
            DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
        if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
          return Op0;
      }
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
        return Op0;
    }
    break;
  }
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits &&
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
    if (VT.isScalableVector())
      return SDValue();

    // If we only want the lowest element and none of extended bits, then we can
    // return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
    if (VT.isScalableVector())
      return SDValue();

    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
    if (VT.isScalableVector())
      return SDValue();

    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // TODO: Probably okay to remove after audit; here to reduce change size
    // in initial enablement patch for scalable vectors
    if (VT.isScalableVector())
      return SDValue();

    // Give targets a chance to handle their own nodes.
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}
985
// NOTE(review): the signature's opening lines and one arm of the ternary are
// elided in this extract. Overload that derives the demanded-elements mask
// from the value type and delegates.
                                                       unsigned Depth) const {
  EVT VT = Op.getValueType();
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}
999
// NOTE(review): the signature's opening line is elided in this extract.
// Overload that demands every bit of the scalar value and delegates.
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}
1007
1008// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
1009// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
1012 const TargetLowering &TLI,
1013 const APInt &DemandedBits,
1014 const APInt &DemandedElts, unsigned Depth) {
1015 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
1016 "SRL or SRA node is required here!");
1017 // Is the right shift using an immediate value of 1?
1018 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
1019 if (!N1C || !N1C->isOne())
1020 return SDValue();
1021
1022 // We are looking for an avgfloor
1023 // add(ext, ext)
1024 // or one of these as a avgceil
1025 // add(add(ext, ext), 1)
1026 // add(add(ext, 1), ext)
1027 // add(ext, add(ext, 1))
1028 SDValue Add = Op.getOperand(0);
1029 if (Add.getOpcode() != ISD::ADD)
1030 return SDValue();
1031
1032 SDValue ExtOpA = Add.getOperand(0);
1033 SDValue ExtOpB = Add.getOperand(1);
1034 SDValue Add2;
1035 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
1036 ConstantSDNode *ConstOp;
1037 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
1038 ConstOp->isOne()) {
1039 ExtOpA = Op1;
1040 ExtOpB = Op3;
1041 Add2 = A;
1042 return true;
1043 }
1044 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
1045 ConstOp->isOne()) {
1046 ExtOpA = Op1;
1047 ExtOpB = Op2;
1048 Add2 = A;
1049 return true;
1050 }
1051 return false;
1052 };
1053 bool IsCeil =
1054 (ExtOpA.getOpcode() == ISD::ADD &&
1055 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
1056 (ExtOpB.getOpcode() == ISD::ADD &&
1057 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
1058
1059 // If the shift is signed (sra):
1060 // - Needs >= 2 sign bit for both operands.
1061 // - Needs >= 2 zero bits.
1062 // If the shift is unsigned (srl):
1063 // - Needs >= 1 zero bit for both operands.
1064 // - Needs 1 demanded bit zero and >= 2 sign bits.
1065 SelectionDAG &DAG = TLO.DAG;
1066 unsigned ShiftOpc = Op.getOpcode();
1067 bool IsSigned = false;
1068 unsigned KnownBits;
1069 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1070 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
1071 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1072 unsigned NumZeroA =
1073 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1074 unsigned NumZeroB =
1075 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1076 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1077
1078 switch (ShiftOpc) {
1079 default:
1080 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1081 case ISD::SRA: {
1082 if (NumZero >= 2 && NumSigned < NumZero) {
1083 IsSigned = false;
1084 KnownBits = NumZero;
1085 break;
1086 }
1087 if (NumSigned >= 1) {
1088 IsSigned = true;
1089 KnownBits = NumSigned;
1090 break;
1091 }
1092 return SDValue();
1093 }
1094 case ISD::SRL: {
1095 if (NumZero >= 1 && NumSigned < NumZero) {
1096 IsSigned = false;
1097 KnownBits = NumZero;
1098 break;
1099 }
1100 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1101 IsSigned = true;
1102 KnownBits = NumSigned;
1103 break;
1104 }
1105 return SDValue();
1106 }
1107 }
1108
1109 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1110 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1111
1112 // Find the smallest power-2 type that is legal for this vector size and
1113 // operation, given the original type size and the number of known sign/zero
1114 // bits.
1115 EVT VT = Op.getValueType();
1116 unsigned MinWidth =
1117 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1118 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
1120 return SDValue();
1121 if (VT.isVector())
1122 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1123 if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
1124 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1125 // larger type size to do the transform.
1126 if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
1127 return SDValue();
1128 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1129 Add.getOperand(1)) &&
1130 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1131 Add2.getOperand(1))))
1132 NVT = VT;
1133 else
1134 return SDValue();
1135 }
1136
1137 // Don't create a AVGFLOOR node with a scalar constant unless its legal as
1138 // this is likely to stop other folds (reassociation, value tracking etc.)
1139 if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
1140 (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
1141 return SDValue();
1142
1143 SDLoc DL(Op);
1144 SDValue ResultAVG =
1145 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1146 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1147 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1148}
1149
1150/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1151/// result of Op are ever used downstream. If we can use this information to
1152/// simplify Op, create a new simplified DAG node and return true, returning the
1153/// original and new nodes in Old and New. Otherwise, analyze the expression and
1154/// return a mask of Known bits for the expression (used to simplify the
1155/// caller). The Known bits may only be accurate for those bits in the
1156/// OriginalDemandedBits and OriginalDemandedElts.
1158 SDValue Op, const APInt &OriginalDemandedBits,
1159 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1160 unsigned Depth, bool AssumeSingleUse) const {
1161 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1162 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1163 "Mask size mismatches value type size!");
1164
1165 // Don't know anything.
1166 Known = KnownBits(BitWidth);
1167
1168 EVT VT = Op.getValueType();
1169 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1170 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1171 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1172 "Unexpected vector size");
1173
1174 APInt DemandedBits = OriginalDemandedBits;
1175 APInt DemandedElts = OriginalDemandedElts;
1176 SDLoc dl(Op);
1177
1178 // Undef operand.
1179 if (Op.isUndef())
1180 return false;
1181
1182 // We can't simplify target constants.
1183 if (Op.getOpcode() == ISD::TargetConstant)
1184 return false;
1185
1186 if (Op.getOpcode() == ISD::Constant) {
1187 // We know all of the bits for a constant!
1188 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1189 return false;
1190 }
1191
1192 if (Op.getOpcode() == ISD::ConstantFP) {
1193 // We know all of the bits for a floating point constant!
1195 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1196 return false;
1197 }
1198
1199 // Other users may use these bits.
1200 bool HasMultiUse = false;
1201 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1203 // Limit search depth.
1204 return false;
1205 }
1206 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1208 DemandedElts = APInt::getAllOnes(NumElts);
1209 HasMultiUse = true;
1210 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1211 // Not demanding any bits/elts from Op.
1212 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1213 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1214 // Limit search depth.
1215 return false;
1216 }
1217
1218 KnownBits Known2;
1219 switch (Op.getOpcode()) {
1220 case ISD::SCALAR_TO_VECTOR: {
1221 if (VT.isScalableVector())
1222 return false;
1223 if (!DemandedElts[0])
1224 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1225
1226 KnownBits SrcKnown;
1227 SDValue Src = Op.getOperand(0);
1228 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1229 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1230 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1231 return true;
1232
1233 // Upper elements are undef, so only get the knownbits if we just demand
1234 // the bottom element.
1235 if (DemandedElts == 1)
1236 Known = SrcKnown.anyextOrTrunc(BitWidth);
1237 break;
1238 }
1239 case ISD::BUILD_VECTOR:
1240 // Collect the known bits that are shared by every demanded element.
1241 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1242 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1243 return false; // Don't fall through, will infinitely loop.
1244 case ISD::SPLAT_VECTOR: {
1245 SDValue Scl = Op.getOperand(0);
1246 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1247 KnownBits KnownScl;
1248 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1249 return true;
1250
1251 // Implicitly truncate the bits to match the official semantics of
1252 // SPLAT_VECTOR.
1253 Known = KnownScl.trunc(BitWidth);
1254 break;
1255 }
1256 case ISD::LOAD: {
1257 auto *LD = cast<LoadSDNode>(Op);
1258 if (getTargetConstantFromLoad(LD)) {
1259 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1260 return false; // Don't fall through, will infinitely loop.
1261 }
1262 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1263 // If this is a ZEXTLoad and we are looking at the loaded value.
1264 EVT MemVT = LD->getMemoryVT();
1265 unsigned MemBits = MemVT.getScalarSizeInBits();
1266 Known.Zero.setBitsFrom(MemBits);
1267 return false; // Don't fall through, will infinitely loop.
1268 }
1269 break;
1270 }
1272 if (VT.isScalableVector())
1273 return false;
1274 SDValue Vec = Op.getOperand(0);
1275 SDValue Scl = Op.getOperand(1);
1276 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1277 EVT VecVT = Vec.getValueType();
1278
1279 // If index isn't constant, assume we need all vector elements AND the
1280 // inserted element.
1281 APInt DemandedVecElts(DemandedElts);
1282 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1283 unsigned Idx = CIdx->getZExtValue();
1284 DemandedVecElts.clearBit(Idx);
1285
1286 // Inserted element is not required.
1287 if (!DemandedElts[Idx])
1288 return TLO.CombineTo(Op, Vec);
1289 }
1290
1291 KnownBits KnownScl;
1292 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1293 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1294 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1295 return true;
1296
1297 Known = KnownScl.anyextOrTrunc(BitWidth);
1298
1299 KnownBits KnownVec;
1300 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1301 Depth + 1))
1302 return true;
1303
1304 if (!!DemandedVecElts)
1305 Known = Known.intersectWith(KnownVec);
1306
1307 return false;
1308 }
1309 case ISD::INSERT_SUBVECTOR: {
1310 if (VT.isScalableVector())
1311 return false;
1312 // Demand any elements from the subvector and the remainder from the src its
1313 // inserted into.
1314 SDValue Src = Op.getOperand(0);
1315 SDValue Sub = Op.getOperand(1);
1316 uint64_t Idx = Op.getConstantOperandVal(2);
1317 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1318 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1319 APInt DemandedSrcElts = DemandedElts;
1320 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
1321
1322 KnownBits KnownSub, KnownSrc;
1323 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1324 Depth + 1))
1325 return true;
1326 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1327 Depth + 1))
1328 return true;
1329
1330 Known.setAllConflict();
1331 if (!!DemandedSubElts)
1332 Known = Known.intersectWith(KnownSub);
1333 if (!!DemandedSrcElts)
1334 Known = Known.intersectWith(KnownSrc);
1335
1336 // Attempt to avoid multi-use src if we don't need anything from it.
1337 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1338 !DemandedSrcElts.isAllOnes()) {
1340 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1342 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1343 if (NewSub || NewSrc) {
1344 NewSub = NewSub ? NewSub : Sub;
1345 NewSrc = NewSrc ? NewSrc : Src;
1346 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1347 Op.getOperand(2));
1348 return TLO.CombineTo(Op, NewOp);
1349 }
1350 }
1351 break;
1352 }
1354 if (VT.isScalableVector())
1355 return false;
1356 // Offset the demanded elts by the subvector index.
1357 SDValue Src = Op.getOperand(0);
1358 if (Src.getValueType().isScalableVector())
1359 break;
1360 uint64_t Idx = Op.getConstantOperandVal(1);
1361 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1362 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1363
1364 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1365 Depth + 1))
1366 return true;
1367
1368 // Attempt to avoid multi-use src if we don't need anything from it.
1369 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1371 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1372 if (DemandedSrc) {
1373 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1374 Op.getOperand(1));
1375 return TLO.CombineTo(Op, NewOp);
1376 }
1377 }
1378 break;
1379 }
1380 case ISD::CONCAT_VECTORS: {
1381 if (VT.isScalableVector())
1382 return false;
1383 Known.setAllConflict();
1384 EVT SubVT = Op.getOperand(0).getValueType();
1385 unsigned NumSubVecs = Op.getNumOperands();
1386 unsigned NumSubElts = SubVT.getVectorNumElements();
1387 for (unsigned i = 0; i != NumSubVecs; ++i) {
1388 APInt DemandedSubElts =
1389 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1390 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1391 Known2, TLO, Depth + 1))
1392 return true;
1393 // Known bits are shared by every demanded subvector element.
1394 if (!!DemandedSubElts)
1395 Known = Known.intersectWith(Known2);
1396 }
1397 break;
1398 }
1399 case ISD::VECTOR_SHUFFLE: {
1400 assert(!VT.isScalableVector());
1401 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1402
1403 // Collect demanded elements from shuffle operands..
1404 APInt DemandedLHS, DemandedRHS;
1405 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1406 DemandedRHS))
1407 break;
1408
1409 if (!!DemandedLHS || !!DemandedRHS) {
1410 SDValue Op0 = Op.getOperand(0);
1411 SDValue Op1 = Op.getOperand(1);
1412
1413 Known.setAllConflict();
1414 if (!!DemandedLHS) {
1415 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1416 Depth + 1))
1417 return true;
1418 Known = Known.intersectWith(Known2);
1419 }
1420 if (!!DemandedRHS) {
1421 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1422 Depth + 1))
1423 return true;
1424 Known = Known.intersectWith(Known2);
1425 }
1426
1427 // Attempt to avoid multi-use ops if we don't need anything from them.
1429 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1431 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1432 if (DemandedOp0 || DemandedOp1) {
1433 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1434 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1435 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1436 return TLO.CombineTo(Op, NewOp);
1437 }
1438 }
1439 break;
1440 }
1441 case ISD::AND: {
1442 SDValue Op0 = Op.getOperand(0);
1443 SDValue Op1 = Op.getOperand(1);
1444
1445 // If the RHS is a constant, check to see if the LHS would be zero without
1446 // using the bits from the RHS. Below, we use knowledge about the RHS to
1447 // simplify the LHS, here we're using information from the LHS to simplify
1448 // the RHS.
1449 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1450 // Do not increment Depth here; that can cause an infinite loop.
1451 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1452 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1453 if ((LHSKnown.Zero & DemandedBits) ==
1454 (~RHSC->getAPIntValue() & DemandedBits))
1455 return TLO.CombineTo(Op, Op0);
1456
1457 // If any of the set bits in the RHS are known zero on the LHS, shrink
1458 // the constant.
1459 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1460 DemandedElts, TLO))
1461 return true;
1462
1463 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1464 // constant, but if this 'and' is only clearing bits that were just set by
1465 // the xor, then this 'and' can be eliminated by shrinking the mask of
1466 // the xor. For example, for a 32-bit X:
1467 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1468 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1469 LHSKnown.One == ~RHSC->getAPIntValue()) {
1470 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1471 return TLO.CombineTo(Op, Xor);
1472 }
1473 }
1474
1475 // (X +/- Y) & Y --> ~X & Y when Y is a power of 2 (or zero).
1476 SDValue X, Y;
1477 if (sd_match(Op,
1478 m_And(m_Value(Y),
1480 m_Sub(m_Value(X), m_Deferred(Y)))))) &&
1481 TLO.DAG.isKnownToBeAPowerOfTwo(Y, DemandedElts, /*OrZero=*/true)) {
1482 return TLO.CombineTo(
1483 Op, TLO.DAG.getNode(ISD::AND, dl, VT, TLO.DAG.getNOT(dl, X, VT), Y));
1484 }
1485
1486 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1487 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1488 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1489 (Op0.getOperand(0).isUndef() ||
1491 Op0->hasOneUse()) {
1492 unsigned NumSubElts =
1494 unsigned SubIdx = Op0.getConstantOperandVal(2);
1495 APInt DemandedSub =
1496 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1497 KnownBits KnownSubMask =
1498 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1499 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1500 SDValue NewAnd =
1501 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1502 SDValue NewInsert =
1503 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1504 Op0.getOperand(1), Op0.getOperand(2));
1505 return TLO.CombineTo(Op, NewInsert);
1506 }
1507 }
1508
1509 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1510 Depth + 1))
1511 return true;
1512 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1513 Known2, TLO, Depth + 1))
1514 return true;
1515
1516 // If all of the demanded bits are known one on one side, return the other.
1517 // These bits cannot contribute to the result of the 'and'.
1518 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1519 return TLO.CombineTo(Op, Op0);
1520 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1521 return TLO.CombineTo(Op, Op1);
1522 // If all of the demanded bits in the inputs are known zeros, return zero.
1523 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1524 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1525 // If the RHS is a constant, see if we can simplify it.
1526 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1527 TLO))
1528 return true;
1529 // If the operation can be done in a smaller type, do so.
1531 return true;
1532
1533 // Attempt to avoid multi-use ops if we don't need anything from them.
1534 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1536 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1538 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1539 if (DemandedOp0 || DemandedOp1) {
1540 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1541 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1542 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1543 return TLO.CombineTo(Op, NewOp);
1544 }
1545 }
1546
1547 Known &= Known2;
1548 break;
1549 }
1550 case ISD::OR: {
1551 SDValue Op0 = Op.getOperand(0);
1552 SDValue Op1 = Op.getOperand(1);
1553 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1554 Depth + 1)) {
1555 Op->dropFlags(SDNodeFlags::Disjoint);
1556 return true;
1557 }
1558
1559 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1560 Known2, TLO, Depth + 1)) {
1561 Op->dropFlags(SDNodeFlags::Disjoint);
1562 return true;
1563 }
1564
1565 // If all of the demanded bits are known zero on one side, return the other.
1566 // These bits cannot contribute to the result of the 'or'.
1567 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1568 return TLO.CombineTo(Op, Op0);
1569 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1570 return TLO.CombineTo(Op, Op1);
1571 // If the RHS is a constant, see if we can simplify it.
1572 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1573 return true;
1574 // If the operation can be done in a smaller type, do so.
1576 return true;
1577
1578 // Attempt to avoid multi-use ops if we don't need anything from them.
1579 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1581 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1583 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1584 if (DemandedOp0 || DemandedOp1) {
1585 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1586 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1587 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1588 return TLO.CombineTo(Op, NewOp);
1589 }
1590 }
1591
1592 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1593 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1594 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1595 Op0->hasOneUse() && Op1->hasOneUse()) {
1596 // Attempt to match all commutations - m_c_Or would've been useful!
1597 for (int I = 0; I != 2; ++I) {
1598 SDValue X = Op.getOperand(I).getOperand(0);
1599 SDValue C1 = Op.getOperand(I).getOperand(1);
1600 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1601 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1602 if (Alt.getOpcode() == ISD::OR) {
1603 for (int J = 0; J != 2; ++J) {
1604 if (X == Alt.getOperand(J)) {
1605 SDValue Y = Alt.getOperand(1 - J);
1606 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1607 {C1, C2})) {
1608 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1609 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1610 return TLO.CombineTo(
1611 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1612 }
1613 }
1614 }
1615 }
1616 }
1617 }
1618
1619 Known |= Known2;
1620 break;
1621 }
1622 case ISD::XOR: {
1623 SDValue Op0 = Op.getOperand(0);
1624 SDValue Op1 = Op.getOperand(1);
1625
1626 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1627 Depth + 1))
1628 return true;
1629 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1630 Depth + 1))
1631 return true;
1632
1633 // If all of the demanded bits are known zero on one side, return the other.
1634 // These bits cannot contribute to the result of the 'xor'.
1635 if (DemandedBits.isSubsetOf(Known.Zero))
1636 return TLO.CombineTo(Op, Op0);
1637 if (DemandedBits.isSubsetOf(Known2.Zero))
1638 return TLO.CombineTo(Op, Op1);
1639 // If the operation can be done in a smaller type, do so.
1641 return true;
1642
1643 // If all of the unknown bits are known to be zero on one side or the other
1644 // turn this into an *inclusive* or.
1645 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1646 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1647 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1648
1649 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1650 if (C) {
1651 // If one side is a constant, and all of the set bits in the constant are
1652 // also known set on the other side, turn this into an AND, as we know
1653 // the bits will be cleared.
1654 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1655 // NB: it is okay if more bits are known than are requested
1656 if (C->getAPIntValue() == Known2.One) {
1657 SDValue ANDC =
1658 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1659 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1660 }
1661
1662 // If the RHS is a constant, see if we can change it. Don't alter a -1
1663 // constant because that's a 'not' op, and that is better for combining
1664 // and codegen.
1665 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1666 // We're flipping all demanded bits. Flip the undemanded bits too.
1667 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1668 return TLO.CombineTo(Op, New);
1669 }
1670
1671 unsigned Op0Opcode = Op0.getOpcode();
1672 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1673 if (ConstantSDNode *ShiftC =
1674 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1675 // Don't crash on an oversized shift. We can not guarantee that a
1676 // bogus shift has been simplified to undef.
1677 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1678 uint64_t ShiftAmt = ShiftC->getZExtValue();
1680 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1681 : Ones.lshr(ShiftAmt);
1682 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1684 // If the xor constant is a demanded mask, do a 'not' before the
1685 // shift:
1686 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1687 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1688 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1689 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1690 Op0.getOperand(1)));
1691 }
1692 }
1693 }
1694 }
1695 }
1696
1697 // If we can't turn this into a 'not', try to shrink the constant.
1698 if (!C || !C->isAllOnes())
1699 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1700 return true;
1701
1702 // Attempt to avoid multi-use ops if we don't need anything from them.
1703 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1705 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1707 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1708 if (DemandedOp0 || DemandedOp1) {
1709 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1710 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1711 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1712 return TLO.CombineTo(Op, NewOp);
1713 }
1714 }
1715
1716 Known ^= Known2;
1717 break;
1718 }
1719 case ISD::SELECT:
1720 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1721 Known, TLO, Depth + 1))
1722 return true;
1723 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1724 Known2, TLO, Depth + 1))
1725 return true;
1726
1727 // If the operands are constants, see if we can simplify them.
1728 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1729 return true;
1730
1731 // Only known if known in both the LHS and RHS.
1732 Known = Known.intersectWith(Known2);
1733 break;
1734 case ISD::VSELECT:
1735 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1736 Known, TLO, Depth + 1))
1737 return true;
1738 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1739 Known2, TLO, Depth + 1))
1740 return true;
1741
1742 // Only known if known in both the LHS and RHS.
1743 Known = Known.intersectWith(Known2);
1744 break;
1745 case ISD::SELECT_CC:
1746 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1747 Known, TLO, Depth + 1))
1748 return true;
1749 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1750 Known2, TLO, Depth + 1))
1751 return true;
1752
1753 // If the operands are constants, see if we can simplify them.
1754 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1755 return true;
1756
1757 // Only known if known in both the LHS and RHS.
1758 Known = Known.intersectWith(Known2);
1759 break;
1760 case ISD::SETCC: {
1761 SDValue Op0 = Op.getOperand(0);
1762 SDValue Op1 = Op.getOperand(1);
1763 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1764 // If we're testing X < 0, X >= 0, X <= -1 or X > -1
1765 // (X is of integer type) then we only need the sign mask of the previous
1766 // result
1767 if (Op1.getValueType().isInteger() &&
1768 (((CC == ISD::SETLT || CC == ISD::SETGE) && isNullOrNullSplat(Op1)) ||
1769 ((CC == ISD::SETLE || CC == ISD::SETGT) &&
1770 isAllOnesOrAllOnesSplat(Op1)))) {
1771 KnownBits KnownOp0;
1774 DemandedElts, KnownOp0, TLO, Depth + 1))
1775 return true;
1776 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1777 // width as the setcc result, and (3) the result of a setcc conforms to 0
1778 // or -1, we may be able to bypass the setcc.
1779 if (DemandedBits.isSignMask() &&
1783 // If we remove a >= 0 or > -1 (for integers), we need to introduce a
1784 // NOT Operation
1785 if (CC == ISD::SETGE || CC == ISD::SETGT) {
1786 SDLoc DL(Op);
1787 EVT VT = Op0.getValueType();
1788 SDValue NotOp0 = TLO.DAG.getNOT(DL, Op0, VT);
1789 return TLO.CombineTo(Op, NotOp0);
1790 }
1791 return TLO.CombineTo(Op, Op0);
1792 }
1793 }
1794 if (getBooleanContents(Op0.getValueType()) ==
1796 BitWidth > 1)
1797 Known.Zero.setBitsFrom(1);
1798 break;
1799 }
1800 case ISD::SHL: {
1801 SDValue Op0 = Op.getOperand(0);
1802 SDValue Op1 = Op.getOperand(1);
1803 EVT ShiftVT = Op1.getValueType();
1804
1805 if (std::optional<unsigned> KnownSA =
1806 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1807 unsigned ShAmt = *KnownSA;
1808 if (ShAmt == 0)
1809 return TLO.CombineTo(Op, Op0);
1810
1811 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1812 // single shift. We can do this if the bottom bits (which are shifted
1813 // out) are never demanded.
1814 // TODO - support non-uniform vector amounts.
1815 if (Op0.getOpcode() == ISD::SRL) {
1816 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1817 if (std::optional<unsigned> InnerSA =
1818 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1819 unsigned C1 = *InnerSA;
1820 unsigned Opc = ISD::SHL;
1821 int Diff = ShAmt - C1;
1822 if (Diff < 0) {
1823 Diff = -Diff;
1824 Opc = ISD::SRL;
1825 }
1826 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1827 return TLO.CombineTo(
1828 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1829 }
1830 }
1831 }
1832
1833 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1834 // are not demanded. This will likely allow the anyext to be folded away.
1835 // TODO - support non-uniform vector amounts.
1836 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1837 SDValue InnerOp = Op0.getOperand(0);
1838 EVT InnerVT = InnerOp.getValueType();
1839 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1840 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1841 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1842 SDValue NarrowShl = TLO.DAG.getNode(
1843 ISD::SHL, dl, InnerVT, InnerOp,
1844 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1845 return TLO.CombineTo(
1846 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1847 }
1848
1849 // Repeat the SHL optimization above in cases where an extension
1850 // intervenes: (shl (anyext (shr x, c1)), c2) to
1851 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1852 // aren't demanded (as above) and that the shifted upper c1 bits of
1853 // x aren't demanded.
1854 // TODO - support non-uniform vector amounts.
1855 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1856 InnerOp.hasOneUse()) {
1857 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1858 InnerOp, DemandedElts, Depth + 2)) {
1859 unsigned InnerShAmt = *SA2;
1860 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1861 DemandedBits.getActiveBits() <=
1862 (InnerBits - InnerShAmt + ShAmt) &&
1863 DemandedBits.countr_zero() >= ShAmt) {
1864 SDValue NewSA =
1865 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1866 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1867 InnerOp.getOperand(0));
1868 return TLO.CombineTo(
1869 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1870 }
1871 }
1872 }
1873 }
1874
1875 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1876 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1877 Depth + 1)) {
1878 // Disable the nsw and nuw flags. We can no longer guarantee that we
1879 // won't wrap after simplification.
1880 Op->dropFlags(SDNodeFlags::NoWrap);
1881 return true;
1882 }
1883 Known <<= ShAmt;
1884 // low bits known zero.
1885 Known.Zero.setLowBits(ShAmt);
1886
1887 // Attempt to avoid multi-use ops if we don't need anything from them.
1888 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1890 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1891 if (DemandedOp0) {
1892 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1893 return TLO.CombineTo(Op, NewOp);
1894 }
1895 }
1896
1897 // TODO: Can we merge this fold with the one below?
1898 // Try shrinking the operation as long as the shift amount will still be
1899 // in range.
1900 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1901 Op.getNode()->hasOneUse()) {
1902 // Search for the smallest integer type with free casts to and from
1903 // Op's type. For expedience, just check power-of-2 integer types.
1904 unsigned DemandedSize = DemandedBits.getActiveBits();
1905 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1906 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1907 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1908 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1909 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1910 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1911 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1912 assert(DemandedSize <= SmallVTBits &&
1913 "Narrowed below demanded bits?");
1914 // We found a type with free casts.
1915 SDValue NarrowShl = TLO.DAG.getNode(
1916 ISD::SHL, dl, SmallVT,
1917 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1918 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1919 return TLO.CombineTo(
1920 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1921 }
1922 }
1923 }
1924
1925 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1926 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1927 // Only do this if we demand the upper half so the knownbits are correct.
1928 unsigned HalfWidth = BitWidth / 2;
1929 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1930 DemandedBits.countLeadingOnes() >= HalfWidth) {
1931 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1932 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1933 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1934 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1935 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1936 // If we're demanding the upper bits at all, we must ensure
1937 // that the upper bits of the shift result are known to be zero,
1938 // which is equivalent to the narrow shift being NUW.
1939 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1940 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1941 SDNodeFlags Flags;
1942 Flags.setNoSignedWrap(IsNSW);
1943 Flags.setNoUnsignedWrap(IsNUW);
1944 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1945 SDValue NewShiftAmt =
1946 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1947 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1948 NewShiftAmt, Flags);
1949 SDValue NewExt =
1950 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1951 return TLO.CombineTo(Op, NewExt);
1952 }
1953 }
1954 }
1955 } else {
1956 // This is a variable shift, so we can't shift the demand mask by a known
1957 // amount. But if we are not demanding high bits, then we are not
1958 // demanding those bits from the pre-shifted operand either.
1959 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1960 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1961 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1962 Depth + 1)) {
1963 // Disable the nsw and nuw flags. We can no longer guarantee that we
1964 // won't wrap after simplification.
1965 Op->dropFlags(SDNodeFlags::NoWrap);
1966 return true;
1967 }
1968 Known.resetAll();
1969 }
1970 }
1971
1972 // If we are only demanding sign bits then we can use the shift source
1973 // directly.
1974 if (std::optional<unsigned> MaxSA =
1975 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1976 unsigned ShAmt = *MaxSA;
1977 unsigned NumSignBits =
1978 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1979 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1980 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1981 return TLO.CombineTo(Op, Op0);
1982 }
1983 break;
1984 }
1985 case ISD::SRL: {
1986 SDValue Op0 = Op.getOperand(0);
1987 SDValue Op1 = Op.getOperand(1);
1988 EVT ShiftVT = Op1.getValueType();
1989
1990 if (std::optional<unsigned> KnownSA =
1991 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1992 unsigned ShAmt = *KnownSA;
1993 if (ShAmt == 0)
1994 return TLO.CombineTo(Op, Op0);
1995
1996 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1997 // single shift. We can do this if the top bits (which are shifted out)
1998 // are never demanded.
1999 // TODO - support non-uniform vector amounts.
2000 if (Op0.getOpcode() == ISD::SHL) {
2001 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2002 if (std::optional<unsigned> InnerSA =
2003 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2004 unsigned C1 = *InnerSA;
2005 unsigned Opc = ISD::SRL;
2006 int Diff = ShAmt - C1;
2007 if (Diff < 0) {
2008 Diff = -Diff;
2009 Opc = ISD::SHL;
2010 }
2011 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
2012 return TLO.CombineTo(
2013 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
2014 }
2015 }
2016 }
2017
2018 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
2019 // single sra. We can do this if the top bits are never demanded.
2020 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
2021 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2022 if (std::optional<unsigned> InnerSA =
2023 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2024 unsigned C1 = *InnerSA;
2025 // Clamp the combined shift amount if it exceeds the bit width.
2026 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
2027 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
2028 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
2029 Op0.getOperand(0), NewSA));
2030 }
2031 }
2032 }
2033
2034 APInt InDemandedMask = (DemandedBits << ShAmt);
2035
2036 // If the shift is exact, then it does demand the low bits (and knows that
2037 // they are zero).
2038 if (Op->getFlags().hasExact())
2039 InDemandedMask.setLowBits(ShAmt);
2040
2041 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2042 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2043 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2045 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2046 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2047 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2048 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2049 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2050 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2051 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2052 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2053 SDValue NewShiftAmt =
2054 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2055 SDValue NewShift =
2056 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2057 return TLO.CombineTo(
2058 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2059 }
2060 }
2061
2062 // Compute the new bits that are at the top now.
2063 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2064 Depth + 1))
2065 return true;
2066 Known >>= ShAmt;
2067 // High bits known zero.
2068 Known.Zero.setHighBits(ShAmt);
2069
2070 // Attempt to avoid multi-use ops if we don't need anything from them.
2071 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2073 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2074 if (DemandedOp0) {
2075 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2076 return TLO.CombineTo(Op, NewOp);
2077 }
2078 }
2079 } else {
2080 // Use generic knownbits computation as it has support for non-uniform
2081 // shift amounts.
2082 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2083 }
2084
2085 // If we are only demanding sign bits then we can use the shift source
2086 // directly.
2087 if (std::optional<unsigned> MaxSA =
2088 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2089 unsigned ShAmt = *MaxSA;
2090 // Must already be signbits in DemandedBits bounds, and can't demand any
2091 // shifted in zeroes.
2092 if (DemandedBits.countl_zero() >= ShAmt) {
2093 unsigned NumSignBits =
2094 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2095 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2096 return TLO.CombineTo(Op, Op0);
2097 }
2098 }
2099
2100 // Try to match AVG patterns (after shift simplification).
2101 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2102 DemandedElts, Depth + 1))
2103 return TLO.CombineTo(Op, AVG);
2104
2105 break;
2106 }
2107 case ISD::SRA: {
2108 SDValue Op0 = Op.getOperand(0);
2109 SDValue Op1 = Op.getOperand(1);
2110 EVT ShiftVT = Op1.getValueType();
2111
2112 // If we only want bits that already match the signbit then we don't need
2113 // to shift.
2114 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2115 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2116 NumHiDemandedBits)
2117 return TLO.CombineTo(Op, Op0);
2118
2119 // If this is an arithmetic shift right and only the low-bit is set, we can
2120 // always convert this into a logical shr, even if the shift amount is
2121 // variable. The low bit of the shift cannot be an input sign bit unless
2122 // the shift amount is >= the size of the datatype, which is undefined.
2123 if (DemandedBits.isOne())
2124 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2125
2126 if (std::optional<unsigned> KnownSA =
2127 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2128 unsigned ShAmt = *KnownSA;
2129 if (ShAmt == 0)
2130 return TLO.CombineTo(Op, Op0);
2131
2132 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2133 // supports sext_inreg.
2134 if (Op0.getOpcode() == ISD::SHL) {
2135 if (std::optional<unsigned> InnerSA =
2136 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2137 unsigned LowBits = BitWidth - ShAmt;
2138 EVT ExtVT = VT.changeElementType(
2139 *TLO.DAG.getContext(),
2140 EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits));
2141
2142 if (*InnerSA == ShAmt) {
2143 if (!TLO.LegalOperations() ||
2145 return TLO.CombineTo(
2146 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2147 Op0.getOperand(0),
2148 TLO.DAG.getValueType(ExtVT)));
2149
2150 // Even if we can't convert to sext_inreg, we might be able to
2151 // remove this shift pair if the input is already sign extended.
2152 unsigned NumSignBits =
2153 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2154 if (NumSignBits > ShAmt)
2155 return TLO.CombineTo(Op, Op0.getOperand(0));
2156 }
2157 }
2158 }
2159
2160 APInt InDemandedMask = (DemandedBits << ShAmt);
2161
2162 // If the shift is exact, then it does demand the low bits (and knows that
2163 // they are zero).
2164 if (Op->getFlags().hasExact())
2165 InDemandedMask.setLowBits(ShAmt);
2166
2167 // If any of the demanded bits are produced by the sign extension, we also
2168 // demand the input sign bit.
2169 if (DemandedBits.countl_zero() < ShAmt)
2170 InDemandedMask.setSignBit();
2171
2172 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2173 Depth + 1))
2174 return true;
2175 Known >>= ShAmt;
2176
2177 // If the input sign bit is known to be zero, or if none of the top bits
2178 // are demanded, turn this into an unsigned shift right.
2179 if (Known.Zero[BitWidth - ShAmt - 1] ||
2180 DemandedBits.countl_zero() >= ShAmt) {
2181 SDNodeFlags Flags;
2182 Flags.setExact(Op->getFlags().hasExact());
2183 return TLO.CombineTo(
2184 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2185 }
2186
2187 int Log2 = DemandedBits.exactLogBase2();
2188 if (Log2 >= 0) {
2189 // The bit must come from the sign.
2190 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2191 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2192 }
2193
2194 if (Known.One[BitWidth - ShAmt - 1])
2195 // New bits are known one.
2196 Known.One.setHighBits(ShAmt);
2197
2198 // Attempt to avoid multi-use ops if we don't need anything from them.
2199 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2201 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2202 if (DemandedOp0) {
2203 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2204 return TLO.CombineTo(Op, NewOp);
2205 }
2206 }
2207 }
2208
2209 // Try to match AVG patterns (after shift simplification).
2210 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2211 DemandedElts, Depth + 1))
2212 return TLO.CombineTo(Op, AVG);
2213
2214 break;
2215 }
2216 case ISD::FSHL:
2217 case ISD::FSHR: {
2218 SDValue Op0 = Op.getOperand(0);
2219 SDValue Op1 = Op.getOperand(1);
2220 SDValue Op2 = Op.getOperand(2);
2221 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2222
2223 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2224 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2225
2226 // For fshl, 0-shift returns the 1st arg.
2227 // For fshr, 0-shift returns the 2nd arg.
2228 if (Amt == 0) {
2229 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2230 Known, TLO, Depth + 1))
2231 return true;
2232 break;
2233 }
2234
2235 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2236 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2237 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2238 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2239 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2240 Depth + 1))
2241 return true;
2242 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2243 Depth + 1))
2244 return true;
2245
2246 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2247 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2248 Known = Known.unionWith(Known2);
2249
2250 // Attempt to avoid multi-use ops if we don't need anything from them.
2251 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2252 !DemandedElts.isAllOnes()) {
2254 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2256 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2257 if (DemandedOp0 || DemandedOp1) {
2258 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2259 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2260 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2261 DemandedOp1, Op2);
2262 return TLO.CombineTo(Op, NewOp);
2263 }
2264 }
2265 }
2266
2267 if (isPowerOf2_32(BitWidth)) {
2268 // Fold FSHR(Op0,Op1,Op2) -> SRL(Op1,Op2)
2269 // iff we're guaranteed not to use Op0.
2270 // TODO: Add FSHL equivalent?
2271 if (!IsFSHL && !DemandedBits.isAllOnes() &&
2272 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT))) {
2273 KnownBits KnownAmt =
2274 TLO.DAG.computeKnownBits(Op2, DemandedElts, Depth + 1);
2275 unsigned MaxShiftAmt =
2276 KnownAmt.getMaxValue().getLimitedValue(BitWidth - 1);
2277 // Check we don't demand any shifted bits outside Op1.
2278 if (DemandedBits.countl_zero() >= MaxShiftAmt) {
2279 EVT AmtVT = Op2.getValueType();
2280 SDValue NewAmt =
2281 TLO.DAG.getNode(ISD::AND, dl, AmtVT, Op2,
2282 TLO.DAG.getConstant(BitWidth - 1, dl, AmtVT));
2283 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, Op1, NewAmt);
2284 return TLO.CombineTo(Op, NewOp);
2285 }
2286 }
2287
2288 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2289 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2290 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts, Known2, TLO,
2291 Depth + 1))
2292 return true;
2293 }
2294 break;
2295 }
2296 case ISD::ROTL:
2297 case ISD::ROTR: {
2298 SDValue Op0 = Op.getOperand(0);
2299 SDValue Op1 = Op.getOperand(1);
2300 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2301
 2302 // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2303 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2304 return TLO.CombineTo(Op, Op0);
2305
2306 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2307 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2308 unsigned RevAmt = BitWidth - Amt;
2309
2310 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2311 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2312 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2313 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2314 Depth + 1))
2315 return true;
2316
2317 // rot*(x, 0) --> x
2318 if (Amt == 0)
2319 return TLO.CombineTo(Op, Op0);
2320
2321 // See if we don't demand either half of the rotated bits.
2322 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2323 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2324 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2325 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2326 }
2327 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2328 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2329 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2330 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2331 }
2332 }
2333
2334 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2335 if (isPowerOf2_32(BitWidth)) {
2336 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2337 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2338 Depth + 1))
2339 return true;
2340 }
2341 break;
2342 }
2343 case ISD::SMIN:
2344 case ISD::SMAX:
2345 case ISD::UMIN:
2346 case ISD::UMAX: {
2347 unsigned Opc = Op.getOpcode();
2348 SDValue Op0 = Op.getOperand(0);
2349 SDValue Op1 = Op.getOperand(1);
2350
2351 // If we're only demanding signbits, then we can simplify to OR/AND node.
2352 unsigned BitOp =
2353 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2354 unsigned NumSignBits =
2355 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2356 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2357 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2358 if (NumSignBits >= NumDemandedUpperBits)
2359 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2360
2361 // Check if one arg is always less/greater than (or equal) to the other arg.
2362 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2363 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2364 switch (Opc) {
2365 case ISD::SMIN:
2366 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2367 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2368 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2369 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2370 Known = KnownBits::smin(Known0, Known1);
2371 break;
2372 case ISD::SMAX:
2373 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2374 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2375 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2376 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2377 Known = KnownBits::smax(Known0, Known1);
2378 break;
2379 case ISD::UMIN:
2380 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2381 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2382 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2383 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2384 Known = KnownBits::umin(Known0, Known1);
2385 break;
2386 case ISD::UMAX:
2387 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2388 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2389 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2390 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2391 Known = KnownBits::umax(Known0, Known1);
2392 break;
2393 }
2394 break;
2395 }
2396 case ISD::BITREVERSE: {
2397 SDValue Src = Op.getOperand(0);
2398 APInt DemandedSrcBits = DemandedBits.reverseBits();
2399 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2400 Depth + 1))
2401 return true;
2402 Known = Known2.reverseBits();
2403 break;
2404 }
2405 case ISD::BSWAP: {
2406 SDValue Src = Op.getOperand(0);
2407
2408 // If the only bits demanded come from one byte of the bswap result,
2409 // just shift the input byte into position to eliminate the bswap.
2410 unsigned NLZ = DemandedBits.countl_zero();
2411 unsigned NTZ = DemandedBits.countr_zero();
2412
2413 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2414 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2415 // have 14 leading zeros, round to 8.
2416 NLZ = alignDown(NLZ, 8);
2417 NTZ = alignDown(NTZ, 8);
2418 // If we need exactly one byte, we can do this transformation.
2419 if (BitWidth - NLZ - NTZ == 8) {
2420 // Replace this with either a left or right shift to get the byte into
2421 // the right place.
2422 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2423 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2424 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2425 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2426 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2427 return TLO.CombineTo(Op, NewOp);
2428 }
2429 }
2430
2431 APInt DemandedSrcBits = DemandedBits.byteSwap();
2432 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2433 Depth + 1))
2434 return true;
2435 Known = Known2.byteSwap();
2436 break;
2437 }
2438 case ISD::CTPOP: {
2439 // If only 1 bit is demanded, replace with PARITY as long as we're before
2440 // op legalization.
2441 // FIXME: Limit to scalars for now.
2442 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2443 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2444 Op.getOperand(0)));
2445
2446 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2447 break;
2448 }
2450 SDValue Op0 = Op.getOperand(0);
2451 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2452 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2453
2454 // If we only care about the highest bit, don't bother shifting right.
2455 if (DemandedBits.isSignMask()) {
2456 unsigned MinSignedBits =
2457 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2458 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2459 // However if the input is already sign extended we expect the sign
2460 // extension to be dropped altogether later and do not simplify.
2461 if (!AlreadySignExtended) {
2462 // Compute the correct shift amount type, which must be getShiftAmountTy
2463 // for scalar types after legalization.
2464 SDValue ShiftAmt =
2465 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2466 return TLO.CombineTo(Op,
2467 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2468 }
2469 }
2470
2471 // If none of the extended bits are demanded, eliminate the sextinreg.
2472 if (DemandedBits.getActiveBits() <= ExVTBits)
2473 return TLO.CombineTo(Op, Op0);
2474
2475 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2476
2477 // Since the sign extended bits are demanded, we know that the sign
2478 // bit is demanded.
2479 InputDemandedBits.setBit(ExVTBits - 1);
2480
2481 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2482 Depth + 1))
2483 return true;
2484
2485 // If the sign bit of the input is known set or clear, then we know the
2486 // top bits of the result.
2487
2488 // If the input sign bit is known zero, convert this into a zero extension.
2489 if (Known.Zero[ExVTBits - 1])
2490 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2491
2492 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2493 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2494 Known.One.setBitsFrom(ExVTBits);
2495 Known.Zero &= Mask;
2496 } else { // Input sign bit unknown
2497 Known.Zero &= Mask;
2498 Known.One &= Mask;
2499 }
2500 break;
2501 }
2502 case ISD::BUILD_PAIR: {
2503 EVT HalfVT = Op.getOperand(0).getValueType();
2504 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2505
2506 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2507 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2508
2509 KnownBits KnownLo, KnownHi;
2510
2511 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2512 return true;
2513
2514 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2515 return true;
2516
2517 Known = KnownHi.concat(KnownLo);
2518 break;
2519 }
2521 if (VT.isScalableVector())
2522 return false;
2523 [[fallthrough]];
2524 case ISD::ZERO_EXTEND: {
2525 SDValue Src = Op.getOperand(0);
2526 EVT SrcVT = Src.getValueType();
2527 unsigned InBits = SrcVT.getScalarSizeInBits();
2528 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2529 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2530
2531 // If none of the top bits are demanded, convert this into an any_extend.
2532 if (DemandedBits.getActiveBits() <= InBits) {
2533 // If we only need the non-extended bits of the bottom element
2534 // then we can just bitcast to the result.
2535 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2536 VT.getSizeInBits() == SrcVT.getSizeInBits())
2537 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2538
2539 unsigned Opc =
2541 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2542 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2543 }
2544
2545 APInt InDemandedBits = DemandedBits.trunc(InBits);
2546 APInt InDemandedElts = DemandedElts.zext(InElts);
2547 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2548 Depth + 1)) {
2549 Op->dropFlags(SDNodeFlags::NonNeg);
2550 return true;
2551 }
2552 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2553 Known = Known.zext(BitWidth);
2554
2555 // Attempt to avoid multi-use ops if we don't need anything from them.
2557 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2558 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2559 break;
2560 }
2562 if (VT.isScalableVector())
2563 return false;
2564 [[fallthrough]];
2565 case ISD::SIGN_EXTEND: {
2566 SDValue Src = Op.getOperand(0);
2567 EVT SrcVT = Src.getValueType();
2568 unsigned InBits = SrcVT.getScalarSizeInBits();
2569 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2570 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2571
2572 APInt InDemandedElts = DemandedElts.zext(InElts);
2573 APInt InDemandedBits = DemandedBits.trunc(InBits);
2574
2575 // Since some of the sign extended bits are demanded, we know that the sign
2576 // bit is demanded.
2577 InDemandedBits.setBit(InBits - 1);
2578
2579 // If none of the top bits are demanded, convert this into an any_extend.
2580 if (DemandedBits.getActiveBits() <= InBits) {
2581 // If we only need the non-extended bits of the bottom element
2582 // then we can just bitcast to the result.
2583 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2584 VT.getSizeInBits() == SrcVT.getSizeInBits())
2585 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2586
2587 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2589 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2590 InBits) {
2591 unsigned Opc =
2593 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2594 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2595 }
2596 }
2597
2598 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2599 Depth + 1))
2600 return true;
2601 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2602
2603 // If the sign bit is known one, the top bits match.
2604 Known = Known.sext(BitWidth);
2605
2606 // If the sign bit is known zero, convert this to a zero extend.
2607 if (Known.isNonNegative()) {
2608 unsigned Opc =
2610 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2611 SDNodeFlags Flags;
2612 if (!IsVecInReg)
2613 Flags |= SDNodeFlags::NonNeg;
2614 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2615 }
2616 }
2617
2618 // Attempt to avoid multi-use ops if we don't need anything from them.
2620 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2621 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2622 break;
2623 }
2625 if (VT.isScalableVector())
2626 return false;
2627 [[fallthrough]];
2628 case ISD::ANY_EXTEND: {
2629 SDValue Src = Op.getOperand(0);
2630 EVT SrcVT = Src.getValueType();
2631 unsigned InBits = SrcVT.getScalarSizeInBits();
2632 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2633 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2634
2635 // If we only need the bottom element then we can just bitcast.
2636 // TODO: Handle ANY_EXTEND?
2637 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2638 VT.getSizeInBits() == SrcVT.getSizeInBits())
2639 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2640
2641 APInt InDemandedBits = DemandedBits.trunc(InBits);
2642 APInt InDemandedElts = DemandedElts.zext(InElts);
2643 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2644 Depth + 1))
2645 return true;
2646 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2647 Known = Known.anyext(BitWidth);
2648
2649 // Attempt to avoid multi-use ops if we don't need anything from them.
2651 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2652 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2653 break;
2654 }
2655 case ISD::TRUNCATE: {
2656 SDValue Src = Op.getOperand(0);
2657
2658 // Simplify the input, using demanded bit information, and compute the known
2659 // zero/one bits live out.
2660 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2661 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2662 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2663 Depth + 1)) {
2664 // Disable the nsw and nuw flags. We can no longer guarantee that we
2665 // won't wrap after simplification.
2666 Op->dropFlags(SDNodeFlags::NoWrap);
2667 return true;
2668 }
2669 Known = Known.trunc(BitWidth);
2670
2671 // Attempt to avoid multi-use ops if we don't need anything from them.
2673 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2674 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2675
2676 // If the input is only used by this truncate, see if we can shrink it based
2677 // on the known demanded bits.
2678 switch (Src.getOpcode()) {
2679 default:
2680 break;
2681 case ISD::SRL:
2682 // Shrink SRL by a constant if none of the high bits shifted in are
2683 // demanded.
2684 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2685 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2686 // undesirable.
2687 break;
2688
2689 if (Src.getNode()->hasOneUse()) {
2690 if (isTruncateFree(Src, VT) &&
2691 !isTruncateFree(Src.getValueType(), VT)) {
2692 // If truncate is only free at trunc(srl), do not turn it into
2693 // srl(trunc). The check is done by first check the truncate is free
2694 // at Src's opcode(srl), then check the truncate is not done by
2695 // referencing sub-register. In test, if both trunc(srl) and
2696 // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2697 // trunc(srl)'s trunc is free, trunc(srl) is better.
2698 break;
2699 }
2700
2701 std::optional<unsigned> ShAmtC =
2702 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2703 if (!ShAmtC || *ShAmtC >= BitWidth)
2704 break;
2705 unsigned ShVal = *ShAmtC;
2706
2707 APInt HighBits =
2708 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2709 HighBits.lshrInPlace(ShVal);
2710 HighBits = HighBits.trunc(BitWidth);
2711 if (!(HighBits & DemandedBits)) {
2712 // None of the shifted in bits are needed. Add a truncate of the
2713 // shift input, then shift it.
2714 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2715 SDValue NewTrunc =
2716 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2717 return TLO.CombineTo(
2718 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2719 }
2720 }
2721 break;
2722 }
2723
2724 break;
2725 }
2726 case ISD::AssertZext: {
2727 // AssertZext demands all of the high bits, plus any of the low bits
2728 // demanded by its users.
2729 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2731 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2732 TLO, Depth + 1))
2733 return true;
2734
2735 Known.Zero |= ~InMask;
2736 Known.One &= (~Known.Zero);
2737 break;
2738 }
2740 SDValue Src = Op.getOperand(0);
2741 SDValue Idx = Op.getOperand(1);
2742 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2743 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2744
2745 if (SrcEltCnt.isScalable())
2746 return false;
2747
2748 // Demand the bits from every vector element without a constant index.
2749 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2750 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2751 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2752 if (CIdx->getAPIntValue().ult(NumSrcElts))
2753 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2754
2755 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2756 // anything about the extended bits.
2757 APInt DemandedSrcBits = DemandedBits;
2758 if (BitWidth > EltBitWidth)
2759 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2760
2761 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2762 Depth + 1))
2763 return true;
2764
2765 // Attempt to avoid multi-use ops if we don't need anything from them.
2766 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2767 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2768 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2769 SDValue NewOp =
2770 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2771 return TLO.CombineTo(Op, NewOp);
2772 }
2773 }
2774
2775 Known = Known2;
2776 if (BitWidth > EltBitWidth)
2777 Known = Known.anyext(BitWidth);
2778 break;
2779 }
2780 case ISD::BITCAST: {
2781 if (VT.isScalableVector())
2782 return false;
2783 SDValue Src = Op.getOperand(0);
2784 EVT SrcVT = Src.getValueType();
2785 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2786
2787 // If this is an FP->Int bitcast and if the sign bit is the only
2788 // thing demanded, turn this into a FGETSIGN.
2789 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2790 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2791 SrcVT.isFloatingPoint()) {
2793 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2794 // place. We expect the SHL to be eliminated by other optimizations.
2795 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, VT, Src);
2796 unsigned ShVal = Op.getValueSizeInBits() - 1;
2797 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2798 return TLO.CombineTo(Op,
2799 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2800 }
2801 }
2802
2803 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2804 // Demand the elt/bit if any of the original elts/bits are demanded.
2805 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2806 unsigned Scale = BitWidth / NumSrcEltBits;
2807 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2808 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2809 for (unsigned i = 0; i != Scale; ++i) {
2810 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2811 unsigned BitOffset = EltOffset * NumSrcEltBits;
2812 DemandedSrcBits |= DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2813 }
2814 // Recursive calls below may turn not demanded elements into poison, so we
2815 // need to demand all smaller source elements that maps to a demanded
2816 // destination element.
2817 APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2818
2819 APInt KnownSrcUndef, KnownSrcZero;
2820 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2821 KnownSrcZero, TLO, Depth + 1))
2822 return true;
2823
2824 KnownBits KnownSrcBits;
2825 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2826 KnownSrcBits, TLO, Depth + 1))
2827 return true;
2828 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2829 // TODO - bigendian once we have test coverage.
2830 unsigned Scale = NumSrcEltBits / BitWidth;
2831 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2832 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2833 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2834 for (unsigned i = 0; i != NumElts; ++i)
2835 if (DemandedElts[i]) {
2836 unsigned Offset = (i % Scale) * BitWidth;
2837 DemandedSrcBits.insertBits(DemandedBits, Offset);
2838 DemandedSrcElts.setBit(i / Scale);
2839 }
2840
2841 if (SrcVT.isVector()) {
2842 APInt KnownSrcUndef, KnownSrcZero;
2843 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2844 KnownSrcZero, TLO, Depth + 1))
2845 return true;
2846 }
2847
2848 KnownBits KnownSrcBits;
2849 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2850 KnownSrcBits, TLO, Depth + 1))
2851 return true;
2852
2853 // Attempt to avoid multi-use ops if we don't need anything from them.
2854 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2855 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2856 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2857 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2858 return TLO.CombineTo(Op, NewOp);
2859 }
2860 }
2861 }
2862
2863 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2864 // recursive call where Known may be useful to the caller.
2865 if (Depth > 0) {
2866 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2867 return false;
2868 }
2869 break;
2870 }
2871 case ISD::MUL:
2872 if (DemandedBits.isPowerOf2()) {
2873 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2874 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2875 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2876 unsigned CTZ = DemandedBits.countr_zero();
2877 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2878 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2879 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2880 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2881 return TLO.CombineTo(Op, Shl);
2882 }
2883 }
2884 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2885 // X * X is odd iff X is odd.
2886 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2887 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2888 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2889 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2890 return TLO.CombineTo(Op, And1);
2891 }
2892 [[fallthrough]];
2893 case ISD::PTRADD:
2894 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
2895 break;
2896 // PTRADD behaves like ADD if pointers are represented as integers.
2897 [[fallthrough]];
2898 case ISD::ADD:
2899 case ISD::SUB: {
2900 // Add, Sub, and Mul don't demand any bits in positions beyond that
2901 // of the highest bit demanded of them.
2902 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2903 SDNodeFlags Flags = Op.getNode()->getFlags();
2904 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2905 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2906 KnownBits KnownOp0, KnownOp1;
2907 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2908 const KnownBits &KnownRHS) {
2909 if (Op.getOpcode() == ISD::MUL)
2910 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2911 return Demanded;
2912 };
2913 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2914 Depth + 1) ||
2915 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2916 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2917 // See if the operation should be performed at a smaller bit width.
2919 // Disable the nsw and nuw flags. We can no longer guarantee that we
2920 // won't wrap after simplification.
2921 Op->dropFlags(SDNodeFlags::NoWrap);
2922 return true;
2923 }
2924
2925 // neg x with only low bit demanded is simply x.
2926 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2927 isNullConstant(Op0))
2928 return TLO.CombineTo(Op, Op1);
2929
2930 // Attempt to avoid multi-use ops if we don't need anything from them.
2931 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2933 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2935 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2936 if (DemandedOp0 || DemandedOp1) {
2937 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2938 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2939 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2940 Flags & ~SDNodeFlags::NoWrap);
2941 return TLO.CombineTo(Op, NewOp);
2942 }
2943 }
2944
2945 // If we have a constant operand, we may be able to turn it into -1 if we
2946 // do not demand the high bits. This can make the constant smaller to
2947 // encode, allow more general folding, or match specialized instruction
2948 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2949 // is probably not useful (and could be detrimental).
2951 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2952 if (C && !C->isAllOnes() && !C->isOne() &&
2953 (C->getAPIntValue() | HighMask).isAllOnes()) {
2954 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2955 // Disable the nsw and nuw flags. We can no longer guarantee that we
2956 // won't wrap after simplification.
2957 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2958 Flags & ~SDNodeFlags::NoWrap);
2959 return TLO.CombineTo(Op, NewOp);
2960 }
2961
2962 // Match a multiply with a disguised negated-power-of-2 and convert to a
2963 // an equivalent shift-left amount.
2964 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2965 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2966 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2967 return 0;
2968
2969 // Don't touch opaque constants. Also, ignore zero and power-of-2
2970 // multiplies. Those will get folded later.
2971 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2972 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2973 !MulC->getAPIntValue().isPowerOf2()) {
2974 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2975 if (UnmaskedC.isNegatedPowerOf2())
2976 return (-UnmaskedC).logBase2();
2977 }
2978 return 0;
2979 };
2980
2981 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2982 unsigned ShlAmt) {
2983 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2984 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2985 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2986 return TLO.CombineTo(Op, Res);
2987 };
2988
2990 if (Op.getOpcode() == ISD::ADD) {
2991 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2992 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2993 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2994 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2995 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2996 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2997 }
2998 if (Op.getOpcode() == ISD::SUB) {
2999 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
3000 if (unsigned ShAmt = getShiftLeftAmt(Op1))
3001 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
3002 }
3003 }
3004
3005 if (Op.getOpcode() == ISD::MUL) {
3006 Known = KnownBits::mul(KnownOp0, KnownOp1);
3007 } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
3009 Op.getOpcode() != ISD::SUB, Flags.hasNoSignedWrap(),
3010 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
3011 }
3012 break;
3013 }
3014 case ISD::FABS: {
3015 SDValue Op0 = Op.getOperand(0);
3016 APInt SignMask = APInt::getSignMask(BitWidth);
3017
3018 if (!DemandedBits.intersects(SignMask))
3019 return TLO.CombineTo(Op, Op0);
3020
3021 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3022 Depth + 1))
3023 return true;
3024
3025 if (Known.isNonNegative())
3026 return TLO.CombineTo(Op, Op0);
3027 if (Known.isNegative())
3028 return TLO.CombineTo(
3029 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0, Op->getFlags()));
3030
3031 Known.Zero |= SignMask;
3032 Known.One &= ~SignMask;
3033
3034 break;
3035 }
3036 case ISD::FCOPYSIGN: {
3037 SDValue Op0 = Op.getOperand(0);
3038 SDValue Op1 = Op.getOperand(1);
3039
3040 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3041 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3042 APInt SignMask0 = APInt::getSignMask(BitWidth0);
3043 APInt SignMask1 = APInt::getSignMask(BitWidth1);
3044
3045 if (!DemandedBits.intersects(SignMask0))
3046 return TLO.CombineTo(Op, Op0);
3047
3048 if (SimplifyDemandedBits(Op0, ~SignMask0 & DemandedBits, DemandedElts,
3049 Known, TLO, Depth + 1) ||
3050 SimplifyDemandedBits(Op1, SignMask1, DemandedElts, Known2, TLO,
3051 Depth + 1))
3052 return true;
3053
3054 if (Known2.isNonNegative())
3055 return TLO.CombineTo(
3056 Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
3057
3058 if (Known2.isNegative())
3059 return TLO.CombineTo(
3060 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT,
3061 TLO.DAG.getNode(ISD::FABS, SDLoc(Op0), VT, Op0)));
3062
3063 Known.Zero &= ~SignMask0;
3064 Known.One &= ~SignMask0;
3065 break;
3066 }
3067 case ISD::FNEG: {
3068 SDValue Op0 = Op.getOperand(0);
3069 APInt SignMask = APInt::getSignMask(BitWidth);
3070
3071 if (!DemandedBits.intersects(SignMask))
3072 return TLO.CombineTo(Op, Op0);
3073
3074 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3075 Depth + 1))
3076 return true;
3077
3078 if (!Known.isSignUnknown()) {
3079 Known.Zero ^= SignMask;
3080 Known.One ^= SignMask;
3081 }
3082
3083 break;
3084 }
3085 default:
3086 // We also ask the target about intrinsics (which could be specific to it).
3087 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3088 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3089 // TODO: Probably okay to remove after audit; here to reduce change size
3090 // in initial enablement patch for scalable vectors
3091 if (Op.getValueType().isScalableVector())
3092 break;
3094 Known, TLO, Depth))
3095 return true;
3096 break;
3097 }
3098
3099 // Just use computeKnownBits to compute output bits.
3100 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3101 break;
3102 }
3103
3104 // If we know the value of all of the demanded bits, return this as a
3105 // constant.
3107 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
3108 // Avoid folding to a constant if any OpaqueConstant is involved.
3109 if (llvm::any_of(Op->ops(), [](SDValue V) {
3110 auto *C = dyn_cast<ConstantSDNode>(V);
3111 return C && C->isOpaque();
3112 }))
3113 return false;
3114 if (VT.isInteger())
3115 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
3116 if (VT.isFloatingPoint())
3117 return TLO.CombineTo(
3118 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
3119 dl, VT));
3120 }
3121
3122 // A multi use 'all demanded elts' simplify failed to find any knownbits.
3123 // Try again just for the original demanded elts.
3124 // Ensure we do this AFTER constant folding above.
3125 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3126 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3127
3128 return false;
3129}
3130
// DAGCombiner-facing wrapper: try to simplify Op based on which vector
// elements are actually demanded, and commit any rewrites through DCI.
// Returns true if a simplification was made.
3132 const APInt &DemandedElts,
3133 DAGCombinerInfo &DCI) const {
3134 SelectionDAG &DAG = DCI.DAG;
// Legality constraints depend on the combine phase: once (type/op)
// legalization has run, TLO must not introduce illegal nodes/ops.
3135 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3136 !DCI.isBeforeLegalizeOps());
3137
3138 APInt KnownUndef, KnownZero;
3139 bool Simplified =
3140 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3141 if (Simplified) {
// Re-visit Op and apply the replacements recorded in TLO.
3142 DCI.AddToWorklist(Op.getNode());
3143 DCI.CommitTargetLoweringOpt(TLO);
3144 }
3145
3146 return Simplified;
3147 }
3148
3149/// Given a vector binary operation and known undefined elements for each input
3150/// operand, compute whether each element of the output is undefined.
3152 const APInt &UndefOp0,
3153 const APInt &UndefOp1) {
3154 EVT VT = BO.getValueType();
3156 "Vector binop only");
3157
3158 EVT EltVT = VT.getVectorElementType();
// Non-fixed-length (scalable) vectors are treated as a single lane here.
3159 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3160 assert(UndefOp0.getBitWidth() == NumElts &&
3161 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3162
// Returns the scalar to fold for a lane: UNDEF if the lane is known undef,
// the build-vector's element if it is an FP constant, undef, or a
// non-opaque integer constant; otherwise a null SDValue ("cannot fold").
3163 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3164 const APInt &UndefVals) {
3165 if (UndefVals[Index])
3166 return DAG.getUNDEF(EltVT);
3167
3168 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3169 // Try hard to make sure that the getNode() call is not creating temporary
3170 // nodes. Ignore opaque integers because they do not constant fold.
3171 SDValue Elt = BV->getOperand(Index);
3172 auto *C = dyn_cast<ConstantSDNode>(Elt);
3173 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3174 return Elt;
3175 }
3176
3177 return SDValue();
3178 };
3179
3180 APInt KnownUndef = APInt::getZero(NumElts);
3181 for (unsigned i = 0; i != NumElts; ++i) {
3182 // If both inputs for this element are either constant or undef and match
3183 // the element type, compute the constant/undef result for this element of
3184 // the vector.
3185 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3186 // not handle FP constants. The code within getNode() should be refactored
3187 // to avoid the danger of creating a bogus temporary node here.
3188 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3189 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3190 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
// Let getNode() constant-fold the scalar op; the output lane is known
// undef only when the folded result is itself UNDEF.
3191 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3192 KnownUndef.setBit(i);
3193 }
3194 return KnownUndef;
3195 }
3196
3198 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3199 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3200 bool AssumeSingleUse) const {
3201 EVT VT = Op.getValueType();
3202 unsigned Opcode = Op.getOpcode();
3203 APInt DemandedElts = OriginalDemandedElts;
3204 unsigned NumElts = DemandedElts.getBitWidth();
3205 assert(VT.isVector() && "Expected vector op");
3206
3207 KnownUndef = KnownZero = APInt::getZero(NumElts);
3208
3210 return false;
3211
3212 // TODO: For now we assume we know nothing about scalable vectors.
3213 if (VT.isScalableVector())
3214 return false;
3215
3216 assert(VT.getVectorNumElements() == NumElts &&
3217 "Mask size mismatches value type element count!");
3218
3219 // Undef operand.
3220 if (Op.isUndef()) {
3221 KnownUndef.setAllBits();
3222 return false;
3223 }
3224
3225 // If Op has other users, assume that all elements are needed.
3226 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3227 DemandedElts.setAllBits();
3228
3229 // Not demanding any elements from Op.
3230 if (DemandedElts == 0) {
3231 KnownUndef.setAllBits();
3232 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3233 }
3234
3235 // Limit search depth.
3237 return false;
3238
3239 SDLoc DL(Op);
3240 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3241 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3242
3243 // Helper for demanding the specified elements and all the bits of both binary
3244 // operands.
3245 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3246 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3247 TLO.DAG, Depth + 1);
3248 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3249 TLO.DAG, Depth + 1);
3250 if (NewOp0 || NewOp1) {
3251 SDValue NewOp =
3252 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3253 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3254 return TLO.CombineTo(Op, NewOp);
3255 }
3256 return false;
3257 };
3258
3259 switch (Opcode) {
3260 case ISD::SCALAR_TO_VECTOR: {
3261 if (!DemandedElts[0]) {
3262 KnownUndef.setAllBits();
3263 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3264 }
3265 KnownUndef.setHighBits(NumElts - 1);
3266 break;
3267 }
3268 case ISD::BITCAST: {
3269 SDValue Src = Op.getOperand(0);
3270 EVT SrcVT = Src.getValueType();
3271
3272 if (!SrcVT.isVector()) {
3273 // TODO - bigendian once we have test coverage.
3274 if (IsLE) {
3275 APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
3276 unsigned EltSize = VT.getScalarSizeInBits();
3277 for (unsigned I = 0; I != NumElts; ++I) {
3278 if (DemandedElts[I]) {
3279 unsigned Offset = I * EltSize;
3280 DemandedSrcBits.setBits(Offset, Offset + EltSize);
3281 }
3282 }
3283 KnownBits Known;
3284 if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
3285 return true;
3286 }
3287 break;
3288 }
3289
3290 // Fast handling of 'identity' bitcasts.
3291 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3292 if (NumSrcElts == NumElts)
3293 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3294 KnownZero, TLO, Depth + 1);
3295
3296 APInt SrcDemandedElts, SrcZero, SrcUndef;
3297
3298 // Bitcast from 'large element' src vector to 'small element' vector, we
3299 // must demand a source element if any DemandedElt maps to it.
3300 if ((NumElts % NumSrcElts) == 0) {
3301 unsigned Scale = NumElts / NumSrcElts;
3302 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3303 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3304 TLO, Depth + 1))
3305 return true;
3306
3307 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3308 // of the large element.
3309 // TODO - bigendian once we have test coverage.
3310 if (IsLE) {
3311 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3312 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3313 for (unsigned i = 0; i != NumElts; ++i)
3314 if (DemandedElts[i]) {
3315 unsigned Ofs = (i % Scale) * EltSizeInBits;
3316 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3317 }
3318
3319 KnownBits Known;
3320 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3321 TLO, Depth + 1))
3322 return true;
3323
3324 // The bitcast has split each wide element into a number of
3325 // narrow subelements. We have just computed the Known bits
3326 // for wide elements. See if element splitting results in
3327 // some subelements being zero. Only for demanded elements!
3328 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3329 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3330 .isAllOnes())
3331 continue;
3332 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3333 unsigned Elt = Scale * SrcElt + SubElt;
3334 if (DemandedElts[Elt])
3335 KnownZero.setBit(Elt);
3336 }
3337 }
3338 }
3339
3340 // If the src element is zero/undef then all the output elements will be -
3341 // only demanded elements are guaranteed to be correct.
3342 for (unsigned i = 0; i != NumSrcElts; ++i) {
3343 if (SrcDemandedElts[i]) {
3344 if (SrcZero[i])
3345 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3346 if (SrcUndef[i])
3347 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3348 }
3349 }
3350 }
3351
3352 // Bitcast from 'small element' src vector to 'large element' vector, we
3353 // demand all smaller source elements covered by the larger demanded element
3354 // of this vector.
3355 if ((NumSrcElts % NumElts) == 0) {
3356 unsigned Scale = NumSrcElts / NumElts;
3357 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3358 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3359 TLO, Depth + 1))
3360 return true;
3361
3362 // If all the src elements covering an output element are zero/undef, then
3363 // the output element will be as well, assuming it was demanded.
3364 for (unsigned i = 0; i != NumElts; ++i) {
3365 if (DemandedElts[i]) {
3366 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3367 KnownZero.setBit(i);
3368 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3369 KnownUndef.setBit(i);
3370 }
3371 }
3372 }
3373 break;
3374 }
3375 case ISD::FREEZE: {
3376 SDValue N0 = Op.getOperand(0);
3377 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3378 /*PoisonOnly=*/false,
3379 Depth + 1))
3380 return TLO.CombineTo(Op, N0);
3381
3382 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3383 // freeze(op(x, ...)) -> op(freeze(x), ...).
3384 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3385 return TLO.CombineTo(
3387 TLO.DAG.getFreeze(N0.getOperand(0))));
3388 break;
3389 }
3390 case ISD::BUILD_VECTOR: {
3391 // Check all elements and simplify any unused elements with UNDEF.
3392 if (!DemandedElts.isAllOnes()) {
3393 // Don't simplify BROADCASTS.
3394 if (llvm::any_of(Op->op_values(),
3395 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3397 bool Updated = false;
3398 for (unsigned i = 0; i != NumElts; ++i) {
3399 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3400 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3401 KnownUndef.setBit(i);
3402 Updated = true;
3403 }
3404 }
3405 if (Updated)
3406 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3407 }
3408 }
3409 for (unsigned i = 0; i != NumElts; ++i) {
3410 SDValue SrcOp = Op.getOperand(i);
3411 if (SrcOp.isUndef()) {
3412 KnownUndef.setBit(i);
3413 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3415 KnownZero.setBit(i);
3416 }
3417 }
3418 break;
3419 }
3420 case ISD::CONCAT_VECTORS: {
3421 EVT SubVT = Op.getOperand(0).getValueType();
3422 unsigned NumSubVecs = Op.getNumOperands();
3423 unsigned NumSubElts = SubVT.getVectorNumElements();
3424 for (unsigned i = 0; i != NumSubVecs; ++i) {
3425 SDValue SubOp = Op.getOperand(i);
3426 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3427 APInt SubUndef, SubZero;
3428 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3429 Depth + 1))
3430 return true;
3431 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3432 KnownZero.insertBits(SubZero, i * NumSubElts);
3433 }
3434
3435 // Attempt to avoid multi-use ops if we don't need anything from them.
3436 if (!DemandedElts.isAllOnes()) {
3437 bool FoundNewSub = false;
3438 SmallVector<SDValue, 2> DemandedSubOps;
3439 for (unsigned i = 0; i != NumSubVecs; ++i) {
3440 SDValue SubOp = Op.getOperand(i);
3441 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3443 SubOp, SubElts, TLO.DAG, Depth + 1);
3444 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3445 FoundNewSub = NewSubOp ? true : FoundNewSub;
3446 }
3447 if (FoundNewSub) {
3448 SDValue NewOp =
3449 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3450 return TLO.CombineTo(Op, NewOp);
3451 }
3452 }
3453 break;
3454 }
3455 case ISD::INSERT_SUBVECTOR: {
3456 // Demand any elements from the subvector and the remainder from the src it
3457 // is inserted into.
3458 SDValue Src = Op.getOperand(0);
3459 SDValue Sub = Op.getOperand(1);
3460 uint64_t Idx = Op.getConstantOperandVal(2);
3461 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3462 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3463 APInt DemandedSrcElts = DemandedElts;
3464 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
3465
3466 // If none of the sub operand elements are demanded, bypass the insert.
3467 if (!DemandedSubElts)
3468 return TLO.CombineTo(Op, Src);
3469
3470 APInt SubUndef, SubZero;
3471 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3472 Depth + 1))
3473 return true;
3474
3475 // If none of the src operand elements are demanded, replace it with undef.
3476 if (!DemandedSrcElts && !Src.isUndef())
3477 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3478 TLO.DAG.getUNDEF(VT), Sub,
3479 Op.getOperand(2)));
3480
3481 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3482 TLO, Depth + 1))
3483 return true;
3484 KnownUndef.insertBits(SubUndef, Idx);
3485 KnownZero.insertBits(SubZero, Idx);
3486
3487 // Attempt to avoid multi-use ops if we don't need anything from them.
3488 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3490 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3492 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3493 if (NewSrc || NewSub) {
3494 NewSrc = NewSrc ? NewSrc : Src;
3495 NewSub = NewSub ? NewSub : Sub;
3496 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3497 NewSub, Op.getOperand(2));
3498 return TLO.CombineTo(Op, NewOp);
3499 }
3500 }
3501 break;
3502 }
3504 // Offset the demanded elts by the subvector index.
3505 SDValue Src = Op.getOperand(0);
3506 if (Src.getValueType().isScalableVector())
3507 break;
3508 uint64_t Idx = Op.getConstantOperandVal(1);
3509 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3510 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3511
3512 APInt SrcUndef, SrcZero;
3513 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3514 Depth + 1))
3515 return true;
3516 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3517 KnownZero = SrcZero.extractBits(NumElts, Idx);
3518
3519 // Attempt to avoid multi-use ops if we don't need anything from them.
3520 if (!DemandedElts.isAllOnes()) {
3522 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3523 if (NewSrc) {
3524 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3525 Op.getOperand(1));
3526 return TLO.CombineTo(Op, NewOp);
3527 }
3528 }
3529 break;
3530 }
3532 SDValue Vec = Op.getOperand(0);
3533 SDValue Scl = Op.getOperand(1);
3534 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3535
3536 // For a legal, constant insertion index, if we don't need this insertion
3537 // then strip it, else remove it from the demanded elts.
3538 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3539 unsigned Idx = CIdx->getZExtValue();
3540 if (!DemandedElts[Idx])
3541 return TLO.CombineTo(Op, Vec);
3542
3543 APInt DemandedVecElts(DemandedElts);
3544 DemandedVecElts.clearBit(Idx);
3545 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3546 KnownZero, TLO, Depth + 1))
3547 return true;
3548
3549 KnownUndef.setBitVal(Idx, Scl.isUndef());
3550
3551 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3552 break;
3553 }
3554
3555 APInt VecUndef, VecZero;
3556 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3557 Depth + 1))
3558 return true;
3559 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3560 break;
3561 }
3562 case ISD::VSELECT: {
3563 SDValue Sel = Op.getOperand(0);
3564 SDValue LHS = Op.getOperand(1);
3565 SDValue RHS = Op.getOperand(2);
3566
3567 // Try to transform the select condition based on the current demanded
3568 // elements.
3569 APInt UndefSel, ZeroSel;
3570 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3571 Depth + 1))
3572 return true;
3573
3574 // See if we can simplify either vselect operand.
3575 APInt DemandedLHS(DemandedElts);
3576 APInt DemandedRHS(DemandedElts);
3577 APInt UndefLHS, ZeroLHS;
3578 APInt UndefRHS, ZeroRHS;
3579 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3580 Depth + 1))
3581 return true;
3582 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3583 Depth + 1))
3584 return true;
3585
3586 KnownUndef = UndefLHS & UndefRHS;
3587 KnownZero = ZeroLHS & ZeroRHS;
3588
3589 // If we know that the selected element is always zero, we don't need the
3590 // select value element.
3591 APInt DemandedSel = DemandedElts & ~KnownZero;
3592 if (DemandedSel != DemandedElts)
3593 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3594 Depth + 1))
3595 return true;
3596
3597 break;
3598 }
3599 case ISD::VECTOR_SHUFFLE: {
3600 SDValue LHS = Op.getOperand(0);
3601 SDValue RHS = Op.getOperand(1);
3602 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3603
3604 // Collect demanded elements from shuffle operands..
3605 APInt DemandedLHS(NumElts, 0);
3606 APInt DemandedRHS(NumElts, 0);
3607 for (unsigned i = 0; i != NumElts; ++i) {
3608 int M = ShuffleMask[i];
3609 if (M < 0 || !DemandedElts[i])
3610 continue;
3611 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3612 if (M < (int)NumElts)
3613 DemandedLHS.setBit(M);
3614 else
3615 DemandedRHS.setBit(M - NumElts);
3616 }
3617
3618 // If either side isn't demanded, replace it by UNDEF. We handle this
3619 // explicitly here to also simplify in case of multiple uses (on the
3620 // contrary to the SimplifyDemandedVectorElts calls below).
3621 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3622 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3623 if (FoldLHS || FoldRHS) {
3624 LHS = FoldLHS ? TLO.DAG.getUNDEF(LHS.getValueType()) : LHS;
3625 RHS = FoldRHS ? TLO.DAG.getUNDEF(RHS.getValueType()) : RHS;
3626 SDValue NewOp =
3627 TLO.DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, ShuffleMask);
3628 return TLO.CombineTo(Op, NewOp);
3629 }
3630
3631 // See if we can simplify either shuffle operand.
3632 APInt UndefLHS, ZeroLHS;
3633 APInt UndefRHS, ZeroRHS;
3634 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3635 Depth + 1))
3636 return true;
3637 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3638 Depth + 1))
3639 return true;
3640
3641 // Simplify mask using undef elements from LHS/RHS.
3642 bool Updated = false;
3643 bool IdentityLHS = true, IdentityRHS = true;
3644 SmallVector<int, 32> NewMask(ShuffleMask);
3645 for (unsigned i = 0; i != NumElts; ++i) {
3646 int &M = NewMask[i];
3647 if (M < 0)
3648 continue;
3649 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3650 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3651 Updated = true;
3652 M = -1;
3653 }
3654 IdentityLHS &= (M < 0) || (M == (int)i);
3655 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3656 }
3657
3658 // Update legal shuffle masks based on demanded elements if it won't reduce
3659 // to Identity which can cause premature removal of the shuffle mask.
3660 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3661 SDValue LegalShuffle =
3662 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3663 if (LegalShuffle)
3664 return TLO.CombineTo(Op, LegalShuffle);
3665 }
3666
3667 // Propagate undef/zero elements from LHS/RHS.
3668 for (unsigned i = 0; i != NumElts; ++i) {
3669 int M = ShuffleMask[i];
3670 if (M < 0) {
3671 KnownUndef.setBit(i);
3672 } else if (M < (int)NumElts) {
3673 if (UndefLHS[M])
3674 KnownUndef.setBit(i);
3675 if (ZeroLHS[M])
3676 KnownZero.setBit(i);
3677 } else {
3678 if (UndefRHS[M - NumElts])
3679 KnownUndef.setBit(i);
3680 if (ZeroRHS[M - NumElts])
3681 KnownZero.setBit(i);
3682 }
3683 }
3684 break;
3685 }
3689 APInt SrcUndef, SrcZero;
3690 SDValue Src = Op.getOperand(0);
3691 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3692 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3693 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3694 Depth + 1))
3695 return true;
3696 KnownZero = SrcZero.zextOrTrunc(NumElts);
3697 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3698
3699 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3700 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3701 DemandedSrcElts == 1) {
3702 // aext - if we just need the bottom element then we can bitcast.
3703 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3704 }
3705
3706 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3707 // zext(undef) upper bits are guaranteed to be zero.
3708 if (DemandedElts.isSubsetOf(KnownUndef))
3709 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3710 KnownUndef.clearAllBits();
3711
3712 // zext - if we just need the bottom element then we can mask:
3713 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3714 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3715 Op->isOnlyUserOf(Src.getNode()) &&
3716 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3717 SDLoc DL(Op);
3718 EVT SrcVT = Src.getValueType();
3719 EVT SrcSVT = SrcVT.getScalarType();
3720
3721 // If we're after type legalization and SrcSVT is not legal, use the
3722 // promoted type for creating constants to avoid creating nodes with
3723 // illegal types.
3725 SrcSVT = getLegalTypeToTransformTo(*TLO.DAG.getContext(), SrcSVT);
3726
3727 SmallVector<SDValue> MaskElts;
3728 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3729 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3730 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3731 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3732 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3733 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3734 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3735 }
3736 }
3737 }
3738 break;
3739 }
3740
3741 // TODO: There are more binop opcodes that could be handled here - MIN,
3742 // MAX, saturated math, etc.
3743 case ISD::ADD: {
3744 SDValue Op0 = Op.getOperand(0);
3745 SDValue Op1 = Op.getOperand(1);
3746 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3747 APInt UndefLHS, ZeroLHS;
3748 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3749 Depth + 1, /*AssumeSingleUse*/ true))
3750 return true;
3751 }
3752 [[fallthrough]];
3753 }
3754 case ISD::AVGCEILS:
3755 case ISD::AVGCEILU:
3756 case ISD::AVGFLOORS:
3757 case ISD::AVGFLOORU:
3758 case ISD::OR:
3759 case ISD::XOR:
3760 case ISD::SUB:
3761 case ISD::FADD:
3762 case ISD::FSUB:
3763 case ISD::FMUL:
3764 case ISD::FDIV:
3765 case ISD::FREM: {
3766 SDValue Op0 = Op.getOperand(0);
3767 SDValue Op1 = Op.getOperand(1);
3768
3769 APInt UndefRHS, ZeroRHS;
3770 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3771 Depth + 1))
3772 return true;
3773 APInt UndefLHS, ZeroLHS;
3774 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3775 Depth + 1))
3776 return true;
3777
3778 KnownZero = ZeroLHS & ZeroRHS;
3779 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3780
3781 // Attempt to avoid multi-use ops if we don't need anything from them.
3782 // TODO - use KnownUndef to relax the demandedelts?
3783 if (!DemandedElts.isAllOnes())
3784 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3785 return true;
3786 break;
3787 }
3788 case ISD::SHL:
3789 case ISD::SRL:
3790 case ISD::SRA:
3791 case ISD::ROTL:
3792 case ISD::ROTR: {
3793 SDValue Op0 = Op.getOperand(0);
3794 SDValue Op1 = Op.getOperand(1);
3795
3796 APInt UndefRHS, ZeroRHS;
3797 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3798 Depth + 1))
3799 return true;
3800 APInt UndefLHS, ZeroLHS;
3801 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3802 Depth + 1))
3803 return true;
3804
3805 KnownZero = ZeroLHS;
3806 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3807
3808 // Attempt to avoid multi-use ops if we don't need anything from them.
3809 // TODO - use KnownUndef to relax the demandedelts?
3810 if (!DemandedElts.isAllOnes())
3811 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3812 return true;
3813 break;
3814 }
3815 case ISD::MUL:
3816 case ISD::MULHU:
3817 case ISD::MULHS:
3818 case ISD::AND: {
3819 SDValue Op0 = Op.getOperand(0);
3820 SDValue Op1 = Op.getOperand(1);
3821
3822 APInt SrcUndef, SrcZero;
3823 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3824 Depth + 1))
3825 return true;
3826 // FIXME: If we know that a demanded element was zero in Op1 we don't need
3827 // to demand it in Op0 - its guaranteed to be zero. There is however a
3828 // restriction, as we must not make any of the originally demanded elements
3829 // more poisonous. We could reduce amount of elements demanded, but then we
3830 // also need a to inform SimplifyDemandedVectorElts that some elements must
3831 // not be made more poisonous.
3832 if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
3833 TLO, Depth + 1))
3834 return true;
3835
3836 KnownUndef &= DemandedElts;
3837 KnownZero &= DemandedElts;
3838
3839 // If every element pair has a zero/undef/poison then just fold to zero.
3840 // fold (and x, undef/poison) -> 0 / (and x, 0) -> 0
3841 // fold (mul x, undef/poison) -> 0 / (mul x, 0) -> 0
3842 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3843 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3844
3845 // If either side has a zero element, then the result element is zero, even
3846 // if the other is an UNDEF.
3847 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3848 // and then handle 'and' nodes with the rest of the binop opcodes.
3849 KnownZero |= SrcZero;
3850 KnownUndef &= SrcUndef;
3851 KnownUndef &= ~KnownZero;
3852
3853 // Attempt to avoid multi-use ops if we don't need anything from them.
3854 if (!DemandedElts.isAllOnes())
3855 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3856 return true;
3857 break;
3858 }
3859 case ISD::TRUNCATE:
3860 case ISD::SIGN_EXTEND:
3861 case ISD::ZERO_EXTEND:
3862 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3863 KnownZero, TLO, Depth + 1))
3864 return true;
3865
3866 if (!DemandedElts.isAllOnes())
3868 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3869 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3870
3871 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3872 // zext(undef) upper bits are guaranteed to be zero.
3873 if (DemandedElts.isSubsetOf(KnownUndef))
3874 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3875 KnownUndef.clearAllBits();
3876 }
3877 break;
3878 case ISD::SINT_TO_FP:
3879 case ISD::UINT_TO_FP:
3880 case ISD::FP_TO_SINT:
3881 case ISD::FP_TO_UINT:
3882 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3883 KnownZero, TLO, Depth + 1))
3884 return true;
3885 // Don't fall through to generic undef -> undef handling.
3886 return false;
3887 default: {
3888 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3889 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3890 KnownZero, TLO, Depth))
3891 return true;
3892 } else {
3893 KnownBits Known;
3894 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3895 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3896 TLO, Depth, AssumeSingleUse))
3897 return true;
3898 }
3899 break;
3900 }
3901 }
3902 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3903
3904 // Constant fold all undef cases.
3905 // TODO: Handle zero cases as well.
3906 if (DemandedElts.isSubsetOf(KnownUndef))
3907 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3908
3909 return false;
3910}
3911
3912/// Determine which of the bits specified in Mask are known to be either zero or
3913/// one and return them in the Known.
3915 KnownBits &Known,
3916 const APInt &DemandedElts,
3917 const SelectionDAG &DAG,
3918 unsigned Depth) const {
3919 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3920 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3921 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3922 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3923 "Should use MaskedValueIsZero if you don't know whether Op"
3924 " is a target node!");
3925 Known.resetAll();
3926}
3927
3930 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3931 unsigned Depth) const {
3932 Known.resetAll();
3933}
3934
3937 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3938 unsigned Depth) const {
3939 Known.resetAll();
3940}
3941
3943 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3944 // The low bits are known zero if the pointer is aligned.
3945 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3946}
3947
3953
3954/// This method can be implemented by targets that want to expose additional
3955/// information about sign bits to the DAG Combiner.
3957 const APInt &,
3958 const SelectionDAG &,
3959 unsigned Depth) const {
3960 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3961 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3962 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3963 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3964 "Should use ComputeNumSignBits if you don't know whether Op"
3965 " is a target node!");
3966 return 1;
3967}
3968
3970 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3971 const MachineRegisterInfo &MRI, unsigned Depth) const {
3972 return 1;
3973}
3974
3976 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3977 TargetLoweringOpt &TLO, unsigned Depth) const {
3978 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3979 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3980 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3981 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3982 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3983 " is a target node!");
3984 return false;
3985}
3986
3988 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3989 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3990 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3991 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3992 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3993 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3994 "Should use SimplifyDemandedBits if you don't know whether Op"
3995 " is a target node!");
3996 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3997 return false;
3998}
3999
4001 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4002 SelectionDAG &DAG, unsigned Depth) const {
4003 assert(
4004 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4005 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4006 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4007 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4008 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
4009 " is a target node!");
4010 return SDValue();
4011}
4012
4013SDValue
4016 SelectionDAG &DAG) const {
4017 bool LegalMask = isShuffleMaskLegal(Mask, VT);
4018 if (!LegalMask) {
4019 std::swap(N0, N1);
4021 LegalMask = isShuffleMaskLegal(Mask, VT);
4022 }
4023
4024 if (!LegalMask)
4025 return SDValue();
4026
4027 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
4028}
4029
4031 return nullptr;
4032}
4033
4035 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4036 bool PoisonOnly, unsigned Depth) const {
4037 assert(
4038 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4039 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4040 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4041 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4042 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4043 " is a target node!");
4044
4045 // If Op can't create undef/poison and none of its operands are undef/poison
4046 // then Op is never undef/poison.
4047 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
4048 /*ConsiderFlags*/ true, Depth) &&
4049 all_of(Op->ops(), [&](SDValue V) {
4050 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
4051 Depth + 1);
4052 });
4053}
4054
4056 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4057 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
4058 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4059 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4060 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4061 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4062 "Should use canCreateUndefOrPoison if you don't know whether Op"
4063 " is a target node!");
4064 // Be conservative and return true.
4065 return true;
4066}
4067
4069 KnownFPClass &Known,
4070 const APInt &DemandedElts,
4071 const SelectionDAG &DAG,
4072 unsigned Depth) const {
4073 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4074 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4075 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4076 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4077 "Should use computeKnownFPClass if you don't know whether Op"
4078 " is a target node!");
4079}
4080
4082 const APInt &DemandedElts,
4083 const SelectionDAG &DAG,
4084 bool SNaN,
4085 unsigned Depth) const {
4086 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4087 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4088 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4089 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4090 "Should use isKnownNeverNaN if you don't know whether Op"
4091 " is a target node!");
4092 return false;
4093}
4094
4096 const APInt &DemandedElts,
4097 APInt &UndefElts,
4098 const SelectionDAG &DAG,
4099 unsigned Depth) const {
4100 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4101 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4102 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4103 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4104 "Should use isSplatValue if you don't know whether Op"
4105 " is a target node!");
4106 return false;
4107}
4108
4109// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4110// work with truncating build vectors and vectors with elements of less than
4111// 8 bits.
4113 if (!N)
4114 return false;
4115
4116 unsigned EltWidth;
4117 APInt CVal;
4118 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4119 /*AllowTruncation=*/true)) {
4120 CVal = CN->getAPIntValue();
4121 EltWidth = N.getValueType().getScalarSizeInBits();
4122 } else
4123 return false;
4124
4125 // If this is a truncating splat, truncate the splat value.
4126 // Otherwise, we may fail to match the expected values below.
4127 if (EltWidth < CVal.getBitWidth())
4128 CVal = CVal.trunc(EltWidth);
4129
4130 switch (getBooleanContents(N.getValueType())) {
4132 return CVal[0];
4134 return CVal.isOne();
4136 return CVal.isAllOnes();
4137 }
4138
4139 llvm_unreachable("Invalid boolean contents");
4140}
4141
4143 if (!N)
4144 return false;
4145
4147 if (!CN) {
4149 if (!BV)
4150 return false;
4151
4152 // Only interested in constant splats, we don't care about undef
4153 // elements in identifying boolean constants and getConstantSplatNode
4154 // returns NULL if all ops are undef;
4155 CN = BV->getConstantSplatNode();
4156 if (!CN)
4157 return false;
4158 }
4159
4160 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4161 return !CN->getAPIntValue()[0];
4162
4163 return CN->isZero();
4164}
4165
4167 bool SExt) const {
4168 if (VT == MVT::i1)
4169 return N->isOne();
4170
4172 switch (Cnt) {
4174 // An extended value of 1 is always true, unless its original type is i1,
4175 // in which case it will be sign extended to -1.
4176 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4179 return N->isAllOnes() && SExt;
4180 }
4181 llvm_unreachable("Unexpected enumeration.");
4182}
4183
4184/// This helper function of SimplifySetCC tries to optimize the comparison when
4185/// either operand of the SetCC node is a bitwise-and instruction.
4186SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4187 ISD::CondCode Cond, const SDLoc &DL,
4188 DAGCombinerInfo &DCI) const {
4189 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4190 std::swap(N0, N1);
4191
4192 SelectionDAG &DAG = DCI.DAG;
4193 EVT OpVT = N0.getValueType();
4194 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4195 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4196 return SDValue();
4197
4198 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4199 // iff everything but LSB is known zero:
4200 if (Cond == ISD::SETNE && isNullConstant(N1) &&
4203 unsigned NumEltBits = OpVT.getScalarSizeInBits();
4204 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4205 if (DAG.MaskedValueIsZero(N0, UpperBits))
4206 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4207 }
4208
4209 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4210 // test in a narrow type that we can truncate to with no cost. Examples:
4211 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4212 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4213 // TODO: This conservatively checks for type legality on the source and
4214 // destination types. That may inhibit optimizations, but it also
4215 // allows setcc->shift transforms that may be more beneficial.
4216 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4217 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4218 isTypeLegal(OpVT) && N0.hasOneUse()) {
4219 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4220 AndC->getAPIntValue().getActiveBits());
4221 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4222 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4223 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4224 return DAG.getSetCC(DL, VT, Trunc, Zero,
4226 }
4227 }
4228
4229 // Match these patterns in any of their permutations:
4230 // (X & Y) == Y
4231 // (X & Y) != Y
4232 SDValue X, Y;
4233 if (N0.getOperand(0) == N1) {
4234 X = N0.getOperand(1);
4235 Y = N0.getOperand(0);
4236 } else if (N0.getOperand(1) == N1) {
4237 X = N0.getOperand(0);
4238 Y = N0.getOperand(1);
4239 } else {
4240 return SDValue();
4241 }
4242
4243 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4244 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4245 // its liable to create and infinite loop.
4246 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4247 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4249 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4250 // Note that where Y is variable and is known to have at most one bit set
4251 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4252 // equivalent when Y == 0.
4253 assert(OpVT.isInteger());
4255 if (DCI.isBeforeLegalizeOps() ||
4257 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4258 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4259 // If the target supports an 'and-not' or 'and-complement' logic operation,
4260 // try to use that to make a comparison operation more efficient.
4261 // But don't do this transform if the mask is a single bit because there are
4262 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4263 // 'rlwinm' on PPC).
4264
4265 // Bail out if the compare operand that we want to turn into a zero is
4266 // already a zero (otherwise, infinite loop).
4267 if (isNullConstant(Y))
4268 return SDValue();
4269
4270 // Transform this into: ~X & Y == 0.
4271 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4272 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4273 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4274 }
4275
4276 return SDValue();
4277}
4278
4279/// This helper function of SimplifySetCC tries to optimize the comparison when
4280/// either operand of the SetCC node is a bitwise-or instruction.
4281/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4282SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4283 ISD::CondCode Cond, const SDLoc &DL,
4284 DAGCombinerInfo &DCI) const {
4285 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4286 std::swap(N0, N1);
4287
4288 SelectionDAG &DAG = DCI.DAG;
4289 EVT OpVT = N0.getValueType();
4290 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4291 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4292 return SDValue();
4293
4294 // (X | Y) == Y
4295 // (X | Y) != Y
4296 SDValue X;
4297 if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(X)) {
4298 // If the target supports an 'and-not' or 'and-complement' logic operation,
4299 // try to use that to make a comparison operation more efficient.
4300
4301 // Bail out if the compare operand that we want to turn into a zero is
4302 // already a zero (otherwise, infinite loop).
4303 if (isNullConstant(N1))
4304 return SDValue();
4305
4306 // Transform this into: X & ~Y ==/!= 0.
4307 SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
4308 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
4309 return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
4310 }
4311
4312 return SDValue();
4313}
4314
4315/// There are multiple IR patterns that could be checking whether certain
4316/// truncation of a signed number would be lossy or not. The pattern which is
4317/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4318/// We are looking for the following pattern: (KeptBits is a constant)
4319/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4320/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4321/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4322/// We will unfold it into the natural trunc+sext pattern:
4323/// ((%x << C) a>> C) dstcond %x
4324/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4325SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4326 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4327 const SDLoc &DL) const {
4328 // We must be comparing with a constant.
4329 ConstantSDNode *C1;
4330 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4331 return SDValue();
4332
4333 // N0 should be: add %x, (1 << (KeptBits-1))
4334 if (N0->getOpcode() != ISD::ADD)
4335 return SDValue();
4336
4337 // And we must be 'add'ing a constant.
4338 ConstantSDNode *C01;
4339 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4340 return SDValue();
4341
4342 SDValue X = N0->getOperand(0);
4343 EVT XVT = X.getValueType();
4344
4345 // Validate constants ...
4346
4347 APInt I1 = C1->getAPIntValue();
4348
4349 ISD::CondCode NewCond;
4350 if (Cond == ISD::CondCode::SETULT) {
4351 NewCond = ISD::CondCode::SETEQ;
4352 } else if (Cond == ISD::CondCode::SETULE) {
4353 NewCond = ISD::CondCode::SETEQ;
4354 // But need to 'canonicalize' the constant.
4355 I1 += 1;
4356 } else if (Cond == ISD::CondCode::SETUGT) {
4357 NewCond = ISD::CondCode::SETNE;
4358 // But need to 'canonicalize' the constant.
4359 I1 += 1;
4360 } else if (Cond == ISD::CondCode::SETUGE) {
4361 NewCond = ISD::CondCode::SETNE;
4362 } else
4363 return SDValue();
4364
4365 APInt I01 = C01->getAPIntValue();
4366
4367 auto checkConstants = [&I1, &I01]() -> bool {
4368 // Both of them must be power-of-two, and the constant from setcc is bigger.
4369 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4370 };
4371
4372 if (checkConstants()) {
4373 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4374 } else {
4375 // What if we invert constants? (and the target predicate)
4376 I1.negate();
4377 I01.negate();
4378 assert(XVT.isInteger());
4379 NewCond = getSetCCInverse(NewCond, XVT);
4380 if (!checkConstants())
4381 return SDValue();
4382 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4383 }
4384
4385 // They are power-of-two, so which bit is set?
4386 const unsigned KeptBits = I1.logBase2();
4387 const unsigned KeptBitsMinusOne = I01.logBase2();
4388
4389 // Magic!
4390 if (KeptBits != (KeptBitsMinusOne + 1))
4391 return SDValue();
4392 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4393
4394 // We don't want to do this in every single case.
4395 SelectionDAG &DAG = DCI.DAG;
4396 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4397 return SDValue();
4398
4399 // Unfold into: sext_inreg(%x) cond %x
4400 // Where 'cond' will be either 'eq' or 'ne'.
4401 SDValue SExtInReg = DAG.getNode(
4403 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4404 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4405}
4406
4407// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4408SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4409 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4410 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4412 "Should be a comparison with 0.");
4413 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4414 "Valid only for [in]equality comparisons.");
4415
4416 unsigned NewShiftOpcode;
4417 SDValue X, C, Y;
4418
4419 SelectionDAG &DAG = DCI.DAG;
4420
4421 // Look for '(C l>>/<< Y)'.
4422 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4423 // The shift should be one-use.
4424 if (!V.hasOneUse())
4425 return false;
4426 unsigned OldShiftOpcode = V.getOpcode();
4427 switch (OldShiftOpcode) {
4428 case ISD::SHL:
4429 NewShiftOpcode = ISD::SRL;
4430 break;
4431 case ISD::SRL:
4432 NewShiftOpcode = ISD::SHL;
4433 break;
4434 default:
4435 return false; // must be a logical shift.
4436 }
4437 // We should be shifting a constant.
4438 // FIXME: best to use isConstantOrConstantVector().
4439 C = V.getOperand(0);
4440 ConstantSDNode *CC =
4441 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4442 if (!CC)
4443 return false;
4444 Y = V.getOperand(1);
4445
4446 ConstantSDNode *XC =
4447 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4449 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4450 };
4451
4452 // LHS of comparison should be an one-use 'and'.
4453 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4454 return SDValue();
4455
4456 X = N0.getOperand(0);
4457 SDValue Mask = N0.getOperand(1);
4458
4459 // 'and' is commutative!
4460 if (!Match(Mask)) {
4461 std::swap(X, Mask);
4462 if (!Match(Mask))
4463 return SDValue();
4464 }
4465
4466 EVT VT = X.getValueType();
4467
4468 // Produce:
4469 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4470 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4471 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4472 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4473 return T2;
4474}
4475
4476/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4477/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4478/// handle the commuted versions of these patterns.
4479SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4480 ISD::CondCode Cond, const SDLoc &DL,
4481 DAGCombinerInfo &DCI) const {
4482 unsigned BOpcode = N0.getOpcode();
4483 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4484 "Unexpected binop");
4485 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4486
4487 // (X + Y) == X --> Y == 0
4488 // (X - Y) == X --> Y == 0
4489 // (X ^ Y) == X --> Y == 0
4490 SelectionDAG &DAG = DCI.DAG;
4491 EVT OpVT = N0.getValueType();
4492 SDValue X = N0.getOperand(0);
4493 SDValue Y = N0.getOperand(1);
4494 if (X == N1)
4495 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4496
4497 if (Y != N1)
4498 return SDValue();
4499
4500 // (X + Y) == Y --> X == 0
4501 // (X ^ Y) == Y --> X == 0
4502 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4503 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4504
4505 // The shift would not be valid if the operands are boolean (i1).
4506 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4507 return SDValue();
4508
4509 // (X - Y) == Y --> X == Y << 1
4510 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4511 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4512 if (!DCI.isCalledByLegalizer())
4513 DCI.AddToWorklist(YShl1.getNode());
4514 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4515}
4516
                                      SDValue N0, const APInt &C1,
                                      ISD::CondCode Cond, const SDLoc &dl,
                                      SelectionDAG &DAG) {
  // Look through truncs that don't change the value of a ctpop.
  // FIXME: Add vector support? Need to be careful with setcc result type below.
  SDValue CTPOP = N0;
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
    CTPOP = N0.getOperand(0);

  // We can only replace the ctpop if this setcc is its sole user; otherwise
  // the expansion below would not remove the ctpop node.
  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
    return SDValue();

  EVT CTVT = CTPOP.getValueType();
  SDValue CTOp = CTPOP.getOperand(0);

  // Expand a power-of-2-or-zero comparison based on ctpop:
  // (ctpop x) u< 2 -> (x & x-1) == 0
  // (ctpop x) u> 1 -> (x & x-1) != 0
  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
    // Keep the CTPOP if it is a cheap vector op.
    if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
      return SDValue();

    // Each "clear lowest set bit" pass costs an add + and; bail out if the
    // constant would require more passes than the target considers worthwhile.
    unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
    if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
      return SDValue();
    if (C1 == 0 && (Cond == ISD::SETULT))
      return SDValue(); // This is handled elsewhere.

    // Number of "x & (x-1)" rounds needed to test "popcount < C1" (SETULT) or
    // "popcount > C1" (SETUGT).
    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);

    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    SDValue Result = CTOp;
    // Each iteration clears the lowest remaining set bit:
    // Result = Result & (Result - 1).
    for (unsigned i = 0; i < Passes; i++) {
      SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
      Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
    }
    return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
  }

  // Expand a power-of-2 comparison based on ctpop
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
    // Keep the CTPOP if it is cheap.
    if (TLI.isCtpopFast(CTVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, dl, CTVT);
    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    assert(CTVT.isInteger());
    // Add = x - 1, shared by both expansions below.
    SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);

    // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1,
    // so check before emitting a potentially unnecessary op.
    if (DAG.isKnownNeverZero(CTOp)) {
      // (ctpop x) == 1 --> (x & x-1) == 0
      // (ctpop x) != 1 --> (x & x-1) != 0
      SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
      SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
      return RHS;
    }

    // General case (x may be zero):
    // (ctpop x) == 1 --> (x ^ x-1) > x-1
    // (ctpop x) != 1 --> (x ^ x-1) <= x-1
    SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
    return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
  }

  return SDValue();
}
4590
                                   ISD::CondCode Cond, const SDLoc &dl,
                                   SelectionDAG &DAG) {
  // Rotating a value cannot change whether it is all-zeros or all-ones, so an
  // equality compare against 0/-1 can look through the rotate. Only handle
  // eq/ne predicates here.
  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
    return SDValue();

  // The RHS must be a (splat) constant 0 or -1.
  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
  if (!C1 || !(C1->isZero() || C1->isAllOnes()))
    return SDValue();

  // Returns the un-rotated input if V is a rotate, else a null SDValue.
  auto getRotateSource = [](SDValue X) {
    if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
      return X.getOperand(0);
    return SDValue();
  };

  // Peek through a rotated value compared against 0 or -1:
  // (rot X, Y) == 0/-1 --> X == 0/-1
  // (rot X, Y) != 0/-1 --> X != 0/-1
  if (SDValue R = getRotateSource(N0))
    return DAG.getSetCC(dl, VT, R, N1, Cond);

  // Peek through an 'or' of a rotated value compared against 0:
  // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
  // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
  //
  // TODO: Add the 'and' with -1 sibling.
  // TODO: Recurse through a series of 'or' ops to find the rotate.
  EVT OpVT = N0.getValueType();
  if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
    // Try the rotate in either operand of the 'or'.
    if (SDValue R = getRotateSource(N0.getOperand(0))) {
      SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
      return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
    }
    if (SDValue R = getRotateSource(N0.getOperand(1))) {
      SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
      return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
    }
  }

  return SDValue();
}
4633
                                        ISD::CondCode Cond, const SDLoc &dl,
                                        SelectionDAG &DAG) {
  // If we are testing for all-bits-clear, we might be able to do that with
  // less shifting since bit-order does not matter.
  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
    return SDValue();

  // Only the compare-against-zero form is handled.
  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
  if (!C1 || !C1->isZero())
    return SDValue();

  // LHS must be a single-use funnel shift.
  if (!N0.hasOneUse() ||
      (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
    return SDValue();

  // The shift amount must be a constant; funnel-shift amounts are taken
  // modulo the bitwidth.
  unsigned BitWidth = N0.getScalarValueSizeInBits();
  auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
  if (!ShAmtC)
    return SDValue();

  uint64_t ShAmt = ShAmtC->getAPIntValue().urem(BitWidth);
  if (ShAmt == 0)
    return SDValue();

  // Canonicalize fshr as fshl to reduce pattern-matching.
  if (N0.getOpcode() == ISD::FSHR)
    ShAmt = BitWidth - ShAmt;

  // Match an 'or' with a specific operand 'Other' in either commuted variant.
  // On success, X is the operand equal to 'Other' and Y is the other operand.
  SDValue X, Y;
  auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
    if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
      return false;
    if (Or.getOperand(0) == Other) {
      X = Or.getOperand(0);
      Y = Or.getOperand(1);
      return true;
    }
    if (Or.getOperand(1) == Other) {
      X = Or.getOperand(1);
      Y = Or.getOperand(0);
      return true;
    }
    return false;
  };

  EVT OpVT = N0.getValueType();
  EVT ShAmtVT = N0.getOperand(2).getValueType();
  SDValue F0 = N0.getOperand(0);
  SDValue F1 = N0.getOperand(1);
  if (matchOr(F0, F1)) {
    // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
    SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
    SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
  }
  if (matchOr(F1, F0)) {
    // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
    SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
    SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
  }

  return SDValue();
}
4702
4703/// Try to simplify a setcc built with the specified operands and cc. If it is
4704/// unable to simplify it, return a null SDValue.
4706 ISD::CondCode Cond, bool foldBooleans,
4707 DAGCombinerInfo &DCI,
4708 const SDLoc &dl) const {
4709 SelectionDAG &DAG = DCI.DAG;
4710 const DataLayout &Layout = DAG.getDataLayout();
4711 EVT OpVT = N0.getValueType();
4712 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4713
4714 // Constant fold or commute setcc.
4715 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4716 return Fold;
4717
4718 bool N0ConstOrSplat =
4719 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4720 bool N1ConstOrSplat =
4721 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4722
4723 // Canonicalize toward having the constant on the RHS.
4724 // TODO: Handle non-splat vector constants. All undef causes trouble.
4725 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4726 // infinite loop here when we encounter one.
4728 if (N0ConstOrSplat && !N1ConstOrSplat &&
4729 (DCI.isBeforeLegalizeOps() ||
4730 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4731 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4732
4733 // If we have a subtract with the same 2 non-constant operands as this setcc
4734 // -- but in reverse order -- then try to commute the operands of this setcc
4735 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4736 // instruction on some targets.
4737 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4738 (DCI.isBeforeLegalizeOps() ||
4739 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4740 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4741 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4742 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4743
4744 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4745 return V;
4746
4747 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4748 return V;
4749
4750 if (auto *N1C = isConstOrConstSplat(N1)) {
4751 const APInt &C1 = N1C->getAPIntValue();
4752
4753 // Optimize some CTPOP cases.
4754 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4755 return V;
4756
4757 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4758 // X * Y == 0 --> (X == 0) || (Y == 0)
4759 // X * Y != 0 --> (X != 0) && (Y != 0)
4760 // TODO: This bails out if minsize is set, but if the target doesn't have a
4761 // single instruction multiply for this type, it would likely be
4762 // smaller to decompose.
4763 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4764 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4765 (N0->getFlags().hasNoUnsignedWrap() ||
4766 N0->getFlags().hasNoSignedWrap()) &&
4767 !Attr.hasFnAttr(Attribute::MinSize)) {
4768 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4769 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4770 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4771 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4772 }
4773
4774 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4775 // equality comparison, then we're just comparing whether X itself is
4776 // zero.
4777 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4778 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4780 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4781 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4782 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4783 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4784 // (srl (ctlz x), 5) == 0 -> X != 0
4785 // (srl (ctlz x), 5) != 1 -> X != 0
4786 Cond = ISD::SETNE;
4787 } else {
4788 // (srl (ctlz x), 5) != 0 -> X == 0
4789 // (srl (ctlz x), 5) == 1 -> X == 0
4790 Cond = ISD::SETEQ;
4791 }
4792 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4793 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4794 Cond);
4795 }
4796 }
4797 }
4798 }
4799
4800 // setcc X, 0, setlt --> X (when X is all sign bits)
4801 // setcc X, 0, setne --> X (when X is all sign bits)
4802 //
4803 // When we know that X has 0 or -1 in each element (or scalar), this
4804 // comparison will produce X. This is only true when boolean contents are
4805 // represented via 0s and -1s.
4806 if (VT == OpVT &&
4807 // Check that the result of setcc is 0 and -1.
4809 // Match only for checks X < 0 and X != 0
4810 (Cond == ISD::SETLT || Cond == ISD::SETNE) && isNullOrNullSplat(N1) &&
4811 // The identity holds iff we know all sign bits for all lanes.
4813 return N0;
4814
4815 // FIXME: Support vectors.
4816 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4817 const APInt &C1 = N1C->getAPIntValue();
4818
4819 // (zext x) == C --> x == (trunc C)
4820 // (sext x) == C --> x == (trunc C)
4821 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4822 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4823 unsigned MinBits = N0.getValueSizeInBits();
4824 SDValue PreExt;
4825 bool Signed = false;
4826 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4827 // ZExt
4828 MinBits = N0->getOperand(0).getValueSizeInBits();
4829 PreExt = N0->getOperand(0);
4830 } else if (N0->getOpcode() == ISD::AND) {
4831 // DAGCombine turns costly ZExts into ANDs
4832 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4833 if ((C->getAPIntValue()+1).isPowerOf2()) {
4834 MinBits = C->getAPIntValue().countr_one();
4835 PreExt = N0->getOperand(0);
4836 }
4837 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4838 // SExt
4839 MinBits = N0->getOperand(0).getValueSizeInBits();
4840 PreExt = N0->getOperand(0);
4841 Signed = true;
4842 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4843 // ZEXTLOAD / SEXTLOAD
4844 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4845 MinBits = LN0->getMemoryVT().getSizeInBits();
4846 PreExt = N0;
4847 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4848 Signed = true;
4849 MinBits = LN0->getMemoryVT().getSizeInBits();
4850 PreExt = N0;
4851 }
4852 }
4853
4854 // Figure out how many bits we need to preserve this constant.
4855 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4856
4857 // Make sure we're not losing bits from the constant.
4858 if (MinBits > 0 &&
4859 MinBits < C1.getBitWidth() &&
4860 MinBits >= ReqdBits) {
4861 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4862 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4863 // Will get folded away.
4864 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4865 if (MinBits == 1 && C1 == 1)
4866 // Invert the condition.
4867 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4869 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4870 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4871 }
4872
4873 // If truncating the setcc operands is not desirable, we can still
4874 // simplify the expression in some cases:
4875 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4876 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4877 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4878 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4879 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4880 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4881 SDValue TopSetCC = N0->getOperand(0);
4882 unsigned N0Opc = N0->getOpcode();
4883 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4884 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4885 TopSetCC.getOpcode() == ISD::SETCC &&
4886 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4887 (isConstFalseVal(N1) ||
4888 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4889
4890 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4891 (!N1C->isZero() && Cond == ISD::SETNE);
4892
4893 if (!Inverse)
4894 return TopSetCC;
4895
4897 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4898 TopSetCC.getOperand(0).getValueType());
4899 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4900 TopSetCC.getOperand(1),
4901 InvCond);
4902 }
4903 }
4904 }
4905
4906 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4907 // equality or unsigned, and all 1 bits of the const are in the same
4908 // partial word, see if we can shorten the load.
4909 if (DCI.isBeforeLegalize() &&
4911 N0.getOpcode() == ISD::AND && C1 == 0 &&
4912 N0.getNode()->hasOneUse() &&
4913 isa<LoadSDNode>(N0.getOperand(0)) &&
4914 N0.getOperand(0).getNode()->hasOneUse() &&
4916 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4917 APInt bestMask;
4918 unsigned bestWidth = 0, bestOffset = 0;
4919 if (Lod->isSimple() && Lod->isUnindexed() &&
4920 (Lod->getMemoryVT().isByteSized() ||
4921 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4922 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4923 unsigned origWidth = N0.getValueSizeInBits();
4924 unsigned maskWidth = origWidth;
4925 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4926 // 8 bits, but have to be careful...
4927 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4928 origWidth = Lod->getMemoryVT().getSizeInBits();
4929 const APInt &Mask = N0.getConstantOperandAPInt(1);
        // Only consider power-of-2 widths (and at least one byte) as candidates
4931 // for the narrowed load.
4932 for (unsigned width = 8; width < origWidth; width *= 2) {
4933 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4934 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4935 // Avoid accessing any padding here for now (we could use memWidth
4936 // instead of origWidth here otherwise).
4937 unsigned maxOffset = origWidth - width;
4938 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4939 if (Mask.isSubsetOf(newMask)) {
4940 unsigned ptrOffset =
4941 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4942 unsigned IsFast = 0;
4943 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4944 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4946 ptrOffset / 8) &&
4948 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4949 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4950 IsFast) {
4951 bestOffset = ptrOffset / 8;
4952 bestMask = Mask.lshr(offset);
4953 bestWidth = width;
4954 break;
4955 }
4956 }
4957 newMask <<= 8;
4958 }
4959 if (bestWidth)
4960 break;
4961 }
4962 }
4963 if (bestWidth) {
4964 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4965 SDValue Ptr = Lod->getBasePtr();
4966 if (bestOffset != 0)
4967 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4968 SDValue NewLoad =
4969 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4970 Lod->getPointerInfo().getWithOffset(bestOffset),
4971 Lod->getBaseAlign());
4972 SDValue And =
4973 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4974 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4975 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4976 }
4977 }
4978
4979 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4980 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4981 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4982
4983 // If the comparison constant has bits in the upper part, the
4984 // zero-extended value could never match.
4986 C1.getBitWidth() - InSize))) {
4987 switch (Cond) {
4988 case ISD::SETUGT:
4989 case ISD::SETUGE:
4990 case ISD::SETEQ:
4991 return DAG.getConstant(0, dl, VT);
4992 case ISD::SETULT:
4993 case ISD::SETULE:
4994 case ISD::SETNE:
4995 return DAG.getConstant(1, dl, VT);
4996 case ISD::SETGT:
4997 case ISD::SETGE:
4998 // True if the sign bit of C1 is set.
4999 return DAG.getConstant(C1.isNegative(), dl, VT);
5000 case ISD::SETLT:
5001 case ISD::SETLE:
5002 // True if the sign bit of C1 isn't set.
5003 return DAG.getConstant(C1.isNonNegative(), dl, VT);
5004 default:
5005 break;
5006 }
5007 }
5008
5009 // Otherwise, we can perform the comparison with the low bits.
5010 switch (Cond) {
5011 case ISD::SETEQ:
5012 case ISD::SETNE:
5013 case ISD::SETUGT:
5014 case ISD::SETUGE:
5015 case ISD::SETULT:
5016 case ISD::SETULE: {
5017 EVT newVT = N0.getOperand(0).getValueType();
5018 // FIXME: Should use isNarrowingProfitable.
5019 if (DCI.isBeforeLegalizeOps() ||
5020 (isOperationLegal(ISD::SETCC, newVT) &&
5021 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
5023 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
5024 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
5025
5026 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
5027 NewConst, Cond);
5028 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
5029 }
5030 break;
5031 }
5032 default:
5033 break; // todo, be more careful with signed comparisons
5034 }
5035 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
5036 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5038 OpVT)) {
5039 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
5040 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
5041 EVT ExtDstTy = N0.getValueType();
5042 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
5043
5044 // If the constant doesn't fit into the number of bits for the source of
5045 // the sign extension, it is impossible for both sides to be equal.
5046 if (C1.getSignificantBits() > ExtSrcTyBits)
5047 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
5048
5049 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
5050 ExtDstTy != ExtSrcTy && "Unexpected types!");
5051 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
5052 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
5053 DAG.getConstant(Imm, dl, ExtDstTy));
5054 if (!DCI.isCalledByLegalizer())
5055 DCI.AddToWorklist(ZextOp.getNode());
5056 // Otherwise, make this a use of a zext.
5057 return DAG.getSetCC(dl, VT, ZextOp,
5058 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
5059 } else if ((N1C->isZero() || N1C->isOne()) &&
5060 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5061 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
5062 // excluded as they are handled below whilst checking for foldBooleans.
5063 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
5064 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
5065 (N0.getValueType() == MVT::i1 ||
5069 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5070 if (TrueWhenTrue)
5071 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
5072 // Invert the condition.
5073 if (N0.getOpcode() == ISD::SETCC) {
5076 if (DCI.isBeforeLegalizeOps() ||
5078 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
5079 }
5080 }
5081
5082 if ((N0.getOpcode() == ISD::XOR ||
5083 (N0.getOpcode() == ISD::AND &&
5084 N0.getOperand(0).getOpcode() == ISD::XOR &&
5085 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
5086 isOneConstant(N0.getOperand(1))) {
5087 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5088 // can only do this if the top bits are known zero.
5089 unsigned BitWidth = N0.getValueSizeInBits();
5090 if (DAG.MaskedValueIsZero(N0,
5092 BitWidth-1))) {
5093 // Okay, get the un-inverted input value.
5094 SDValue Val;
5095 if (N0.getOpcode() == ISD::XOR) {
5096 Val = N0.getOperand(0);
5097 } else {
5098 assert(N0.getOpcode() == ISD::AND &&
5099 N0.getOperand(0).getOpcode() == ISD::XOR);
5100 // ((X^1)&1)^1 -> X & 1
5101 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
5102 N0.getOperand(0).getOperand(0),
5103 N0.getOperand(1));
5104 }
5105
5106 return DAG.getSetCC(dl, VT, Val, N1,
5108 }
5109 } else if (N1C->isOne()) {
5110 SDValue Op0 = N0;
5111 if (Op0.getOpcode() == ISD::TRUNCATE)
5112 Op0 = Op0.getOperand(0);
5113
5114 if ((Op0.getOpcode() == ISD::XOR) &&
5115 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
5116 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
5117 SDValue XorLHS = Op0.getOperand(0);
5118 SDValue XorRHS = Op0.getOperand(1);
5119 // Ensure that the input setccs return an i1 type or 0/1 value.
5120 if (Op0.getValueType() == MVT::i1 ||
5125 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5127 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
5128 }
5129 }
5130 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
5131 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5132 if (Op0.getValueType().bitsGT(VT))
5133 Op0 = DAG.getNode(ISD::AND, dl, VT,
5134 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
5135 DAG.getConstant(1, dl, VT));
5136 else if (Op0.getValueType().bitsLT(VT))
5137 Op0 = DAG.getNode(ISD::AND, dl, VT,
5138 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
5139 DAG.getConstant(1, dl, VT));
5140
5141 return DAG.getSetCC(dl, VT, Op0,
5142 DAG.getConstant(0, dl, Op0.getValueType()),
5144 }
5145 if (Op0.getOpcode() == ISD::AssertZext &&
5146 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
5147 return DAG.getSetCC(dl, VT, Op0,
5148 DAG.getConstant(0, dl, Op0.getValueType()),
5150 }
5151 }
5152
5153 // Given:
5154 // icmp eq/ne (urem %x, %y), 0
5155 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5156 // icmp eq/ne %x, 0
5157 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5158 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5159 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
5160 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
5161 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5162 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
5163 }
5164
5165 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5166 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5167 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5169 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
5170 N1C->isAllOnes()) {
5171 return DAG.getSetCC(dl, VT, N0.getOperand(0),
5172 DAG.getConstant(0, dl, OpVT),
5174 }
5175
5176 // fold (setcc (trunc x) c) -> (setcc x c)
5177 if (N0.getOpcode() == ISD::TRUNCATE &&
5179 (N0->getFlags().hasNoSignedWrap() &&
5182 EVT NewVT = N0.getOperand(0).getValueType();
5183 SDValue NewConst = DAG.getConstant(
5185 ? C1.sext(NewVT.getSizeInBits())
5186 : C1.zext(NewVT.getSizeInBits()),
5187 dl, NewVT);
5188 return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5189 }
5190
5191 if (SDValue V =
5192 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
5193 return V;
5194 }
5195
5196 // These simplifications apply to splat vectors as well.
5197 // TODO: Handle more splat vector cases.
5198 if (auto *N1C = isConstOrConstSplat(N1)) {
5199 const APInt &C1 = N1C->getAPIntValue();
5200
5201 APInt MinVal, MaxVal;
5202 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
5204 MinVal = APInt::getSignedMinValue(OperandBitSize);
5205 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
5206 } else {
5207 MinVal = APInt::getMinValue(OperandBitSize);
5208 MaxVal = APInt::getMaxValue(OperandBitSize);
5209 }
5210
5211 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5212 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5213 // X >= MIN --> true
5214 if (C1 == MinVal)
5215 return DAG.getBoolConstant(true, dl, VT, OpVT);
5216
5217 if (!VT.isVector()) { // TODO: Support this for vectors.
5218 // X >= C0 --> X > (C0 - 1)
5219 APInt C = C1 - 1;
5221 if ((DCI.isBeforeLegalizeOps() ||
5222 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5223 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5224 isLegalICmpImmediate(C.getSExtValue())))) {
5225 return DAG.getSetCC(dl, VT, N0,
5226 DAG.getConstant(C, dl, N1.getValueType()),
5227 NewCC);
5228 }
5229 }
5230 }
5231
5232 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5233 // X <= MAX --> true
5234 if (C1 == MaxVal)
5235 return DAG.getBoolConstant(true, dl, VT, OpVT);
5236
5237 // X <= C0 --> X < (C0 + 1)
5238 if (!VT.isVector()) { // TODO: Support this for vectors.
5239 APInt C = C1 + 1;
5241 if ((DCI.isBeforeLegalizeOps() ||
5242 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5243 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5244 isLegalICmpImmediate(C.getSExtValue())))) {
5245 return DAG.getSetCC(dl, VT, N0,
5246 DAG.getConstant(C, dl, N1.getValueType()),
5247 NewCC);
5248 }
5249 }
5250 }
5251
5252 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5253 if (C1 == MinVal)
5254 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5255
5256 // TODO: Support this for vectors after legalize ops.
5257 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5258 // Canonicalize setlt X, Max --> setne X, Max
5259 if (C1 == MaxVal)
5260 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5261
5262 // If we have setult X, 1, turn it into seteq X, 0
5263 if (C1 == MinVal+1)
5264 return DAG.getSetCC(dl, VT, N0,
5265 DAG.getConstant(MinVal, dl, N0.getValueType()),
5266 ISD::SETEQ);
5267 }
5268 }
5269
5270 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5271 if (C1 == MaxVal)
5272 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5273
5274 // TODO: Support this for vectors after legalize ops.
5275 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5276 // Canonicalize setgt X, Min --> setne X, Min
5277 if (C1 == MinVal)
5278 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5279
5280 // If we have setugt X, Max-1, turn it into seteq X, Max
5281 if (C1 == MaxVal-1)
5282 return DAG.getSetCC(dl, VT, N0,
5283 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5284 ISD::SETEQ);
5285 }
5286 }
5287
5288 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5289 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5290 if (C1.isZero())
5291 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5292 VT, N0, N1, Cond, DCI, dl))
5293 return CC;
5294
5295 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5296 // For example, when high 32-bits of i64 X are known clear:
5297 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5298 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5299 bool CmpZero = N1C->isZero();
5300 bool CmpNegOne = N1C->isAllOnes();
5301 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5302 // Match or(lo,shl(hi,bw/2)) pattern.
5303 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5304 unsigned EltBits = V.getScalarValueSizeInBits();
5305 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5306 return false;
5307 SDValue LHS = V.getOperand(0);
5308 SDValue RHS = V.getOperand(1);
5309 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5310 // Unshifted element must have zero upperbits.
5311 if (RHS.getOpcode() == ISD::SHL &&
5312 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5313 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5314 DAG.MaskedValueIsZero(LHS, HiBits)) {
5315 Lo = LHS;
5316 Hi = RHS.getOperand(0);
5317 return true;
5318 }
5319 if (LHS.getOpcode() == ISD::SHL &&
5320 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5321 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5322 DAG.MaskedValueIsZero(RHS, HiBits)) {
5323 Lo = RHS;
5324 Hi = LHS.getOperand(0);
5325 return true;
5326 }
5327 return false;
5328 };
5329
5330 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5331 unsigned EltBits = N0.getScalarValueSizeInBits();
5332 unsigned HalfBits = EltBits / 2;
5333 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5334 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5335 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5336 SDValue NewN0 =
5337 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5338 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5339 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5340 };
5341
5342 SDValue Lo, Hi;
5343 if (IsConcat(N0, Lo, Hi))
5344 return MergeConcat(Lo, Hi);
5345
5346 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5347 SDValue Lo0, Lo1, Hi0, Hi1;
5348 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5349 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5350 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5351 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5352 }
5353 }
5354 }
5355 }
5356
5357 // If we have "setcc X, C0", check to see if we can shrink the immediate
5358 // by changing cc.
5359 // TODO: Support this for vectors after legalize ops.
5360 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5361 // SETUGT X, SINTMAX -> SETLT X, 0
5362 // SETUGE X, SINTMIN -> SETLT X, 0
5363 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5364 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5365 return DAG.getSetCC(dl, VT, N0,
5366 DAG.getConstant(0, dl, N1.getValueType()),
5367 ISD::SETLT);
5368
5369 // SETULT X, SINTMIN -> SETGT X, -1
5370 // SETULE X, SINTMAX -> SETGT X, -1
5371 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5372 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5373 return DAG.getSetCC(dl, VT, N0,
5374 DAG.getAllOnesConstant(dl, N1.getValueType()),
5375 ISD::SETGT);
5376 }
5377 }
5378
5379 // Back to non-vector simplifications.
5380 // TODO: Can we do these for vector splats?
5381 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5382 const APInt &C1 = N1C->getAPIntValue();
5383 EVT ShValTy = N0.getValueType();
5384
5385 // Fold bit comparisons when we can. This will result in an
5386 // incorrect value when boolean false is negative one, unless
5387 // the bitsize is 1 in which case the false value is the same
5388 // in practice regardless of the representation.
5389 if ((VT.getSizeInBits() == 1 ||
5391 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5392 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5393 N0.getOpcode() == ISD::AND) {
5394 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5395 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5396 // Perform the xform if the AND RHS is a single bit.
5397 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5398 if (AndRHS->getAPIntValue().isPowerOf2() &&
5399 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5400 return DAG.getNode(
5401 ISD::TRUNCATE, dl, VT,
5402 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5403 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5404 }
5405 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5406 // (X & 8) == 8 --> (X & 8) >> 3
5407 // Perform the xform if C1 is a single bit.
5408 unsigned ShCt = C1.logBase2();
5409 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5410 return DAG.getNode(
5411 ISD::TRUNCATE, dl, VT,
5412 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5413 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5414 }
5415 }
5416 }
5417 }
5418
5419 if (C1.getSignificantBits() <= 64 &&
5421 // (X & -256) == 256 -> (X >> 8) == 1
5422 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5423 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5424 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5425 const APInt &AndRHSC = AndRHS->getAPIntValue();
5426 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5427 unsigned ShiftBits = AndRHSC.countr_zero();
5428 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5429 // If using an unsigned shift doesn't yield a legal compare
5430 // immediate, try using sra instead.
5431 APInt NewC = C1.lshr(ShiftBits);
5432 if (NewC.getSignificantBits() <= 64 &&
5434 APInt SignedC = C1.ashr(ShiftBits);
5435 if (SignedC.getSignificantBits() <= 64 &&
5437 SDValue Shift = DAG.getNode(
5438 ISD::SRA, dl, ShValTy, N0.getOperand(0),
5439 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5440 SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
5441 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5442 }
5443 }
5444 SDValue Shift = DAG.getNode(
5445 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5446 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5447 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5448 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5449 }
5450 }
5451 }
5452 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5453 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5454 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5455 // X < 0x100000000 -> (X >> 32) < 1
5456 // X >= 0x100000000 -> (X >> 32) >= 1
5457 // X <= 0x0ffffffff -> (X >> 32) < 1
5458 // X > 0x0ffffffff -> (X >> 32) >= 1
5459 unsigned ShiftBits;
5460 APInt NewC = C1;
5461 ISD::CondCode NewCond = Cond;
5462 if (AdjOne) {
5463 ShiftBits = C1.countr_one();
5464 NewC = NewC + 1;
5465 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5466 } else {
5467 ShiftBits = C1.countr_zero();
5468 }
5469 NewC.lshrInPlace(ShiftBits);
5470 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5472 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5473 SDValue Shift =
5474 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5475 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5476 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5477 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5478 }
5479 }
5480 }
5481 }
5482
5484 auto *CFP = cast<ConstantFPSDNode>(N1);
5485 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5486
5487 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5488 // constant if knowing that the operand is non-nan is enough. We prefer to
5489 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5490 // materialize 0.0.
5491 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5492 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5493
5494 // setcc (fneg x), C -> setcc swap(pred) x, -C
5495 if (N0.getOpcode() == ISD::FNEG) {
5497 if (DCI.isBeforeLegalizeOps() ||
5498 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5499 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5500 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5501 }
5502 }
5503
5504 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5506 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5507 bool IsFabs = N0.getOpcode() == ISD::FABS;
5508 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5509 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5510 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5511 : (IsFabs ? fcInf : fcPosInf);
5512 if (Cond == ISD::SETUEQ)
5513 Flag |= fcNan;
5514 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5515 DAG.getTargetConstant(Flag, dl, MVT::i32));
5516 }
5517 }
5518
5519 // If the condition is not legal, see if we can find an equivalent one
5520 // which is legal.
5522 // If the comparison was an awkward floating-point == or != and one of
5523 // the comparison operands is infinity or negative infinity, convert the
5524 // condition to a less-awkward <= or >=.
5525 if (CFP->getValueAPF().isInfinity()) {
5526 bool IsNegInf = CFP->getValueAPF().isNegative();
5528 switch (Cond) {
5529 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5530 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5531 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5532 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5533 default: break;
5534 }
5535 if (NewCond != ISD::SETCC_INVALID &&
5536 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5537 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5538 }
5539 }
5540 }
5541
5542 if (N0 == N1) {
5543 // The sext(setcc()) => setcc() optimization relies on the appropriate
5544 // constant being emitted.
5545 assert(!N0.getValueType().isInteger() &&
5546 "Integer types should be handled by FoldSetCC");
5547
5548 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5549 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5550 if (UOF == 2) // FP operators that are undefined on NaNs.
5551 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5552 if (UOF == unsigned(EqTrue))
5553 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5554 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5555 // if it is not already.
5556 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5557 if (NewCond != Cond &&
5558 (DCI.isBeforeLegalizeOps() ||
5559 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5560 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5561 }
5562
5563 // ~X > ~Y --> Y > X
5564 // ~X < ~Y --> Y < X
5565 // ~X < C --> X > ~C
5566 // ~X > C --> X < ~C
5567 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5568 N0.getValueType().isInteger()) {
5569 if (isBitwiseNot(N0)) {
5570 if (isBitwiseNot(N1))
5571 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5572
5575 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5576 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5577 }
5578 }
5579 }
5580
5581 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5582 N0.getValueType().isInteger()) {
5583 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5584 N0.getOpcode() == ISD::XOR) {
5585 // Simplify (X+Y) == (X+Z) --> Y == Z
5586 if (N0.getOpcode() == N1.getOpcode()) {
5587 if (N0.getOperand(0) == N1.getOperand(0))
5588 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5589 if (N0.getOperand(1) == N1.getOperand(1))
5590 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5591 if (isCommutativeBinOp(N0.getOpcode())) {
5592 // If X op Y == Y op X, try other combinations.
5593 if (N0.getOperand(0) == N1.getOperand(1))
5594 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5595 Cond);
5596 if (N0.getOperand(1) == N1.getOperand(0))
5597 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5598 Cond);
5599 }
5600 }
5601
5602 // If RHS is a legal immediate value for a compare instruction, we need
5603 // to be careful about increasing register pressure needlessly.
5604 bool LegalRHSImm = false;
5605
5606 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5607 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5608 // Turn (X+C1) == C2 --> X == C2-C1
5609 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5610 return DAG.getSetCC(
5611 dl, VT, N0.getOperand(0),
5612 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5613 dl, N0.getValueType()),
5614 Cond);
5615
5616 // Turn (X^C1) == C2 --> X == C1^C2
5617 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5618 return DAG.getSetCC(
5619 dl, VT, N0.getOperand(0),
5620 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5621 dl, N0.getValueType()),
5622 Cond);
5623 }
5624
5625 // Turn (C1-X) == C2 --> X == C1-C2
5626 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5627 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5628 return DAG.getSetCC(
5629 dl, VT, N0.getOperand(1),
5630 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5631 dl, N0.getValueType()),
5632 Cond);
5633
5634 // Could RHSC fold directly into a compare?
5635 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5636 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5637 }
5638
5639 // (X+Y) == X --> Y == 0 and similar folds.
5640 // Don't do this if X is an immediate that can fold into a cmp
5641 // instruction and X+Y has other uses. It could be an induction variable
5642 // chain, and the transform would increase register pressure.
5643 if (!LegalRHSImm || N0.hasOneUse())
5644 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5645 return V;
5646 }
5647
5648 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5649 N1.getOpcode() == ISD::XOR)
5650 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5651 return V;
5652
5653 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5654 return V;
5655
5656 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
5657 return V;
5658 }
5659
5660 // Fold remainder of division by a constant.
5661 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5662 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5663 // When division is cheap or optimizing for minimum size,
5664 // fall through to DIVREM creation by skipping this fold.
5665 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5666 if (N0.getOpcode() == ISD::UREM) {
5667 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5668 return Folded;
5669 } else if (N0.getOpcode() == ISD::SREM) {
5670 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5671 return Folded;
5672 }
5673 }
5674 }
5675
5676 // Fold away ALL boolean setcc's.
5677 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5678 SDValue Temp;
5679 switch (Cond) {
5680 default: llvm_unreachable("Unknown integer setcc!");
5681 case ISD::SETEQ: // X == Y -> ~(X^Y)
5682 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5683 N0 = DAG.getNOT(dl, Temp, OpVT);
5684 if (!DCI.isCalledByLegalizer())
5685 DCI.AddToWorklist(Temp.getNode());
5686 break;
5687 case ISD::SETNE: // X != Y --> (X^Y)
5688 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5689 break;
5690 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5691 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5692 Temp = DAG.getNOT(dl, N0, OpVT);
5693 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5694 if (!DCI.isCalledByLegalizer())
5695 DCI.AddToWorklist(Temp.getNode());
5696 break;
5697 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5698 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5699 Temp = DAG.getNOT(dl, N1, OpVT);
5700 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5701 if (!DCI.isCalledByLegalizer())
5702 DCI.AddToWorklist(Temp.getNode());
5703 break;
5704 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5705 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5706 Temp = DAG.getNOT(dl, N0, OpVT);
5707 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5708 if (!DCI.isCalledByLegalizer())
5709 DCI.AddToWorklist(Temp.getNode());
5710 break;
5711 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5712 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5713 Temp = DAG.getNOT(dl, N1, OpVT);
5714 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5715 break;
5716 }
5717 if (VT.getScalarType() != MVT::i1) {
5718 if (!DCI.isCalledByLegalizer())
5719 DCI.AddToWorklist(N0.getNode());
5720 // FIXME: If running after legalize, we probably can't do this.
5722 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5723 }
5724 return N0;
5725 }
5726
5727 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5728 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5729 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
5731 N1->getFlags().hasNoUnsignedWrap()) ||
5733 N1->getFlags().hasNoSignedWrap())) &&
5735 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5736 }
5737
5738 // Fold (setcc (sub nsw a, b), zero, s??) -> (setcc a, b, s??)
5739 // TODO: Remove that .isVector() check
5740 if (VT.isVector() && isZeroOrZeroSplat(N1) && N0.getOpcode() == ISD::SUB &&
5742 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), Cond);
5743 }
5744
5745 // Could not fold it.
5746 return SDValue();
5747}
5748
5749/// Returns true (and the GlobalValue and the offset) if the node is a
5750/// GlobalAddress + offset.
// NOTE(review): the opening signature line is not visible in this excerpt;
// usage below shows the parameters are an SDNode *WN, a GlobalValue pointer
// reference GA, and an int64_t &Offset accumulator -- confirm against the
// declaration in the header.
5752 int64_t &Offset) const {
5753
// Peel off any target-specific address-wrapper node before inspecting.
5754 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5755
// Base case: the node itself is a GlobalAddress; report its GlobalValue and
// fold its built-in offset into the accumulator.
5756 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5757 GA = GASD->getGlobal();
5758 Offset += GASD->getOffset();
5759 return true;
5760 }
5761
// Recursive case: an add whose operands are (GA-expression, constant) in
// either order; the constant is accumulated into Offset.
5762 if (N->isAnyAdd()) {
5763 SDValue N1 = N->getOperand(0);
5764 SDValue N2 = N->getOperand(1);
5765 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5766 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5767 Offset += V->getSExtValue();
5768 return true;
5769 }
5770 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5771 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5772 Offset += V->getSExtValue();
5773 return true;
5774 }
5775 }
5776 }
5777
// Not a GlobalAddress-plus-constant pattern.
5778 return false;
5779}
5780
// NOTE(review): the opening signature line is not visible in this excerpt.
// Judging by the DAGCombinerInfo parameter and the empty-SDValue result,
// this is the default no-op DAG-combine hook that targets override --
// confirm against the header. An empty SDValue signals "no change made".
5782 DAGCombinerInfo &DCI) const {
5783 // Default implementation: no optimization.
5784 return SDValue();
5785}
5786
5787//===----------------------------------------------------------------------===//
5788// Inline Assembler Implementation Methods
5789//===----------------------------------------------------------------------===//
5790
// Classify an inline-asm constraint string into a ConstraintType. (The
// opening signature lines are not visible in this excerpt; Constraint is
// the string being classified -- confirm against the declaration.)
5793 unsigned S = Constraint.size();
5794
// Single-letter constraints map directly onto the generic GCC letters.
5795 if (S == 1) {
5796 switch (Constraint[0]) {
5797 default: break;
5798 case 'r':
5799 return C_RegisterClass;
5800 case 'm': // memory
5801 case 'o': // offsetable
5802 case 'V': // not offsetable
5803 return C_Memory;
5804 case 'p': // Address.
5805 return C_Address;
5806 case 'n': // Simple Integer
5807 case 'E': // Floating Point Constant
5808 case 'F': // Floating Point Constant
5809 return C_Immediate;
5810 case 'i': // Simple Integer or Relocatable Constant
5811 case 's': // Relocatable Constant
5812 case 'X': // Allow ANY value.
5813 case 'I': // Target registers.
5814 case 'J':
5815 case 'K':
5816 case 'L':
5817 case 'M':
5818 case 'N':
5819 case 'O':
5820 case 'P':
5821 case '<':
5822 case '>':
5823 return C_Other;
5824 }
5825 }
5826
// A brace-enclosed string names a specific physical register, except for
// the special "{memory}" spelling, which is treated as a memory constraint.
5827 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5828 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5829 return C_Memory;
5830 return C_Register;
5831 }
5832 return C_Unknown;
5833}
5834
5835/// Try to replace an X constraint, which matches anything, with another that
5836/// has more specific requirements based on the type of the corresponding
5837/// operand.
5838const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5839 if (ConstraintVT.isInteger())
5840 return "r";
5841 if (ConstraintVT.isFloatingPoint())
5842 return "f"; // works for many targets
5843 return nullptr;
5844}
5845
// NOTE(review): the opening signature line is not visible in this excerpt;
// the parameter list (Chain, Glue, DL, OpInfo, DAG) matches the
// LowerAsmOutputOperandForConstraint hook -- confirm against the header.
// Default implementation: no special lowering; the empty SDValue tells the
// caller that nothing was produced.
5847 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5848 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5849 return SDValue();
5850}
5851
5852/// Lower the specified operand into the Ops vector.
5853/// If it is invalid, don't add anything to Ops.
// NOTE(review): the opening signature line is not visible in this excerpt;
// usage below shows an SDValue Op parameter in addition to those listed
// here. A ConstantSDNode *C declared just inside the 's' case block is also
// not visible -- confirm both against the full source.
5855 StringRef Constraint,
5856 std::vector<SDValue> &Ops,
5857 SelectionDAG &DAG) const {
5858
// Only single-letter constraints are handled by this default implementation.
5859 if (Constraint.size() > 1)
5860 return;
5861
5862 char ConstraintLetter = Constraint[0];
5863 switch (ConstraintLetter) {
5864 default: break;
5865 case 'X': // Allows any operand
5866 case 'i': // Simple Integer or Relocatable Constant
5867 case 'n': // Simple Integer
5868 case 's': { // Relocatable Constant
5869
5871 uint64_t Offset = 0;
5872
5873 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5874 // etc., since getelementpointer is variadic. We can't use
5875 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5876 // while in this case the GA may be furthest from the root node which is
5877 // likely an ISD::ADD.
5878 while (true) {
// Plain integer constant: emit it (plus any accumulated offset) as a target
// constant. Not valid for 's', which requires something relocatable.
5879 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5880 // gcc prints these as sign extended. Sign extend value to 64 bits
5881 // now; without this it would get ZExt'd later in
5882 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5883 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5884 BooleanContent BCont = getBooleanContents(MVT::i64);
5885 ISD::NodeType ExtOpc =
5886 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5887 int64_t ExtVal =
5888 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5889 Ops.push_back(
5890 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5891 return;
5892 }
// Relocatable operands (everything except 'n'): global addresses and block
// addresses are emitted with the accumulated offset folded in.
5893 if (ConstraintLetter != 'n') {
5894 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5895 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5896 GA->getValueType(0),
5897 Offset + GA->getOffset()));
5898 return;
5899 }
5900 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5901 Ops.push_back(DAG.getTargetBlockAddress(
5902 BA->getBlockAddress(), BA->getValueType(0),
5903 Offset + BA->getOffset(), BA->getTargetFlags()));
5904 return;
5905 }
// NOTE(review): a condition line guarding this push_back is not visible in
// this excerpt (the line between the BlockAddress case and this statement).
5907 Ops.push_back(Op);
5908 return;
5909 }
5910 }
// Walk through ADD/SUB-with-constant wrappers: fold the constant into
// Offset (negated for SUB) and continue with the non-constant operand.
5911 const unsigned OpCode = Op.getOpcode();
5912 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5913 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5914 Op = Op.getOperand(1);
5915 // Subtraction is not commutative.
5916 else if (OpCode == ISD::ADD &&
5917 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5918 Op = Op.getOperand(0);
5919 else
5920 return;
5921 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5922 continue;
5923 }
// Unrecognized node shape: give up without adding anything to Ops.
5924 return;
5925 }
5926 break;
5927 }
5928 }
5929}
5930
5934
5935std::pair<unsigned, const TargetRegisterClass *>
// NOTE(review): the line naming this function (and introducing its
// TargetRegisterInfo *RI parameter, used below) is not visible in this
// excerpt -- confirm against the declaration. Resolves a "{regname}"
// constraint to a (physical register, register class) pair.
5937 StringRef Constraint,
5938 MVT VT) const {
// Only brace-enclosed explicit register names are handled here; anything
// else yields the (0, nullptr) "no register" pair.
5939 if (!Constraint.starts_with("{"))
5940 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5941 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5942
5943 // Remove the braces from around the name.
5944 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5945
// Fallback result: the first legal class that contains the register, used
// when no class is legal for the requested VT.
5946 std::pair<unsigned, const TargetRegisterClass *> R =
5947 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5948
5949 // Figure out which register class contains this reg.
5950 for (const TargetRegisterClass *RC : RI->regclasses()) {
5951 // If none of the value types for this register class are valid, we
5952 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5953 if (!isLegalRC(*RI, *RC))
5954 continue;
5955
5956 for (const MCPhysReg &PR : *RC) {
// Register asm names are matched case-insensitively.
5957 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5958 std::pair<unsigned, const TargetRegisterClass *> S =
5959 std::make_pair(PR, RC);
5960
5961 // If this register class has the requested value type, return it,
5962 // otherwise keep searching and return the first class found
5963 // if no other is found which explicitly has the requested type.
5964 if (RI->isTypeLegalForClass(*RC, VT))
5965 return S;
5966 if (!R.second)
5967 R = S;
5968 }
5969 }
5970 }
5971
5972 return R;
5973}
5974
5975//===----------------------------------------------------------------------===//
5976// Constraint Selection.
5977
5978/// Return true if this is an input operand that is a matching constraint
5979/// like "4".
// NOTE(review): the opening signature line is not visible in this excerpt;
// per the comment above, a matching constraint is a digit string naming the
// output operand it ties to -- confirm against the declaration.
5981 assert(!ConstraintCode.empty() && "No known constraint!");
// Cast to unsigned char before isdigit to avoid UB on negative char values.
5982 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5983}
5984
5985/// If this is an input matching constraint, this method returns the output
5986/// operand it matches.
// NOTE(review): the opening signature line is not visible in this excerpt
// -- confirm against the declaration.
5988 assert(!ConstraintCode.empty() && "No known constraint!");
// The constraint code is the decimal index of the matched output operand.
5989 return atoi(ConstraintCode.c_str());
5990}
5991
5992/// Split up the constraint string from the inline assembly value into the
5993/// specific constraints and their prefixes, and also tie in the associated
5994/// operand values.
5995/// If this returns an empty vector, and if the constraint string itself
5996/// isn't empty, there was an error parsing.
// NOTE(review): the lines carrying the return type, function name, and the
// leading DataLayout &DL parameter (used below via getAsmOperandValueType
// and DL.getTypeSizeInBits) are not visible in this excerpt -- confirm
// against the declaration.
5999 const TargetRegisterInfo *TRI,
6000 const CallBase &Call) const {
6001 /// Information about all of the constraints.
6002 AsmOperandInfoVector ConstraintOperands;
6003 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
6004 unsigned maCount = 0; // Largest number of multiple alternative constraints.
6005
6006 // Do a prepass over the constraints, canonicalizing them, and building up the
6007 // ConstraintOperands list.
6008 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
6009 unsigned ResNo = 0; // ResNo - The result number of the next output.
6010 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
6011
6012 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
6013 ConstraintOperands.emplace_back(std::move(CI));
6014 AsmOperandInfo &OpInfo = ConstraintOperands.back();
6015
6016 // Update multiple alternative constraint count.
6017 if (OpInfo.multipleAlternatives.size() > maCount)
6018 maCount = OpInfo.multipleAlternatives.size();
6019
6020 OpInfo.ConstraintVT = MVT::Other;
6021
6022 // Compute the value type for each operand.
6023 switch (OpInfo.Type) {
6024 case InlineAsm::isOutput: {
6025 // Indirect outputs just consume an argument.
6026 if (OpInfo.isIndirect) {
6027 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
6028 break;
6029 }
6030
6031 // The return value of the call is this value. As such, there is no
6032 // corresponding argument.
6033 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
6034 EVT VT;
6035 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
6036 VT = getAsmOperandValueType(DL, STy->getElementType(ResNo));
6037 } else {
6038 assert(ResNo == 0 && "Asm only has one result!");
6039 VT = getAsmOperandValueType(DL, Call.getType());
6040 }
6041 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6042 ++ResNo;
6043 break;
6044 }
6045 case InlineAsm::isInput:
6046 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
6047 break;
6048 case InlineAsm::isLabel:
6049 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
6050 ++LabelNo;
6051 continue;
// NOTE(review): a case label (presumably InlineAsm::isClobber, which is
// referenced later in this function) is not visible in this excerpt.
6053 // Nothing to do.
6054 break;
6055 }
6056
// Derive a ConstraintVT for operands that carry an IR value.
6057 if (OpInfo.CallOperandVal) {
6058 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
6059 if (OpInfo.isIndirect) {
6060 OpTy = Call.getParamElementType(ArgNo);
6061 assert(OpTy && "Indirect operand must have elementtype attribute");
6062 }
6063
6064 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
6065 if (StructType *STy = dyn_cast<StructType>(OpTy))
6066 if (STy->getNumElements() == 1)
6067 OpTy = STy->getElementType(0);
6068
6069 // If OpTy is not a single value, it may be a struct/union that we
6070 // can tile with integers.
6071 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
6072 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
6073 switch (BitSize) {
6074 default: break;
6075 case 1:
6076 case 8:
6077 case 16:
6078 case 32:
6079 case 64:
6080 case 128:
6081 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
6082 break;
6083 }
6084 }
6085
6086 EVT VT = getAsmOperandValueType(DL, OpTy, true);
6087 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6088 ArgNo++;
6089 }
6090 }
6091
6092 // If we have multiple alternative constraints, select the best alternative.
6093 if (!ConstraintOperands.empty()) {
6094 if (maCount) {
6095 unsigned bestMAIndex = 0;
6096 int bestWeight = -1;
6097 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
6098 int weight = -1;
6099 unsigned maIndex;
6100 // Compute the sums of the weights for each alternative, keeping track
6101 // of the best (highest weight) one so far.
6102 for (maIndex = 0; maIndex < maCount; ++maIndex) {
6103 int weightSum = 0;
6104 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6105 cIndex != eIndex; ++cIndex) {
6106 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6107 if (OpInfo.Type == InlineAsm::isClobber)
6108 continue;
6109
6110 // If this is an output operand with a matching input operand,
6111 // look up the matching input. If their types mismatch, e.g. one
6112 // is an integer, the other is floating point, or their sizes are
6113 // different, flag it as an maCantMatch.
6114 if (OpInfo.hasMatchingInput()) {
6115 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6116 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6117 if ((OpInfo.ConstraintVT.isInteger() !=
6118 Input.ConstraintVT.isInteger()) ||
6119 (OpInfo.ConstraintVT.getSizeInBits() !=
6120 Input.ConstraintVT.getSizeInBits())) {
6121 weightSum = -1; // Can't match.
6122 break;
6123 }
6124 }
6125 }
6126 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
6127 if (weight == -1) {
6128 weightSum = -1;
6129 break;
6130 }
6131 weightSum += weight;
6132 }
6133 // Update best.
6134 if (weightSum > bestWeight) {
6135 bestWeight = weightSum;
6136 bestMAIndex = maIndex;
6137 }
6138 }
6139
6140 // Now select chosen alternative in each constraint.
6141 for (AsmOperandInfo &cInfo : ConstraintOperands)
6142 if (cInfo.Type != InlineAsm::isClobber)
6143 cInfo.selectAlternative(bestMAIndex);
6144 }
6145 }
6146
6147 // Check and hook up tied operands, choose constraint code to use.
6148 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6149 cIndex != eIndex; ++cIndex) {
6150 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6151
6152 // If this is an output operand with a matching input operand, look up the
6153 // matching input. If their types mismatch, e.g. one is an integer, the
6154 // other is floating point, or their sizes are different, flag it as an
6155 // error.
6156 if (OpInfo.hasMatchingInput()) {
6157 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6158
6159 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6160 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6161 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
6162 OpInfo.ConstraintVT);
6163 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6164 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
6165 Input.ConstraintVT);
6166 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
6167 OpInfo.ConstraintVT.isFloatingPoint();
6168 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
6169 Input.ConstraintVT.isFloatingPoint();
6170 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
6171 (MatchRC.second != InputRC.second)) {
6172 report_fatal_error("Unsupported asm: input constraint"
6173 " with a matching output constraint of"
6174 " incompatible type!");
6175 }
6176 }
6177 }
6178 }
6179
6180 return ConstraintOperands;
6181}
6182
6183/// Return a number indicating our preference for choosing a type of constraint
6184/// over another, for the purpose of sorting them. Immediates are almost always
6185/// preferable (when they can be emitted). A higher return value means a
6186/// stronger preference for one constraint type relative to another.
6187/// FIXME: We should prefer registers over memory but doing so may lead to
6188/// unrecoverable register exhaustion later.
6189/// https://github.com/llvm/llvm-project/issues/20571
// NOTE(review): the signature line and the case labels of this switch are
// not visible in this excerpt. Per the ranking described above, the returns
// run from the most-preferred constraint kind (4) down to the least (0) --
// confirm the exact label-to-value mapping against the full source.
6191 switch (CT) {
6194 return 4;
6197 return 3;
6199 return 2;
6201 return 1;
6203 return 0;
6204 }
6205 llvm_unreachable("Invalid constraint type");
6206}
6207
6208/// Examine constraint type and operand type and determine a weight value.
6209/// This object must already have been set up with the operand type
6210/// and the current alternative constraint selected.
// NOTE(review): the opening signature lines and the declaration of rCodes
// (a pointer to a list of constraint-code strings, per its use below) are
// not visible in this excerpt -- confirm against the full source.
6213 AsmOperandInfo &info, int maIndex) const {
// Select which code list to score: the plain codes, or those of the
// requested multiple-alternative index when it is in range.
6215 if (maIndex >= (int)info.multipleAlternatives.size())
6216 rCodes = &info.Codes;
6217 else
6218 rCodes = &info.multipleAlternatives[maIndex].Codes;
6219 ConstraintWeight BestWeight = CW_Invalid;
6220
6221 // Loop over the options, keeping track of the most general one.
6222 for (const std::string &rCode : *rCodes) {
6223 ConstraintWeight weight =
6224 getSingleConstraintMatchWeight(info, rCode.c_str());
6225 if (weight > BestWeight)
6226 BestWeight = weight;
6227 }
6228
6229 return BestWeight;
6230}
6231
6232/// Examine constraint type and operand type and determine a weight value.
6233/// This object must already have been set up with the operand type
6234/// and the current alternative constraint selected.
// NOTE(review): the opening signature lines and the declaration/initializer
// of the local `weight` (assigned in the switch and returned below) are not
// visible in this excerpt -- confirm against the full source.
6237 AsmOperandInfo &info, const char *constraint) const {
6239 Value *CallOperandVal = info.CallOperandVal;
6240 // If we don't have a value, we can't do a match,
6241 // but allow it at the lowest weight.
6242 if (!CallOperandVal)
6243 return CW_Default;
6244 // Look at the constraint type.
6245 switch (*constraint) {
6246 case 'i': // immediate integer.
6247 case 'n': // immediate integer with a known value.
6248 if (isa<ConstantInt>(CallOperandVal))
6249 weight = CW_Constant;
6250 break;
6251 case 's': // non-explicit integral immediate.
6252 if (isa<GlobalValue>(CallOperandVal))
6253 weight = CW_Constant;
6254 break;
6255 case 'E': // immediate float if host format.
6256 case 'F': // immediate float.
6257 if (isa<ConstantFP>(CallOperandVal))
6258 weight = CW_Constant;
6259 break;
6260 case '<': // memory operand with autodecrement.
6261 case '>': // memory operand with autoincrement.
6262 case 'm': // memory operand.
6263 case 'o': // offsettable memory operand
6264 case 'V': // non-offsettable memory operand
6265 weight = CW_Memory;
6266 break;
6267 case 'r': // general register.
6268 case 'g': // general register, memory operand or immediate integer.
6269 // note: Clang converts "g" to "imr".
6270 if (CallOperandVal->getType()->isIntegerTy())
6271 weight = CW_Register;
6272 break;
6273 case 'X': // any operand.
6274 default:
6275 weight = CW_Default;
6276 break;
6277 }
6278 return weight;
6279}
6280
6281/// If there are multiple different constraints that we could pick for this
6282/// operand (e.g. "imr") try to pick the 'best' one.
6283/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6284/// into seven classes:
6285/// Register -> one specific register
6286/// RegisterClass -> a group of regs
6287/// Memory -> memory
6288/// Address -> a symbolic memory reference
6289/// Immediate -> immediate values
6290/// Other -> magic values (such as "Flag Output Operands")
6291/// Unknown -> something we don't recognize yet and can't handle
6292/// Ideally, we would pick the most specific constraint possible: if we have
6293/// something that fits into a register, we would pick it. The problem here
6294/// is that if we have something that could either be in a register or in
6295/// memory that use of the register could cause selection of *other*
6296/// operands to fail: they might only succeed if we pick memory. Because of
6297/// this the heuristic we use is:
6298///
6299/// 1) If there is an 'other' constraint, and if the operand is valid for
6300/// that constraint, use it. This makes us take advantage of 'i'
6301/// constraints when available.
6302/// 2) Otherwise, pick the most general constraint present. This prefers
6303/// 'm' over 'r', for example.
6304///
// NOTE(review): the line naming this function is not visible in this
// excerpt -- confirm against the declaration.
6306 TargetLowering::AsmOperandInfo &OpInfo) const {
6307 ConstraintGroup Ret;
6308
6309 Ret.reserve(OpInfo.Codes.size());
6310 for (StringRef Code : OpInfo.Codes) {
// NOTE(review): the line initializing CType (presumably from
// getConstraintType(Code), given its use below) is not visible here.
6312
6313 // Indirect 'other' or 'immediate' constraints are not allowed.
6314 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6315 CType == TargetLowering::C_Register ||
// NOTE(review): one more allowed-kind comparison line of this condition is
// not visible in this excerpt.
6317 continue;
6318
6319 // Things with matching constraints can only be registers, per gcc
6320 // documentation. This mainly affects "g" constraints.
6321 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6322 continue;
6323
6324 Ret.emplace_back(Code, CType);
6325 }
6326
// Sort strongest-preference first (comparator body shown; the enclosing
// sort call's opening line is not visible in this excerpt).
6328 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6329 });
6330
6331 return Ret;
6332}
6333
6334/// If we have an immediate, see if we can lower it. Return true if we can,
6335/// false otherwise.
6337 SDValue Op, SelectionDAG *DAG,
6338 const TargetLowering &TLI) {
6339
6340 assert((P.second == TargetLowering::C_Other ||
6341 P.second == TargetLowering::C_Immediate) &&
6342 "need immediate or other");
6343
6344 if (!Op.getNode())
6345 return false;
6346
6347 std::vector<SDValue> ResultOps;
6348 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6349 return !ResultOps.empty();
6350}
6351
6352/// Determines the constraint code and constraint type to use for the specific
6353/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6355 SDValue Op,
6356 SelectionDAG *DAG) const {
6357 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6358
6359 // Single-letter constraints ('r') are very common.
6360 if (OpInfo.Codes.size() == 1) {
6361 OpInfo.ConstraintCode = OpInfo.Codes[0];
6362 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6363 } else {
6365 if (G.empty())
6366 return;
6367
6368 unsigned BestIdx = 0;
6369 for (const unsigned E = G.size();
6370 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6371 G[BestIdx].second == TargetLowering::C_Immediate);
6372 ++BestIdx) {
6373 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6374 break;
6375 // If we're out of constraints, just pick the first one.
6376 if (BestIdx + 1 == E) {
6377 BestIdx = 0;
6378 break;
6379 }
6380 }
6381
6382 OpInfo.ConstraintCode = G[BestIdx].first;
6383 OpInfo.ConstraintType = G[BestIdx].second;
6384 }
6385
6386 // 'X' matches anything.
6387 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6388 // Constants are handled elsewhere. For Functions, the type here is the
6389 // type of the result, which is not what we want to look at; leave them
6390 // alone.
6391 Value *v = OpInfo.CallOperandVal;
6392 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6393 return;
6394 }
6395
6396 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6397 OpInfo.ConstraintCode = "i";
6398 return;
6399 }
6400
6401 // Otherwise, try to resolve it to something we know about by looking at
6402 // the actual operand type.
6403 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6404 OpInfo.ConstraintCode = Repl;
6405 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6406 }
6407 }
6408}
6409
6410/// Given an exact SDIV by a constant, create a multiplication
6411/// with the multiplicative inverse of the constant.
6412/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6414 const SDLoc &dl, SelectionDAG &DAG,
6415 SmallVectorImpl<SDNode *> &Created) {
6416 SDValue Op0 = N->getOperand(0);
6417 SDValue Op1 = N->getOperand(1);
6418 EVT VT = N->getValueType(0);
6419 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6420 EVT ShSVT = ShVT.getScalarType();
6421
6422 bool UseSRA = false;
6423 SmallVector<SDValue, 16> Shifts, Factors;
6424
6425 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6426 if (C->isZero())
6427 return false;
6428
6429 EVT CT = C->getValueType(0);
6430 APInt Divisor = C->getAPIntValue();
6431 unsigned Shift = Divisor.countr_zero();
6432 if (Shift) {
6433 Divisor.ashrInPlace(Shift);
6434 UseSRA = true;
6435 }
6436 APInt Factor = Divisor.multiplicativeInverse();
6437 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6438 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6439 return true;
6440 };
6441
6442 // Collect all magic values from the build vector.
6443 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6444 return SDValue();
6445
6446 SDValue Shift, Factor;
6447 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6448 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6449 Factor = DAG.getBuildVector(VT, dl, Factors);
6450 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6451 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6452 "Expected matchUnaryPredicate to return one element for scalable "
6453 "vectors");
6454 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6455 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6456 } else {
6457 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6458 Shift = Shifts[0];
6459 Factor = Factors[0];
6460 }
6461
6462 SDValue Res = Op0;
6463 if (UseSRA) {
6464 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
6465 Created.push_back(Res.getNode());
6466 }
6467
6468 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6469}
6470
6471/// Given an exact UDIV by a constant, create a multiplication
6472/// with the multiplicative inverse of the constant.
6473/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6475 const SDLoc &dl, SelectionDAG &DAG,
6476 SmallVectorImpl<SDNode *> &Created) {
6477 EVT VT = N->getValueType(0);
6478 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6479 EVT ShSVT = ShVT.getScalarType();
6480
6481 bool UseSRL = false;
6482 SmallVector<SDValue, 16> Shifts, Factors;
6483
6484 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6485 if (C->isZero())
6486 return false;
6487
6488 EVT CT = C->getValueType(0);
6489 APInt Divisor = C->getAPIntValue();
6490 unsigned Shift = Divisor.countr_zero();
6491 if (Shift) {
6492 Divisor.lshrInPlace(Shift);
6493 UseSRL = true;
6494 }
6495 // Calculate the multiplicative inverse modulo BW.
6496 APInt Factor = Divisor.multiplicativeInverse();
6497 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6498 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6499 return true;
6500 };
6501
6502 SDValue Op1 = N->getOperand(1);
6503
6504 // Collect all magic values from the build vector.
6505 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6506 return SDValue();
6507
6508 SDValue Shift, Factor;
6509 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6510 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6511 Factor = DAG.getBuildVector(VT, dl, Factors);
6512 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6513 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6514 "Expected matchUnaryPredicate to return one element for scalable "
6515 "vectors");
6516 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6517 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6518 } else {
6519 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6520 Shift = Shifts[0];
6521 Factor = Factors[0];
6522 }
6523
6524 SDValue Res = N->getOperand(0);
6525 if (UseSRL) {
6526 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
6527 Created.push_back(Res.getNode());
6528 }
6529
6530 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6531}
6532
6534 SelectionDAG &DAG,
6535 SmallVectorImpl<SDNode *> &Created) const {
6536 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6537 if (isIntDivCheap(N->getValueType(0), Attr))
6538 return SDValue(N, 0); // Lower SDIV as SDIV
6539 return SDValue();
6540}
6541
6542SDValue
6544 SelectionDAG &DAG,
6545 SmallVectorImpl<SDNode *> &Created) const {
6546 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6547 if (isIntDivCheap(N->getValueType(0), Attr))
6548 return SDValue(N, 0); // Lower SREM as SREM
6549 return SDValue();
6550}
6551
6552/// Build sdiv by power-of-2 with conditional move instructions
6553/// Ref: "Hacker's Delight" by Henry Warren 10-1
6554/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6555/// bgez x, label
6556/// add x, x, 2**k-1
6557/// label:
6558/// sra res, x, k
6559/// neg res, res (when the divisor is negative)
6561 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6562 SmallVectorImpl<SDNode *> &Created) const {
6563 unsigned Lg2 = Divisor.countr_zero();
6564 EVT VT = N->getValueType(0);
6565
6566 SDLoc DL(N);
6567 SDValue N0 = N->getOperand(0);
6568 SDValue Zero = DAG.getConstant(0, DL, VT);
6569 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6570 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6571
6572 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6573 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6574 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6575 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6576 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6577
6578 Created.push_back(Cmp.getNode());
6579 Created.push_back(Add.getNode());
6580 Created.push_back(CMov.getNode());
6581
6582 // Divide by pow2.
6583 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
6584 DAG.getShiftAmountConstant(Lg2, VT, DL));
6585
6586 // If we're dividing by a positive value, we're done. Otherwise, we must
6587 // negate the result.
6588 if (Divisor.isNonNegative())
6589 return SRA;
6590
6591 Created.push_back(SRA.getNode());
6592 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6593}
6594
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
/// NOTE(review): the signature line, the `magics` declaration and parts of a
/// few conditions are truncated in this rendering of the source; the code
/// below is preserved as-is and comments describe only what is visible.
                              bool IsAfterLegalization,
                              bool IsAfterLegalTypes,
                              SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  // MulVT is only set (below) when the multiply must be emitted in a wider
  // type; it stays the default EVT() otherwise.
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  EVT QueryVT = VT;
  if (VT.isVector()) {
    // If the vector type will be legalized to a vector type with the same
    // element type, allow the transform before type legalization if MULHS or
    // SMUL_LOHI are supported.
    QueryVT = getLegalTypeToTransformTo(*DAG.getContext(), VT);
    if (!QueryVT.isVector() ||
      return SDValue();
  } else if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (!VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    // The promoted type must hold the full 2*EltBits product.
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  bool HasMULHS =
      isOperationLegalOrCustom(ISD::MULHS, QueryVT, IsAfterLegalization);
  bool HasSMUL_LOHI =
      isOperationLegalOrCustom(ISD::SMUL_LOHI, QueryVT, IsAfterLegalization);

  if (isTypeLegal(VT) && !HasMULHS && !HasSMUL_LOHI && MulVT == EVT()) {
    // If type twice as wide legal, widen and use a mul plus a shift.
    EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
    // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
      MulVT = WideVT;
  }

  // No way to compute the high half of the multiply; give up.
  if (!HasMULHS && !HasSMUL_LOHI && MulVT == EVT())
    return SDValue();

  // If we're after type legalization and SVT is not legal, use the
  // promoted type for creating constants to avoid creating nodes with
  // illegal types.
  if (IsAfterLegalTypes && VT.isVector()) {
    SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
    if (SVT.bitsLT(VT.getScalarType()))
      return SDValue();
    ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
    if (ShSVT.bitsLT(ShVT.getScalarType()))
      return SDValue();
  }
  const unsigned SVTBits = SVT.getSizeInBits();

  // Per-element magic constants gathered by BuildSDIVPattern below.
  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero is UB; leave it to be handled elsewhere.
    if (C->isZero())
      return false;
    // Truncate the divisor to the target scalar type in case it was promoted
    // during type legalization.
    APInt Divisor = C->getAPIntValue().trunc(EltBits);
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOne() || Divisor.isAllOnes()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.Magic = 0;
      magics.ShiftAmount = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(
        DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT));
    Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
    ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false,
                                /*AllowTruncation=*/true))
    return SDValue();

  // Re-assemble the per-element constants into operands matching the shape
  // of the divisor (build vector, splat, or scalar).
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  auto GetMULHS = [&](SDValue X, SDValue Y) {
    if (HasMULHS)
      return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
    if (HasSMUL_LOHI) {
      SDValue LoHi =
          DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return LoHi.getValue(1);
    }

    // Fall back to a full multiply in the wider MulVT and extract the
    // high EltBits via a shift+truncate.
    X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
    Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
    Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
    Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                    DAG.getShiftAmountConstant(EltBits, MulVT, dl));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
  };

  SDValue Q = GetMULHS(N0, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
6783
/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
/// NOTE(review): the signature line, the `magics` declaration and parts of a
/// few conditions are truncated in this rendering of the source; the code
/// below is preserved as-is and comments describe only what is visible.
                              bool IsAfterLegalization,
                              bool IsAfterLegalTypes,
                              SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);

  // If the udiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactUDIV(*this, N, dl, DAG, Created);

  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  // MulVT is only set (below) when the multiply must be emitted in a wider
  // type; it stays the default EVT() otherwise.
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  EVT QueryVT = VT;
  if (VT.isVector()) {
    // If the vector type will be legalized to a vector type with the same
    // element type, allow the transform before type legalization if MULHU or
    // UMUL_LOHI are supported.
    QueryVT = getLegalTypeToTransformTo(*DAG.getContext(), VT);
    if (!QueryVT.isVector() ||
      return SDValue();
  } else if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (!VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    // The promoted type must hold the full 2*EltBits product.
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  bool HasMULHU =
      isOperationLegalOrCustom(ISD::MULHU, QueryVT, IsAfterLegalization);
  bool HasUMUL_LOHI =
      isOperationLegalOrCustom(ISD::UMUL_LOHI, QueryVT, IsAfterLegalization);

  if (isTypeLegal(VT) && !HasMULHU && !HasUMUL_LOHI && MulVT == EVT()) {
    // If type twice as wide legal, widen and use a mul plus a shift.
    EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
    // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
      MulVT = WideVT;
  }

  // No way to compute the high half of the multiply; give up.
  if (!HasMULHU && !HasUMUL_LOHI && MulVT == EVT())
    return SDValue();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Try to use leading zeros of the dividend to reduce the multiplier and
  // avoid expensive fixups.
  unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();

  // If we're after type legalization and SVT is not legal, use the
  // promoted type for creating constants to avoid creating nodes with
  // illegal types.
  if (IsAfterLegalTypes && VT.isVector()) {
    SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
    if (SVT.bitsLT(VT.getScalarType()))
      return SDValue();
    ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
    if (ShSVT.bitsLT(ShVT.getScalarType()))
      return SDValue();
  }
  const unsigned SVTBits = SVT.getSizeInBits();

  // Allow i32 to be widened to i64 for uncooperative divisors if i64 MULHU or
  // UMUL_LOHI is supported.
  const EVT WideSVT = MVT::i64;
  const bool HasWideMULHU =
      VT == MVT::i32 &&
      isOperationLegalOrCustom(ISD::MULHU, WideSVT, IsAfterLegalization);
  const bool HasWideUMUL_LOHI =
      VT == MVT::i32 &&
      isOperationLegalOrCustom(ISD::UMUL_LOHI, WideSVT, IsAfterLegalization);
  const bool AllowWiden = (HasWideMULHU || HasWideUMUL_LOHI);

  // Flags recording which fixup steps any lane of the divisor requires.
  bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
  bool UseWiden = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero is UB; leave it to be handled elsewhere.
    if (C->isZero())
      return false;
    // Truncate the divisor to the target scalar type in case it was promoted
    // during type legalization.
    APInt Divisor = C->getAPIntValue().trunc(EltBits);

    SDValue PreShift, MagicFactor, NPQFactor, PostShift;

    // Magic algorithm doesn't work for division by 1. We need to emit a select
    // at the end.
    if (Divisor.isOne()) {
      PreShift = PostShift = DAG.getUNDEF(ShSVT);
      MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
    } else {
          Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()),
          /*AllowEvenDivisorOptimization=*/true,
          /*AllowWidenOptimization=*/AllowWiden);

      if (magics.Widen) {
        UseWiden = true;
        MagicFactor = DAG.getConstant(magics.Magic, dl, WideSVT);
      } else {
        MagicFactor = DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT);
      }

      assert(magics.PreShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert(magics.PostShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert((!magics.IsAdd || magics.PreShift == 0) &&
             "Unexpected pre-shift");
      PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
      PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
      // NPQFactor is the multiplier used to emulate the SRL-by-1 on the
      // "add" (NPQ) path for vectors; zero for lanes not needing it.
      NPQFactor = DAG.getConstant(
          magics.IsAdd ? APInt::getOneBitSet(SVTBits, EltBits - 1)
                       : APInt::getZero(SVTBits),
          dl, SVT);
      UseNPQ |= magics.IsAdd;
      UsePreShift |= magics.PreShift != 0;
      UsePostShift |= magics.PostShift != 0;
    }

    PreShifts.push_back(PreShift);
    MagicFactors.push_back(MagicFactor);
    NPQFactors.push_back(NPQFactor);
    PostShifts.push_back(PostShift);
    return true;
  };

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
                                /*AllowTruncation=*/true))
    return SDValue();

  // Re-assemble the per-element constants into operands matching the shape
  // of the divisor (build vector, splat, or scalar).
  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
           NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one for scalable vectors");
    PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
    PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }

  if (UseWiden) {
    // Compute: (WideSVT(x) * MagicFactor) >> WideSVTBits.
    SDValue WideN0 = DAG.getNode(ISD::ZERO_EXTEND, dl, WideSVT, N0);

    // Perform WideSVTxWideSVT -> 2*WideSVT multiplication and extract high
    // WideSVT bits
    SDValue High;
    if (HasWideMULHU) {
      High = DAG.getNode(ISD::MULHU, dl, WideSVT, WideN0, MagicFactor);
    } else {
      assert(HasWideUMUL_LOHI);
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(WideSVT, WideSVT),
                      WideN0, MagicFactor);
      High = LoHi.getValue(1);
    }

    Created.push_back(High.getNode());
    return DAG.getNode(ISD::TRUNCATE, dl, VT, High);
  }

  SDValue Q = N0;
  if (UsePreShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
    Created.push_back(Q.getNode());
  }

  auto GetMULHU = [&](SDValue X, SDValue Y) {
    if (HasMULHU)
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
    if (HasUMUL_LOHI) {
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return LoHi.getValue(1);
    }

    // Fall back to a full multiply in the wider MulVT and extract the
    // high EltBits via a shift+truncate.
    X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
    Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
    Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
    Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                    DAG.getShiftAmountConstant(EltBits, MulVT, dl));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  if (UseNPQ) {
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
    Created.push_back(NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));

    Created.push_back(NPQ.getNode());

    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created.push_back(Q.getNode());
  }

  if (UsePostShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
    Created.push_back(Q.getNode());
  }

  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Divisor-is-one lanes were skipped by the magic algorithm above; select
  // the original numerator for them.
  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
7044
7045/// If all values in Values that *don't* match the predicate are same 'splat'
7046/// value, then replace all values with that splat value.
7047/// Else, if AlternativeReplacement was provided, then replace all values that
7048/// do match predicate with AlternativeReplacement value.
7049static void
7051 std::function<bool(SDValue)> Predicate,
7052 SDValue AlternativeReplacement = SDValue()) {
7053 SDValue Replacement;
7054 // Is there a value for which the Predicate does *NOT* match? What is it?
7055 auto SplatValue = llvm::find_if_not(Values, Predicate);
7056 if (SplatValue != Values.end()) {
7057 // Does Values consist only of SplatValue's and values matching Predicate?
7058 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
7059 return Value == *SplatValue || Predicate(Value);
7060 })) // Then we shall replace values matching predicate with SplatValue.
7061 Replacement = *SplatValue;
7062 }
7063 if (!Replacement) {
7064 // Oops, we did not find the "baseline" splat value.
7065 if (!AlternativeReplacement)
7066 return; // Nothing to do.
7067 // Let's replace with provided value then.
7068 Replacement = AlternativeReplacement;
7069 }
7070 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
7071}
7072
7073/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
7074/// where the divisor and comparison target are constants,
7075/// return a DAG expression that will generate the same comparison result
7076/// using only multiplications, additions and shifts/rotations.
7077/// Ref: "Hacker's Delight" 10-17.
7078SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
7079 SDValue CompTargetNode,
7081 DAGCombinerInfo &DCI,
7082 const SDLoc &DL) const {
7084 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7085 DCI, DL, Built)) {
7086 for (SDNode *N : Built)
7087 DCI.AddToWorklist(N);
7088 return Folded;
7089 }
7090
7091 return SDValue();
7092}
7093
7094SDValue
7095TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
7096 SDValue CompTargetNode, ISD::CondCode Cond,
7097 DAGCombinerInfo &DCI, const SDLoc &DL,
7098 SmallVectorImpl<SDNode *> &Created) const {
7099 // fold (seteq/ne (urem N, D), C) ->
7100 // (setule/ugt (rotr (mul (sub N, C), P), K), Q)
7101 // - D must be constant, with D = D0 * 2^K where D0 is odd
7102 // - P is the multiplicative inverse of D0 modulo 2^W
7103 // - Q = floor(((2^W) - 1) / D)
7104 // where W is the width of the common type of N and D.
7105 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7106 "Only applicable for (in)equality comparisons.");
7107
7108 SelectionDAG &DAG = DCI.DAG;
7109
7110 EVT VT = REMNode.getValueType();
7111 EVT SVT = VT.getScalarType();
7112 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7113 EVT ShSVT = ShVT.getScalarType();
7114
7115 // If MUL is unavailable, we cannot proceed in any case.
7116 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7117 return SDValue();
7118
7119 bool ComparingWithAllZeros = true;
7120 bool AllComparisonsWithNonZerosAreTautological = true;
7121 bool HadTautologicalLanes = false;
7122 bool AllLanesAreTautological = true;
7123 bool HadEvenDivisor = false;
7124 bool AllDivisorsArePowerOfTwo = true;
7125 bool HadTautologicalInvertedLanes = false;
7126 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
7127
7128 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
7129 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7130 if (CDiv->isZero())
7131 return false;
7132
7133 const APInt &D = CDiv->getAPIntValue();
7134 const APInt &Cmp = CCmp->getAPIntValue();
7135
7136 ComparingWithAllZeros &= Cmp.isZero();
7137
7138 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7139 // if C2 is not less than C1, the comparison is always false.
7140 // But we will only be able to produce the comparison that will give the
7141 // opposive tautological answer. So this lane would need to be fixed up.
7142 bool TautologicalInvertedLane = D.ule(Cmp);
7143 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
7144
7145 // If all lanes are tautological (either all divisors are ones, or divisor
7146 // is not greater than the constant we are comparing with),
7147 // we will prefer to avoid the fold.
7148 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
7149 HadTautologicalLanes |= TautologicalLane;
7150 AllLanesAreTautological &= TautologicalLane;
7151
7152 // If we are comparing with non-zero, we'll need to subtract said
7153 // comparison value from the LHS. But there is no point in doing that if
7154 // every lane where we are comparing with non-zero is tautological.
7155 if (!Cmp.isZero())
7156 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
7157
7158 // Decompose D into D0 * 2^K
7159 unsigned K = D.countr_zero();
7160 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7161 APInt D0 = D.lshr(K);
7162
7163 // D is even if it has trailing zeros.
7164 HadEvenDivisor |= (K != 0);
7165 // D is a power-of-two if D0 is one.
7166 // If all divisors are power-of-two, we will prefer to avoid the fold.
7167 AllDivisorsArePowerOfTwo &= D0.isOne();
7168
7169 // P = inv(D0, 2^W)
7170 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7171 unsigned W = D.getBitWidth();
7172 APInt P = D0.multiplicativeInverse();
7173 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7174
7175 // Q = floor((2^W - 1) u/ D)
7176 // R = ((2^W - 1) u% D)
7177 APInt Q, R;
7179
7180 // If we are comparing with zero, then that comparison constant is okay,
7181 // else it may need to be one less than that.
7182 if (Cmp.ugt(R))
7183 Q -= 1;
7184
7186 "We are expecting that K is always less than all-ones for ShSVT");
7187
7188 // If the lane is tautological the result can be constant-folded.
7189 if (TautologicalLane) {
7190 // Set P and K amount to bogus values so we can try to splat them.
7191 P = 0;
7192 KAmts.push_back(DAG.getAllOnesConstant(DL, ShSVT));
7193 // And ensure that comparison constant is tautological,
7194 // it will always compare true/false.
7195 Q.setAllBits();
7196 } else {
7197 KAmts.push_back(DAG.getConstant(K, DL, ShSVT));
7198 }
7199
7200 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7201 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7202 return true;
7203 };
7204
7205 SDValue N = REMNode.getOperand(0);
7206 SDValue D = REMNode.getOperand(1);
7207
7208 // Collect the values from each element.
7209 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
7210 return SDValue();
7211
7212 // If all lanes are tautological, the result can be constant-folded.
7213 if (AllLanesAreTautological)
7214 return SDValue();
7215
7216 // If this is a urem by a powers-of-two, avoid the fold since it can be
7217 // best implemented as a bit test.
7218 if (AllDivisorsArePowerOfTwo)
7219 return SDValue();
7220
7221 SDValue PVal, KVal, QVal;
7222 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7223 if (HadTautologicalLanes) {
7224 // Try to turn PAmts into a splat, since we don't care about the values
7225 // that are currently '0'. If we can't, just keep '0'`s.
7227 // Try to turn KAmts into a splat, since we don't care about the values
7228 // that are currently '-1'. If we can't, change them to '0'`s.
7230 DAG.getConstant(0, DL, ShSVT));
7231 }
7232
7233 PVal = DAG.getBuildVector(VT, DL, PAmts);
7234 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7235 QVal = DAG.getBuildVector(VT, DL, QAmts);
7236 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7237 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
7238 "Expected matchBinaryPredicate to return one element for "
7239 "SPLAT_VECTORs");
7240 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7241 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7242 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7243 } else {
7244 PVal = PAmts[0];
7245 KVal = KAmts[0];
7246 QVal = QAmts[0];
7247 }
7248
7249 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7250 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
7251 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
7252 assert(CompTargetNode.getValueType() == N.getValueType() &&
7253 "Expecting that the types on LHS and RHS of comparisons match.");
7254 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
7255 }
7256
7257 // (mul N, P)
7258 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7259 Created.push_back(Op0.getNode());
7260
7261 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7262 // divisors as a performance improvement, since rotating by 0 is a no-op.
7263 if (HadEvenDivisor) {
7264 // We need ROTR to do this.
7265 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7266 return SDValue();
7267 // UREM: (rotr (mul N, P), K)
7268 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7269 Created.push_back(Op0.getNode());
7270 }
7271
7272 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
7273 SDValue NewCC =
7274 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7276 if (!HadTautologicalInvertedLanes)
7277 return NewCC;
7278
7279 // If any lanes previously compared always-false, the NewCC will give
7280 // always-true result for them, so we need to fixup those lanes.
7281 // Or the other way around for inequality predicate.
7282 assert(VT.isVector() && "Can/should only get here for vectors.");
7283 Created.push_back(NewCC.getNode());
7284
7285 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7286 // if C2 is not less than C1, the comparison is always false.
7287 // But we have produced the comparison that will give the
7288 // opposite tautological answer. So these lanes would need to be fixed up.
7289 SDValue TautologicalInvertedChannels =
7290 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
7291 Created.push_back(TautologicalInvertedChannels.getNode());
7292
7293 // NOTE: we avoid letting illegal types through even if we're before legalize
7294 // ops – legalization has a hard time producing good code for this.
7295 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
7296 // If we have a vector select, let's replace the comparison results in the
7297 // affected lanes with the correct tautological result.
7298 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
7299 DL, SETCCVT, SETCCVT);
7300 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
7301 Replacement, NewCC);
7302 }
7303
7304 // Else, we can just invert the comparison result in the appropriate lanes.
7305 //
7306 // NOTE: see the note above VSELECT above.
7307 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
7308 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
7309 TautologicalInvertedChannels);
7310
7311 return SDValue(); // Don't know how to lower.
7312}
7313
7314/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7315/// where the divisor is constant and the comparison target is zero,
7316/// return a DAG expression that will generate the same comparison result
7317/// using only multiplications, additions and shifts/rotations.
7318/// Ref: "Hacker's Delight" 10-17.
7319SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7320 SDValue CompTargetNode,
7322 DAGCombinerInfo &DCI,
7323 const SDLoc &DL) const {
7325 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7326 DCI, DL, Built)) {
7327 assert(Built.size() <= 7 && "Max size prediction failed.");
7328 for (SDNode *N : Built)
7329 DCI.AddToWorklist(N);
7330 return Folded;
7331 }
7332
7333 return SDValue();
7334}
7335
7336SDValue
7337TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7338 SDValue CompTargetNode, ISD::CondCode Cond,
7339 DAGCombinerInfo &DCI, const SDLoc &DL,
7340 SmallVectorImpl<SDNode *> &Created) const {
7341 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7342 // Fold:
7343 // (seteq/ne (srem N, D), 0)
7344 // To:
7345 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7346 //
7347 // - D must be constant, with D = D0 * 2^K where D0 is odd
7348 // - P is the multiplicative inverse of D0 modulo 2^W
7349 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7350 // - Q = floor((2 * A) / (2^K))
7351 // where W is the width of the common type of N and D.
7352 //
7353 // When D is a power of two (and thus D0 is 1), the normal
7354 // formula for A and Q don't apply, because the derivation
7355 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7356 // does not apply. This specifically fails when N = INT_MIN.
7357 //
7358 // Instead, for power-of-two D, we use:
7359 // - A = 0
7360 // | -> No offset needed. We're effectively treating it the same as urem.
7361 // - Q = 2^(W-K) - 1
7362 // |-> Test that the top K bits are zero after rotation
7363 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7364 "Only applicable for (in)equality comparisons.");
7365
7366 SelectionDAG &DAG = DCI.DAG;
7367
7368 EVT VT = REMNode.getValueType();
7369 EVT SVT = VT.getScalarType();
7370 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7371 EVT ShSVT = ShVT.getScalarType();
7372
7373 // If we are after ops legalization, and MUL is unavailable, we can not
7374 // proceed.
7375 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7376 return SDValue();
7377
7378 // TODO: Could support comparing with non-zero too.
7379 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7380 if (!CompTarget || !CompTarget->isZero())
7381 return SDValue();
7382
7383 bool HadOneDivisor = false;
7384 bool AllDivisorsAreOnes = true;
7385 bool HadEvenDivisor = false;
7386 bool AllDivisorsArePowerOfTwo = true;
7387 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7388
7389 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7390 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7391 if (C->isZero())
7392 return false;
7393
7394 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7395
7396 // WARNING: this fold is only valid for positive divisors!
7397 // `rem %X, -C` is equivalent to `rem %X, C`
7398 APInt D = C->getAPIntValue().abs();
7399
7400 // If all divisors are ones, we will prefer to avoid the fold.
7401 HadOneDivisor |= D.isOne();
7402 AllDivisorsAreOnes &= D.isOne();
7403
7404 // Decompose D into D0 * 2^K
7405 unsigned K = D.countr_zero();
7406 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7407 APInt D0 = D.lshr(K);
7408
7409 // D is even if it has trailing zeros.
7410 HadEvenDivisor |= (K != 0);
7411
7412 // D is a power-of-two if D0 is one. This includes INT_MIN.
7413 // If all divisors are power-of-two, we will prefer to avoid the fold.
7414 AllDivisorsArePowerOfTwo &= D0.isOne();
7415
7416 // P = inv(D0, 2^W)
7417 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7418 unsigned W = D.getBitWidth();
7419 APInt P = D0.multiplicativeInverse();
7420 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7421
7422 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7423 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7424 A.clearLowBits(K);
7425
7426 // Q = floor((2 * A) / (2^K))
7427 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7428
7430 "We are expecting that A is always less than all-ones for SVT");
7432 "We are expecting that K is always less than all-ones for ShSVT");
7433
7434 // If D was a power of two, apply the alternate constant derivation.
7435 if (D0.isOne()) {
7436 // A = 0
7437 A = APInt(W, 0);
7438 // - Q = 2^(W-K) - 1
7439 Q = APInt::getLowBitsSet(W, W - K);
7440 }
7441
7442 // If the divisor is 1 the result can be constant-folded.
7443 if (D.isOne()) {
7444 // Set P, A and K to a bogus values so we can try to splat them.
7445 P = 0;
7446 A.setAllBits();
7447 KAmts.push_back(DAG.getAllOnesConstant(DL, ShSVT));
7448
7449 // x ?% 1 == 0 <--> true <--> x u<= -1
7450 Q.setAllBits();
7451 } else {
7452 KAmts.push_back(DAG.getConstant(K, DL, ShSVT));
7453 }
7454
7455 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7456 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7457 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7458 return true;
7459 };
7460
7461 SDValue N = REMNode.getOperand(0);
7462 SDValue D = REMNode.getOperand(1);
7463
7464 // Collect the values from each element.
7465 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7466 return SDValue();
7467
7468 // If this is a srem by a one, avoid the fold since it can be constant-folded.
7469 if (AllDivisorsAreOnes)
7470 return SDValue();
7471
7472 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7473 // since it can be best implemented as a bit test.
7474 if (AllDivisorsArePowerOfTwo)
7475 return SDValue();
7476
7477 SDValue PVal, AVal, KVal, QVal;
7478 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7479 if (HadOneDivisor) {
7480 // Try to turn PAmts into a splat, since we don't care about the values
7481 // that are currently '0'. If we can't, just keep '0'`s.
7483 // Try to turn AAmts into a splat, since we don't care about the
7484 // values that are currently '-1'. If we can't, change them to '0'`s.
7486 DAG.getConstant(0, DL, SVT));
7487 // Try to turn KAmts into a splat, since we don't care about the values
7488 // that are currently '-1'. If we can't, change them to '0'`s.
7490 DAG.getConstant(0, DL, ShSVT));
7491 }
7492
7493 PVal = DAG.getBuildVector(VT, DL, PAmts);
7494 AVal = DAG.getBuildVector(VT, DL, AAmts);
7495 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7496 QVal = DAG.getBuildVector(VT, DL, QAmts);
7497 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7498 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7499 QAmts.size() == 1 &&
7500 "Expected matchUnaryPredicate to return one element for scalable "
7501 "vectors");
7502 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7503 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7504 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7505 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7506 } else {
7507 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7508 PVal = PAmts[0];
7509 AVal = AAmts[0];
7510 KVal = KAmts[0];
7511 QVal = QAmts[0];
7512 }
7513
7514 // (mul N, P)
7515 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7516 Created.push_back(Op0.getNode());
7517
7518 // We need ADD to do this.
7519 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7520 return SDValue();
7521
7522 // (add (mul N, P), A)
7523 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7524 Created.push_back(Op0.getNode());
7525
7526 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7527 // divisors as a performance improvement, since rotating by 0 is a no-op.
7528 if (HadEvenDivisor) {
7529 // We need ROTR to do this.
7530 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7531 return SDValue();
7532 // SREM: (rotr (add (mul N, P), A), K)
7533 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7534 Created.push_back(Op0.getNode());
7535 }
7536
7537 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7538 return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7540}
7541
7543 const DenormalMode &Mode,
7544 SDNodeFlags Flags) const {
7545 SDLoc DL(Op);
7546 EVT VT = Op.getValueType();
7547 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7548 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7549
7550 // This is specifically a check for the handling of denormal inputs, not the
7551 // result.
7552 if (Mode.Input == DenormalMode::PreserveSign ||
7553 Mode.Input == DenormalMode::PositiveZero) {
7554 // Test = X == 0.0
7555 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ, /*Chain=*/{},
7556 /*Signaling=*/false, Flags);
7557 }
7558
7559 // Testing it with denormal inputs to avoid wrong estimate.
7560 //
7561 // Test = fabs(X) < SmallestNormal
7562 const fltSemantics &FltSem = VT.getFltSemantics();
7563 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7564 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7565 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op, Flags);
7566 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT, /*Chain=*/{},
7567 /*Signaling=*/false, Flags);
7568}
7569
7571 bool LegalOps, bool OptForSize,
7573 unsigned Depth) const {
7574 // fneg is removable even if it has multiple uses.
7575 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7577 return Op.getOperand(0);
7578 }
7579
7580 // Don't recurse exponentially.
7582 return SDValue();
7583
7584 // Pre-increment recursion depth for use in recursive calls.
7585 ++Depth;
7586 const SDNodeFlags Flags = Op->getFlags();
7587 EVT VT = Op.getValueType();
7588 unsigned Opcode = Op.getOpcode();
7589
7590 // Don't allow anything with multiple uses unless we know it is free.
7591 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7592 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7593 isFPExtFree(VT, Op.getOperand(0).getValueType());
7594 if (!IsFreeExtend)
7595 return SDValue();
7596 }
7597
7598 auto RemoveDeadNode = [&](SDValue N) {
7599 if (N && N.getNode()->use_empty())
7600 DAG.RemoveDeadNode(N.getNode());
7601 };
7602
7603 SDLoc DL(Op);
7604
7605 // Because getNegatedExpression can delete nodes we need a handle to keep
7606 // temporary nodes alive in case the recursion manages to create an identical
7607 // node.
7608 std::list<HandleSDNode> Handles;
7609
7610 switch (Opcode) {
7611 case ISD::ConstantFP: {
7612 // Don't invert constant FP values after legalization unless the target says
7613 // the negated constant is legal.
7614 bool IsOpLegal =
7616 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7617 OptForSize);
7618
7619 if (LegalOps && !IsOpLegal)
7620 break;
7621
7622 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7623 V.changeSign();
7624 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7625
7626 // If we already have the use of the negated floating constant, it is free
7627 // to negate it even it has multiple uses.
7628 if (!Op.hasOneUse() && CFP.use_empty())
7629 break;
7631 return CFP;
7632 }
7633 case ISD::SPLAT_VECTOR: {
7634 // fold splat_vector(fneg(X)) -> splat_vector(-X)
7635 SDValue X = Op.getOperand(0);
7637 break;
7638
7639 SDValue NegX = getCheaperNegatedExpression(X, DAG, LegalOps, OptForSize);
7640 if (!NegX)
7641 break;
7643 return DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, NegX);
7644 }
7645 case ISD::BUILD_VECTOR: {
7646 // Only permit BUILD_VECTOR of constants.
7647 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7648 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7649 }))
7650 break;
7651
7652 bool IsOpLegal =
7655 llvm::all_of(Op->op_values(), [&](SDValue N) {
7656 return N.isUndef() ||
7657 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7658 OptForSize);
7659 });
7660
7661 if (LegalOps && !IsOpLegal)
7662 break;
7663
7665 for (SDValue C : Op->op_values()) {
7666 if (C.isUndef()) {
7667 Ops.push_back(C);
7668 continue;
7669 }
7670 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7671 V.changeSign();
7672 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7673 }
7675 return DAG.getBuildVector(VT, DL, Ops);
7676 }
7677 case ISD::FADD: {
7678 if (!Flags.hasNoSignedZeros())
7679 break;
7680
7681 // After operation legalization, it might not be legal to create new FSUBs.
7682 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7683 break;
7684 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7685
7686 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7688 SDValue NegX =
7689 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7690 // Prevent this node from being deleted by the next call.
7691 if (NegX)
7692 Handles.emplace_back(NegX);
7693
7694 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7696 SDValue NegY =
7697 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7698
7699 // We're done with the handles.
7700 Handles.clear();
7701
7702 // Negate the X if its cost is less or equal than Y.
7703 if (NegX && (CostX <= CostY)) {
7704 Cost = CostX;
7705 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7706 if (NegY != N)
7707 RemoveDeadNode(NegY);
7708 return N;
7709 }
7710
7711 // Negate the Y if it is not expensive.
7712 if (NegY) {
7713 Cost = CostY;
7714 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7715 if (NegX != N)
7716 RemoveDeadNode(NegX);
7717 return N;
7718 }
7719 break;
7720 }
7721 case ISD::FSUB: {
7722 // We can't turn -(A-B) into B-A when we honor signed zeros.
7723 if (!Flags.hasNoSignedZeros())
7724 break;
7725
7726 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7727 // fold (fneg (fsub 0, Y)) -> Y
7728 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7729 if (C->isZero()) {
7731 return Y;
7732 }
7733
7734 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7736 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7737 }
7738 case ISD::FMUL:
7739 case ISD::FDIV: {
7740 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7741
7742 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7744 SDValue NegX =
7745 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7746 // Prevent this node from being deleted by the next call.
7747 if (NegX)
7748 Handles.emplace_back(NegX);
7749
7750 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7752 SDValue NegY =
7753 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7754
7755 // We're done with the handles.
7756 Handles.clear();
7757
7758 // Negate the X if its cost is less or equal than Y.
7759 if (NegX && (CostX <= CostY)) {
7760 Cost = CostX;
7761 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7762 if (NegY != N)
7763 RemoveDeadNode(NegY);
7764 return N;
7765 }
7766
7767 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7768 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7769 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7770 break;
7771
7772 // Negate the Y if it is not expensive.
7773 if (NegY) {
7774 Cost = CostY;
7775 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7776 if (NegX != N)
7777 RemoveDeadNode(NegX);
7778 return N;
7779 }
7780 break;
7781 }
7782 case ISD::FMA:
7783 case ISD::FMULADD:
7784 case ISD::FMAD: {
7785 if (!Flags.hasNoSignedZeros())
7786 break;
7787
7788 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7790 SDValue NegZ =
7791 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7792 // Give up if fail to negate the Z.
7793 if (!NegZ)
7794 break;
7795
7796 // Prevent this node from being deleted by the next two calls.
7797 Handles.emplace_back(NegZ);
7798
7799 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7801 SDValue NegX =
7802 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7803 // Prevent this node from being deleted by the next call.
7804 if (NegX)
7805 Handles.emplace_back(NegX);
7806
7807 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7809 SDValue NegY =
7810 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7811
7812 // We're done with the handles.
7813 Handles.clear();
7814
7815 // Negate the X if its cost is less or equal than Y.
7816 if (NegX && (CostX <= CostY)) {
7817 Cost = std::min(CostX, CostZ);
7818 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7819 if (NegY != N)
7820 RemoveDeadNode(NegY);
7821 return N;
7822 }
7823
7824 // Negate the Y if it is not expensive.
7825 if (NegY) {
7826 Cost = std::min(CostY, CostZ);
7827 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7828 if (NegX != N)
7829 RemoveDeadNode(NegX);
7830 return N;
7831 }
7832 break;
7833 }
7834
7835 case ISD::FP_EXTEND:
7836 case ISD::FSIN:
7837 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7838 OptForSize, Cost, Depth))
7839 return DAG.getNode(Opcode, DL, VT, NegV);
7840 break;
7841 case ISD::FP_ROUND:
7842 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7843 OptForSize, Cost, Depth))
7844 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7845 break;
7846 case ISD::SELECT:
7847 case ISD::VSELECT: {
7848 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7849 // iff at least one cost is cheaper and the other is neutral/cheaper
7850 SDValue LHS = Op.getOperand(1);
7852 SDValue NegLHS =
7853 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7854 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7855 RemoveDeadNode(NegLHS);
7856 break;
7857 }
7858
7859 // Prevent this node from being deleted by the next call.
7860 Handles.emplace_back(NegLHS);
7861
7862 SDValue RHS = Op.getOperand(2);
7864 SDValue NegRHS =
7865 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7866
7867 // We're done with the handles.
7868 Handles.clear();
7869
7870 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7871 (CostLHS != NegatibleCost::Cheaper &&
7872 CostRHS != NegatibleCost::Cheaper)) {
7873 RemoveDeadNode(NegLHS);
7874 RemoveDeadNode(NegRHS);
7875 break;
7876 }
7877
7878 Cost = std::min(CostLHS, CostRHS);
7879 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7880 }
7881 }
7882
7883 return SDValue();
7884}
7885
7886//===----------------------------------------------------------------------===//
7887// Legalization Utilities
7888//===----------------------------------------------------------------------===//
7889
7890bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7891 SDValue LHS, SDValue RHS,
7893 EVT HiLoVT, SelectionDAG &DAG,
7894 MulExpansionKind Kind, SDValue LL,
7895 SDValue LH, SDValue RL, SDValue RH) const {
7896 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7897 Opcode == ISD::SMUL_LOHI);
7898
7899 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7901 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7903 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7905 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7907
7908 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7909 return false;
7910
7911 unsigned OuterBitSize = VT.getScalarSizeInBits();
7912 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7913
7914 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7915 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7916 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7917
7918 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7919 bool Signed) -> bool {
7920 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7921 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7922 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7923 Hi = Lo.getValue(1);
7924 return true;
7925 }
7926 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7927 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7928 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7929 return true;
7930 }
7931 return false;
7932 };
7933
7934 SDValue Lo, Hi;
7935
7936 if (!LL.getNode() && !RL.getNode() &&
7938 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7939 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7940 }
7941
7942 if (!LL.getNode())
7943 return false;
7944
7945 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7946 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7947 DAG.MaskedValueIsZero(RHS, HighMask)) {
7948 // The inputs are both zero-extended.
7949 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7950 Result.push_back(Lo);
7951 Result.push_back(Hi);
7952 if (Opcode != ISD::MUL) {
7953 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7954 Result.push_back(Zero);
7955 Result.push_back(Zero);
7956 }
7957 return true;
7958 }
7959 }
7960
7961 if (!VT.isVector() && Opcode == ISD::MUL &&
7962 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7963 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7964 // The input values are both sign-extended.
7965 // TODO non-MUL case?
7966 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7967 Result.push_back(Lo);
7968 Result.push_back(Hi);
7969 return true;
7970 }
7971 }
7972
7973 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7974 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7975
7976 if (!LH.getNode() && !RH.getNode() &&
7979 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7980 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7981 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7982 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7983 }
7984
7985 if (!LH.getNode())
7986 return false;
7987
7988 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7989 return false;
7990
7991 Result.push_back(Lo);
7992
7993 if (Opcode == ISD::MUL) {
7994 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7995 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7996 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7997 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7998 Result.push_back(Hi);
7999 return true;
8000 }
8001
8002 // Compute the full width result.
8003 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
8004 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
8005 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
8006 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
8007 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
8008 };
8009
8010 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
8011 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
8012 return false;
8013
8014 // This is effectively the add part of a multiply-add of half-sized operands,
8015 // so it cannot overflow.
8016 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
8017
8018 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
8019 return false;
8020
8021 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
8022 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8023
8024 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
8026 if (UseGlue)
8027 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
8028 Merge(Lo, Hi));
8029 else
8030 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
8031 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
8032
8033 SDValue Carry = Next.getValue(1);
8034 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8035 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
8036
8037 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
8038 return false;
8039
8040 if (UseGlue)
8041 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
8042 Carry);
8043 else
8044 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
8045 Zero, Carry);
8046
8047 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
8048
8049 if (Opcode == ISD::SMUL_LOHI) {
8050 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
8051 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
8052 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
8053
8054 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
8055 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
8056 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
8057 }
8058
8059 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8060 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
8061 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8062 return true;
8063}
8064
8066 SelectionDAG &DAG, MulExpansionKind Kind,
8067 SDValue LL, SDValue LH, SDValue RL,
8068 SDValue RH) const {
8070 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
8071 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
8072 DAG, Kind, LL, LH, RL, RH);
8073 if (Ok) {
8074 assert(Result.size() == 2);
8075 Lo = Result[0];
8076 Hi = Result[1];
8077 }
8078 return Ok;
8079}
8080
8081// Optimize unsigned division or remainder by constants for types twice as large
8082// as a legal VT.
8083//
8084// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
8085// can be computed
8086// as:
8087// Sum = __builtin_uadd_overflow(Lo, High, &Sum);
8088// Remainder = Sum % Constant;
8089//
8090// If (1 << (BitWidth / 2)) % Constant != 1, we can search for a smaller value
8091// W such that W != (BitWidth / 2) and (1 << W) % Constant == 1. We can break
8092// High:Low into 3 chunks of W bits and compute remainder as
8093// Sum = Chunk0 + Chunk1 + Chunk2;
8094// Remainder = Sum % Constant;
8095//
8096// This is based on "Remainder by Summing Digits" from Hacker's Delight.
8097//
8098// For division, we can compute the remainder using the algorithm described
8099// above, subtract it from the dividend to get an exact multiple of Constant.
8100 // Then multiply that exact multiple by the multiplicative inverse modulo
8101// (1 << (BitWidth / 2)) to get the quotient.
8102
8103// If Constant is even, we can shift right the dividend and the divisor by the
8104// number of trailing zeros in Constant before applying the remainder algorithm.
8105// If we're after the quotient, we can subtract this value from the shifted
8106// dividend and multiply by the multiplicative inverse of the shifted divisor.
8107// If we want the remainder, we shift the value left by the number of trailing
8108// zeros and add the bits that were shifted out of the dividend.
8111 EVT HiLoVT, SelectionDAG &DAG,
8112 SDValue LL, SDValue LH) const {
8113 unsigned Opcode = N->getOpcode();
8114 EVT VT = N->getValueType(0);
8115
8116 // TODO: Support signed division/remainder.
8117 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
8118 return false;
8119 assert(
8120 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
8121 "Unexpected opcode");
8122
8123 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
8124 if (!CN)
8125 return false;
8126
8127 APInt Divisor = CN->getAPIntValue();
8128 unsigned BitWidth = Divisor.getBitWidth();
8129 unsigned HBitWidth = BitWidth / 2;
8131 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8132
8133 // We depend on the UREM by constant optimization in DAGCombiner that requires
8134 // high multiply.
8135 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
8137 return false;
8138
8139 // Don't expand if optimizing for size.
8140 if (DAG.shouldOptForSize())
8141 return false;
8142
8143 // Early out for 0 or 1 divisors.
8144 if (Divisor.ule(1))
8145 return false;
8146
8147 // If the divisor is even, shift it until it becomes odd.
8148 unsigned TrailingZeros = 0;
8149 if (!Divisor[0]) {
8150 TrailingZeros = Divisor.countr_zero();
8151 Divisor.lshrInPlace(TrailingZeros);
8152 }
8153
8154 // After removing trailing zeros, the divisor needs to be less than
8155 // (1 << HBitWidth).
8156 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
8157 if (Divisor.uge(HalfMaxPlus1))
8158 return false;
8159
8160 // Look for the largest chunk width W such that (1 << W) % Divisor == 1 or
8161 // (1 << W) % Divisor == -1.
8162 unsigned BestChunkWidth = 0, AltChunkWidth = 0;
8163 for (unsigned I = HBitWidth, E = HBitWidth / 2; I > E; --I) {
8164 // Skip HBitWidth-1, it doesn't have enough bits for carries.
8165 if (I == HBitWidth - 1)
8166 continue;
8167
8168 APInt Mod = APInt::getOneBitSet(Divisor.getBitWidth(), I).urem(Divisor);
8169
8170 if (Mod.isOne()) {
8171 BestChunkWidth = I;
8172 break;
8173 }
8174
8175 // We have an alternate strategy for Remainder == Divisor - 1.
8176 // FIXME: Support HBitWidth.
8177 if (I != HBitWidth && Mod == Divisor - 1)
8178 AltChunkWidth = I;
8179 }
8180
8181 bool Alternate = false;
8182 if (!BestChunkWidth) {
8183 if (!AltChunkWidth)
8184 return false;
8185 Alternate = true;
8186 BestChunkWidth = AltChunkWidth;
8187 }
8188
8189 SDLoc dl(N);
8190
8191 assert(!LL == !LH && "Expected both input halves or no input halves!");
8192 if (!LL)
8193 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
8194
8195 bool HasFSHR = isOperationLegal(ISD::FSHR, HiLoVT);
8196
8197 auto GetFSHR = [&](SDValue Lo, SDValue Hi, unsigned ShiftAmt) {
8198 assert(ShiftAmt > 0 && ShiftAmt < HBitWidth);
8199 if (HasFSHR)
8200 return DAG.getNode(ISD::FSHR, dl, HiLoVT, Hi, Lo,
8201 DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl));
8202 return DAG.getNode(
8203 ISD::OR, dl, HiLoVT,
8204 DAG.getNode(ISD::SRL, dl, HiLoVT, Lo,
8205 DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl)),
8206 DAG.getNode(
8207 ISD::SHL, dl, HiLoVT, Hi,
8208 DAG.getShiftAmountConstant(HBitWidth - ShiftAmt, HiLoVT, dl)));
8209 };
8210
8211 // Helper to perform a right shift on a 128-bit value split into two halves.
8212 // Handles shifts >= HBitWidth by moving Hi to Lo and shifting Hi.
8213 auto ShiftRight = [&](SDValue &Lo, SDValue &Hi, unsigned ShiftAmt) {
8214 if (ShiftAmt == 0)
8215 return;
8216 if (ShiftAmt < HBitWidth) {
8217 Lo = GetFSHR(Lo, Hi, ShiftAmt);
8218 Hi = DAG.getNode(ISD::SRL, dl, HiLoVT, Hi,
8219 DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl));
8220 } else if (ShiftAmt == HBitWidth) {
8221 Lo = Hi;
8222 Hi = DAG.getConstant(0, dl, HiLoVT);
8223 } else {
8224 Lo = DAG.getNode(
8225 ISD::SRL, dl, HiLoVT, Hi,
8226 DAG.getShiftAmountConstant(ShiftAmt - HBitWidth, HiLoVT, dl));
8227 Hi = DAG.getConstant(0, dl, HiLoVT);
8228 }
8229 };
8230
8231 // Shift the input by the number of TrailingZeros in the divisor. The
8232 // shifted out bits will be added to the remainder later.
8233 SDValue PartialRemL, PartialRemH;
8234 if (TrailingZeros && Opcode != ISD::UDIV) {
8235 // Save the shifted off bits if we need the remainder.
8236 if (TrailingZeros < HBitWidth) {
8237 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
8238 PartialRemL = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
8239 DAG.getConstant(Mask, dl, HiLoVT));
8240 } else if (TrailingZeros == HBitWidth) {
8241 // All of LL is part of the remainder.
8242 PartialRemL = LL;
8243 } else {
8244 // TrailingZeros > HBitWidth: LL and part of LH are the remainder.
8245 PartialRemL = LL;
8246 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros - HBitWidth);
8247 PartialRemH = DAG.getNode(ISD::AND, dl, HiLoVT, LH,
8248 DAG.getConstant(Mask, dl, HiLoVT));
8249 }
8250 }
8251
8252 SDValue Sum;
8253 // If BestChunkWidth is HBitWidth add low and high half. If there is a carry
8254 // out, add that to the final sum.
8255 if (BestChunkWidth == HBitWidth) {
8256 assert(!Alternate);
8257 // Shift LH:LL right if there were trailing zeros in the divisor.
8258 ShiftRight(LL, LH, TrailingZeros);
8259
8260 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8261 EVT SetCCType =
8262 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
8264 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
8265 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
8266 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
8267 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
8268 } else {
8269 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
8270 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
8271 // If the boolean for the target is 0 or 1, we can add the setcc result
8272 // directly.
8273 if (getBooleanContents(HiLoVT) ==
8275 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
8276 else
8277 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
8278 DAG.getConstant(0, dl, HiLoVT));
8279 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
8280 }
8281 } else {
8282 // Otherwise split into multple chunks and add them together. We chose
8283 // BestChunkWidth so that the sum will not overflow.
8284 SDValue Mask = DAG.getConstant(
8285 APInt::getLowBitsSet(HBitWidth, BestChunkWidth), dl, HiLoVT);
8286
8287 for (unsigned I = 0; I < BitWidth - TrailingZeros; I += BestChunkWidth) {
8288 // If there were trailing zeros in the divisor, increase the shift amount.
8289 unsigned Shift = I + TrailingZeros;
8290 SDValue Chunk;
8291 if (Shift == 0)
8292 Chunk = LL;
8293 else if (Shift >= HBitWidth)
8294 Chunk = DAG.getNode(
8295 ISD::SRL, dl, HiLoVT, LH,
8296 DAG.getShiftAmountConstant(Shift - HBitWidth, HiLoVT, dl));
8297 else
8298 Chunk = GetFSHR(LL, LH, Shift);
8299 // If we're on the last chunk, we don't need an AND.
8300 if (I + BestChunkWidth < BitWidth - TrailingZeros)
8301 Chunk = DAG.getNode(ISD::AND, dl, HiLoVT, Chunk, Mask);
8302 if (!Sum) {
8303 Sum = Chunk;
8304 } else {
8305 // For Alternate, we need to subtract odd chunks.
8306 unsigned ChunkNum = I / BestChunkWidth;
8307 unsigned Opc = (Alternate && (ChunkNum % 2) != 0) ? ISD::SUB : ISD::ADD;
8308 Sum = DAG.getNode(Opc, dl, HiLoVT, Sum, Chunk);
8309 }
8310 }
8311
8312 // For Alternate, the sum may be negative, but we need a positive sum. We
8313 // can increase it by a multiple of the divisor to make it positive. For 3
8314 // chunks the largest negative value is -(2^BestChunkWidth - 1). For 4
8315 // chunks, it's 2*-(2^BestChunkWidth - 1). We know that 2^BestChunkWidth + 1
8316 // is a multiple of the divisor. Add that 1 or 2 times to make the sum
8317 // positive.
8318 if (Alternate) {
8319 unsigned NumChunks = divideCeil(BitWidth - TrailingZeros, BestChunkWidth);
8320 assert(NumChunks <= 4);
8321
8322 APInt Adjust = APInt::getOneBitSet(HBitWidth, BestChunkWidth);
8323 Adjust.setBit(0);
8324 // If there are 4 chunks, we need to adjust twice.
8325 if (NumChunks == 4)
8326 Adjust <<= 1;
8327 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum,
8328 DAG.getConstant(Adjust, dl, HiLoVT));
8329 }
8330 }
8331
8332 // Perform a HiLoVT urem on the Sum using truncated divisor.
8333 SDValue RemL =
8334 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
8335 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
8336 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
8337
8338 if (Opcode != ISD::UREM) {
8339 // If we didn't shift LH/LR earlier, do it now.
8340 if (BestChunkWidth != HBitWidth)
8341 ShiftRight(LL, LH, TrailingZeros);
8342
8343 // Subtract the remainder from the shifted dividend.
8344 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
8345 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
8346
8347 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
8348
8349 // Multiply by the multiplicative inverse of the divisor modulo
8350 // (1 << BitWidth).
8351 APInt MulFactor = Divisor.multiplicativeInverse();
8352
8353 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
8354 DAG.getConstant(MulFactor, dl, VT));
8355
8356 // Split the quotient into low and high parts.
8357 SDValue QuotL, QuotH;
8358 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
8359 Result.push_back(QuotL);
8360 Result.push_back(QuotH);
8361 }
8362
8363 if (Opcode != ISD::UDIV) {
8364 // If we shifted the input, shift the remainder left and add the bits we
8365 // shifted off the input.
8366 if (TrailingZeros) {
8367 if (TrailingZeros < HBitWidth) {
8368 // Shift RemH:RemL left by TrailingZeros.
8369 // RemH gets the high bits shifted out of RemL.
8370 RemH = DAG.getNode(
8371 ISD::SRL, dl, HiLoVT, RemL,
8372 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros, HiLoVT, dl));
8373 RemL =
8374 DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
8375 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8376 // OR in the partial remainder.
8377 RemL = DAG.getNode(ISD::OR, dl, HiLoVT, RemL, PartialRemL,
8379 } else if (TrailingZeros == HBitWidth) {
8380 // Shift left by exactly HBitWidth: RemH becomes RemL, RemL becomes
8381 // PartialRemL.
8382 RemH = RemL;
8383 RemL = PartialRemL;
8384 } else {
8385 // Shift left by more than HBitWidth.
8386 RemH = DAG.getNode(
8387 ISD::SHL, dl, HiLoVT, RemL,
8388 DAG.getShiftAmountConstant(TrailingZeros - HBitWidth, HiLoVT, dl));
8389 RemH = DAG.getNode(ISD::OR, dl, HiLoVT, RemH, PartialRemH,
8391 RemL = PartialRemL;
8392 }
8393 }
8394 Result.push_back(RemL);
8395 Result.push_back(RemH);
8396 }
8397
8398 return true;
8399}
8400
8401// Check that (every element of) Z is undef or not an exact multiple of BW.
8402static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8404 Z,
8405 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
8406 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8407}
8408
8410 EVT VT = Node->getValueType(0);
8411 SDValue ShX, ShY;
8412 SDValue ShAmt, InvShAmt;
8413 SDValue X = Node->getOperand(0);
8414 SDValue Y = Node->getOperand(1);
8415 SDValue Z = Node->getOperand(2);
8416 SDValue Mask = Node->getOperand(3);
8417 SDValue VL = Node->getOperand(4);
8418
8419 unsigned BW = VT.getScalarSizeInBits();
8420 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8421 SDLoc DL(SDValue(Node, 0));
8422
8423 EVT ShVT = Z.getValueType();
8424 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8425 // fshl: X << C | Y >> (BW - C)
8426 // fshr: X << (BW - C) | Y >> C
8427 // where C = Z % BW is not zero
8428 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8429 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8430 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
8431 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8432 VL);
8433 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8434 VL);
8435 } else {
8436 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8437 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8438 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8439 if (isPowerOf2_32(BW)) {
8440 // Z % BW -> Z & (BW - 1)
8441 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8442 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8443 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8444 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8445 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8446 } else {
8447 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8448 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8449 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8450 }
8451
8452 SDValue One = DAG.getConstant(1, DL, ShVT);
8453 if (IsFSHL) {
8454 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8455 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8456 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8457 } else {
8458 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8459 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8460 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8461 }
8462 }
8463 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8464}
8465
8467 SelectionDAG &DAG) const {
8468 if (Node->isVPOpcode())
8469 return expandVPFunnelShift(Node, DAG);
8470
8471 EVT VT = Node->getValueType(0);
8472
8473 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8477 return SDValue();
8478
8479 SDValue X = Node->getOperand(0);
8480 SDValue Y = Node->getOperand(1);
8481 SDValue Z = Node->getOperand(2);
8482
8483 unsigned BW = VT.getScalarSizeInBits();
8484 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8485 SDLoc DL(SDValue(Node, 0));
8486
8487 EVT ShVT = Z.getValueType();
8488
8489 // If a funnel shift in the other direction is more supported, use it.
8490 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8491 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8492 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8493 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8494 // fshl X, Y, Z -> fshr X, Y, -Z
8495 // fshr X, Y, Z -> fshl X, Y, -Z
8496 Z = DAG.getNegative(Z, DL, ShVT);
8497 } else {
8498 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8499 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8500 SDValue One = DAG.getConstant(1, DL, ShVT);
8501 if (IsFSHL) {
8502 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8503 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8504 } else {
8505 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8506 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8507 }
8508 Z = DAG.getNOT(DL, Z, ShVT);
8509 }
8510 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8511 }
8512
8513 SDValue ShX, ShY;
8514 SDValue ShAmt, InvShAmt;
8515 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8516 // fshl: X << C | Y >> (BW - C)
8517 // fshr: X << (BW - C) | Y >> C
8518 // where C = Z % BW is not zero
8519 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8520 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8521 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8522 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8523 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8524 } else {
8525 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8526 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8527 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8528 if (isPowerOf2_32(BW)) {
8529 // Z % BW -> Z & (BW - 1)
8530 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8531 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8532 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8533 } else {
8534 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8535 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8536 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8537 }
8538
8539 SDValue One = DAG.getConstant(1, DL, ShVT);
8540 if (IsFSHL) {
8541 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8542 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8543 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8544 } else {
8545 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8546 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8547 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8548 }
8549 }
8550 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8551}
8552
8553// TODO: Merge with expandFunnelShift.
8555 SelectionDAG &DAG) const {
8556 EVT VT = Node->getValueType(0);
8557 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8558 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8559 SDValue Op0 = Node->getOperand(0);
8560 SDValue Op1 = Node->getOperand(1);
8561 SDLoc DL(SDValue(Node, 0));
8562
8563 EVT ShVT = Op1.getValueType();
8564 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8565
8566 // If a rotate in the other direction is more supported, use it.
8567 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8568 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8569 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8570 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8571 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8572 }
8573
8574 if (!AllowVectorOps && VT.isVector() &&
8580 return SDValue();
8581
8582 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8583 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8584 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8585 SDValue ShVal;
8586 SDValue HsVal;
8587 if (isPowerOf2_32(EltSizeInBits)) {
8588 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8589 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8590 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8591 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8592 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8593 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8594 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8595 } else {
8596 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8597 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8598 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8599 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8600 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8601 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8602 SDValue One = DAG.getConstant(1, DL, ShVT);
8603 HsVal =
8604 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8605 }
8606 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8607}
8608
8609/// Check if CLMUL on VT can eventually reach a type with legal CLMUL through
8610/// a chain of halving decompositions (halving element width) and/or vector
8611/// widening (doubling element count). This guides expansion strategy selection:
8612/// if true, the halving/widening path produces better code than bit-by-bit.
8613///
8614/// HalveDepth tracks halving steps only (each creates ~4x more operations).
8615/// Widening steps are cheap (O(1) pad/extract) and don't count.
8616/// Limiting halvings to 2 prevents exponential blowup:
8617/// 1 halving: ~4 sub-CLMULs (good, e.g. v8i16 -> v8i8)
8618/// 2 halvings: ~16 sub-CLMULs (acceptable, e.g. v4i32 -> v4i16 -> v8i8)
8619/// 3 halvings: ~64 sub-CLMULs (worse than bit-by-bit expansion)
8621 EVT VT, unsigned HalveDepth = 0,
8622 unsigned TotalDepth = 0) {
8623 if (HalveDepth > 2 || TotalDepth > 8 || !VT.isFixedLengthVector())
8624 return false;
8626 return true;
8627 if (!TLI.isTypeLegal(VT))
8628 return false;
8629
8630 unsigned BW = VT.getScalarSizeInBits();
8631
8632 // Halve: halve element width, same element count.
8633 // This is the expensive step -- each halving creates ~4x more operations.
8634 if (BW % 2 == 0) {
8635 EVT HalfEltVT = EVT::getIntegerVT(Ctx, BW / 2);
8636 EVT HalfVT = VT.changeVectorElementType(Ctx, HalfEltVT);
8637 if (TLI.isTypeLegal(HalfVT) &&
8638 canNarrowCLMULToLegal(TLI, Ctx, HalfVT, HalveDepth + 1, TotalDepth + 1))
8639 return true;
8640 }
8641
8642 // Widen: double element count (fixed-width vectors only).
8643 // This is cheap -- just INSERT_SUBVECTOR + EXTRACT_SUBVECTOR.
8644 EVT WideVT = VT.getDoubleNumVectorElementsVT(Ctx);
8645 if (TLI.isTypeLegal(WideVT) &&
8646 canNarrowCLMULToLegal(TLI, Ctx, WideVT, HalveDepth, TotalDepth + 1))
8647 return true;
8648
8649 return false;
8650}
8651
8653 SDLoc DL(Node);
8654 EVT VT = Node->getValueType(0);
8655 SDValue X = Node->getOperand(0);
8656 SDValue Y = Node->getOperand(1);
8657 unsigned BW = VT.getScalarSizeInBits();
8658 unsigned Opcode = Node->getOpcode();
8659 LLVMContext &Ctx = *DAG.getContext();
8660
8661 switch (Opcode) {
8662 case ISD::CLMUL: {
8663 // For vector types, try decomposition strategies that leverage legal
8664 // CLMUL on narrower or wider element types, avoiding the expensive
8665 // bit-by-bit expansion.
8666 if (VT.isVector()) {
8667 // Strategy 1: Halving decomposition to half-element-width CLMUL.
8668 // Applies ExpandIntRes_CLMUL's identity element-wise:
8669 // CLMUL(X, Y) = (Hi << HalfBW) | Lo
8670 // where:
8671 // Lo = CLMUL(XLo, YLo)
8672 // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8673 unsigned HalfBW = BW / 2;
8674 if (BW % 2 == 0) {
8675 EVT HalfEltVT = EVT::getIntegerVT(Ctx, HalfBW);
8676 EVT HalfVT =
8677 EVT::getVectorVT(Ctx, HalfEltVT, VT.getVectorElementCount());
8678 if (isTypeLegal(HalfVT) && canNarrowCLMULToLegal(*this, Ctx, HalfVT,
8679 /*HalveDepth=*/1)) {
8680 SDValue ShAmt = DAG.getShiftAmountConstant(HalfBW, VT, DL);
8681
8682 // Extract low and high halves of each element.
8683 SDValue XLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, X);
8684 SDValue XHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
8685 DAG.getNode(ISD::SRL, DL, VT, X, ShAmt));
8686 SDValue YLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, Y);
8687 SDValue YHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
8688 DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt));
8689
8690 // Lo = CLMUL(XLo, YLo)
8691 SDValue Lo = DAG.getNode(ISD::CLMUL, DL, HalfVT, XLo, YLo);
8692
8693 // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8694 SDValue LoH = DAG.getNode(ISD::CLMULH, DL, HalfVT, XLo, YLo);
8695 SDValue Cross1 = DAG.getNode(ISD::CLMUL, DL, HalfVT, XLo, YHi);
8696 SDValue Cross2 = DAG.getNode(ISD::CLMUL, DL, HalfVT, XHi, YLo);
8697 SDValue Cross = DAG.getNode(ISD::XOR, DL, HalfVT, Cross1, Cross2);
8698 SDValue Hi = DAG.getNode(ISD::XOR, DL, HalfVT, LoH, Cross);
8699
8700 // Reassemble: Result = ZExt(Lo) | (AnyExt(Hi) << HalfBW)
8701 SDValue LoExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo);
8702 SDValue HiExt = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Hi);
8703 SDValue HiShifted = DAG.getNode(ISD::SHL, DL, VT, HiExt, ShAmt);
8704 return DAG.getNode(ISD::OR, DL, VT, LoExt, HiShifted);
8705 }
8706 }
8707
8708 // Strategy 2: Promote to double-element-width CLMUL.
8709 // CLMUL(X, Y) = Trunc(CLMUL(AnyExt(X), AnyExt(Y)))
8710 {
8711 EVT ExtVT = VT.widenIntegerElementType(Ctx);
8712 if (isTypeLegal(ExtVT) && isOperationLegalOrCustom(ISD::CLMUL, ExtVT)) {
8713 // If CLMUL on ExtVT is Custom (not Legal), the target may
8714 // scalarize it, costing O(NumElements) scalar ops. The bit-by-bit
8715 // fallback costs O(BW) vectorized iterations. Only widen when
8716 // element count is small enough that scalarization is cheaper.
8717 unsigned NumElts = VT.getVectorMinNumElements();
8718 if (isOperationLegal(ISD::CLMUL, ExtVT) || NumElts < BW) {
8719 SDValue XExt = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, X);
8720 SDValue YExt = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, Y);
8721 SDValue Mul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
8722 return DAG.getNode(ISD::TRUNCATE, DL, VT, Mul);
8723 }
8724 }
8725 }
8726
8727 // Strategy 3: Widen element count (pad with undef, do CLMUL on wider
8728 // vector, extract lower result). CLMUL is element-wise, so upper
8729 // (undef) lanes don't affect the lower results.
8730 // e.g. v4i16 => pad to v8i16 => halve to v8i8 PMUL => extract v4i16.
8731 if (auto EC = VT.getVectorElementCount(); EC.isFixed()) {
8732 EVT WideVT = EVT::getVectorVT(Ctx, VT.getVectorElementType(), EC * 2);
8733 if (isTypeLegal(WideVT) && canNarrowCLMULToLegal(*this, Ctx, WideVT)) {
8734 SDValue Undef = DAG.getUNDEF(WideVT);
8735 SDValue XWide = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, Undef,
8736 X, DAG.getVectorIdxConstant(0, DL));
8737 SDValue YWide = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, Undef,
8738 Y, DAG.getVectorIdxConstant(0, DL));
8739 SDValue WideRes = DAG.getNode(ISD::CLMUL, DL, WideVT, XWide, YWide);
8740 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, WideRes,
8741 DAG.getVectorIdxConstant(0, DL));
8742 }
8743 }
8744 }
8745
8746 // NOTE: If you change this expansion, please update the cost model
8747 // calculation in BasicTTIImpl::getTypeBasedIntrinsicInstrCost for
8748 // Intrinsic::clmul.
8749
8750 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
8751
8752 SDValue Res = DAG.getConstant(0, DL, VT);
8753 for (unsigned I = 0; I < BW; ++I) {
8754 SDValue ShiftAmt = DAG.getShiftAmountConstant(I, VT, DL);
8755 SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT);
8756 SDValue YMasked = DAG.getNode(ISD::AND, DL, VT, Y, Mask);
8757
8758 // For targets with a fast bit test instruction (e.g., x86 BT) or without
8759 // multiply, use a shift-based expansion to avoid expensive MUL
8760 // instructions.
8761 SDValue Part;
8762 if (!hasBitTest(Y, ShiftAmt) &&
8765 Part = DAG.getNode(ISD::MUL, DL, VT, X, YMasked);
8766 } else {
8767 // Canonical bit test: (Y & (1 << I)) != 0
8768 SDValue Zero = DAG.getConstant(0, DL, VT);
8769 SDValue Cond = DAG.getSetCC(DL, SetCCVT, YMasked, Zero, ISD::SETEQ);
8770 SDValue XShifted = DAG.getNode(ISD::SHL, DL, VT, X, ShiftAmt);
8771 Part = DAG.getSelect(DL, VT, Cond, Zero, XShifted);
8772 }
8773 Res = DAG.getNode(ISD::XOR, DL, VT, Res, Part);
8774 }
8775 return Res;
8776 }
8777 case ISD::CLMULR:
8778 // If we have CLMUL/CLMULH, merge the shifted results to form CLMULR.
8781 SDValue Lo = DAG.getNode(ISD::CLMUL, DL, VT, X, Y);
8782 SDValue Hi = DAG.getNode(ISD::CLMULH, DL, VT, X, Y);
8783 Lo = DAG.getNode(ISD::SRL, DL, VT, Lo,
8784 DAG.getShiftAmountConstant(BW - 1, VT, DL));
8785 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8786 DAG.getShiftAmountConstant(1, VT, DL));
8787 return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
8788 }
8789 [[fallthrough]];
8790 case ISD::CLMULH: {
8791 EVT ExtVT = VT.widenIntegerElementType(Ctx);
8792 // Use bitreverse-based lowering (CLMULR/H = rev(CLMUL(rev,rev)) >> S)
8793 // when any of these hold:
8794 // (a) ZERO_EXTEND to ExtVT or SRL on ExtVT isn't legal.
8795 // (b) CLMUL is legal on VT but not on ExtVT (e.g. v8i8 on AArch64).
8796 // (c) CLMUL on ExtVT isn't legal, but CLMUL on VT can be efficiently
8797 // expanded via halving/widening to reach legal CLMUL. The bitreverse
8798 // path creates CLMUL(VT) which will be expanded efficiently. The
8799 // promote path would create CLMUL(ExtVT) => halving => CLMULH(VT),
8800 // causing a cycle.
8801 // Note: when CLMUL is legal on ExtVT, the zext => CLMUL(ExtVT) => shift
8802 // => trunc path is preferred over the bitreverse path, as it avoids the
8803 // cost of 3 bitreverse operations.
8808 canNarrowCLMULToLegal(*this, Ctx, VT)))) {
8809 SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X);
8810 SDValue YRev = DAG.getNode(ISD::BITREVERSE, DL, VT, Y);
8811 SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, VT, XRev, YRev);
8812 SDValue Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ClMul);
8813 if (Opcode == ISD::CLMULH)
8814 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
8815 DAG.getShiftAmountConstant(1, VT, DL));
8816 return Res;
8817 }
8818 SDValue XExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, X);
8819 SDValue YExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Y);
8820 SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
8821 unsigned ShAmt = Opcode == ISD::CLMULR ? BW - 1 : BW;
8822 SDValue HiBits = DAG.getNode(ISD::SRL, DL, ExtVT, ClMul,
8823 DAG.getShiftAmountConstant(ShAmt, ExtVT, DL));
8824 return DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits);
8825 }
8826 }
8827 llvm_unreachable("Expected CLMUL, CLMULR, or CLMULH");
8828}
8829
8831 SelectionDAG &DAG) const {
8832 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8833 EVT VT = Node->getValueType(0);
8834 unsigned VTBits = VT.getScalarSizeInBits();
8835 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8836
8837 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8838 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8839 SDValue ShOpLo = Node->getOperand(0);
8840 SDValue ShOpHi = Node->getOperand(1);
8841 SDValue ShAmt = Node->getOperand(2);
8842 EVT ShAmtVT = ShAmt.getValueType();
8843 EVT ShAmtCCVT =
8844 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8845 SDLoc dl(Node);
8846
8847 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8848 // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8849 // away during isel.
8850 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8851 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
8852 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8853 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8854 : DAG.getConstant(0, dl, VT);
8855
8856 SDValue Tmp2, Tmp3;
8857 if (IsSHL) {
8858 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8859 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8860 } else {
8861 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8862 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8863 }
8864
8865 // If the shift amount is larger or equal than the width of a part we don't
8866 // use the result from the FSHL/FSHR. Insert a test and select the appropriate
8867 // values for large shift amounts.
8868 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8869 DAG.getConstant(VTBits, dl, ShAmtVT));
8870 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8871 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8872
8873 if (IsSHL) {
8874 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8875 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8876 } else {
8877 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8878 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8879 }
8880}
8881
// Lowers an fcanonicalize node by emitting a strict multiply of the operand
// with 1.0 and returning the multiply's value result (chain result unused).
8883 SelectionDAG &DAG) const {
8884 // This implements llvm.canonicalize.f* by multiplication with 1.0, as
8885 // suggested in
8886 // https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
8887 // It uses strict_fp operations even outside a strict_fp context in order
8888 // to guarantee that the canonicalization is not optimized away by later
8889 // passes. The result chain introduced by that is intentionally ignored
8890 // since no ordering requirement is intended here.
8891 EVT VT = Node->getValueType(0);
8892 SDLoc DL(Node);
// Copy the node's flags but mark the strict multiply as not raising FP
// exceptions: it exists only to force canonicalization, not to trap.
8893 SDNodeFlags Flags = Node->getFlags();
8894 Flags.setNoFPExcept(true);
8895 SDValue One = DAG.getConstantFP(1.0, DL, VT);
// STRICT_FMUL produces {value, chain}; the chain input is the entry node
// and, as noted above, the chain output is deliberately dropped.
8896 SDValue Mul =
8897 DAG.getNode(ISD::STRICT_FMUL, DL, {VT, MVT::Other},
8898 {DAG.getEntryNode(), Node->getOperand(0), One}, Flags);
8899 return Mul;
8900}
8901
// Expands a (possibly strict) FP_TO_SINT node into integer bit manipulation.
// Writes the lowered value to Result and returns true on success; returns
// false when the conversion is unsupported (currently anything other than
// non-strict f32 -> i64).
8903 SelectionDAG &DAG) const {
// For strict nodes operand 0 is the chain, so the FP source is operand 1.
8904 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8905 SDValue Src = Node->getOperand(OpNo);
8906 EVT SrcVT = Src.getValueType();
8907 EVT DstVT = Node->getValueType(0);
8908 SDLoc dl(SDValue(Node, 0));
8909
8910 // FIXME: Only f32 to i64 conversions are supported.
8911 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8912 return false;
8913
8914 if (Node->isStrictFPOpcode())
8915 // When a NaN is converted to an integer a trap is allowed. We can't
8916 // use this expansion here because it would eliminate that trap. Other
8917 // traps are also allowed and cannot be eliminated. See
8918 // IEEE 754-2008 sec 5.8.
8919 return false;
8920
8921 // Expand f32 -> i64 conversion
8922 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8923 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8924 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8925 EVT IntVT = SrcVT.changeTypeToInteger();
8926 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8927
// IEEE-754 single precision layout: bits [30:23] exponent (bias 127),
// bits [22:0] mantissa, bit 31 sign.
8928 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8929 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8930 SDValue Bias = DAG.getConstant(127, dl, IntVT);
8931 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8932 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8933 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8934
8935 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8936
// Unbiased exponent = ((Bits & ExpMask) >> 23) - 127.
8937 SDValue ExponentBits = DAG.getNode(
8938 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8939 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8940 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8941
// Sign becomes all-ones for negative inputs, zero otherwise (arithmetic
// shift of the isolated sign bit across the whole word).
8942 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8943 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8944 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8945 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8946
// Mantissa with the implicit leading 1 (bit 23) made explicit.
8947 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8948 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8949 DAG.getConstant(0x00800000, dl, IntVT));
8950
8951 R = DAG.getZExtOrTrunc(R, dl, DstVT);
8952
// Scale the mantissa by the exponent: shift left when Exponent > 23,
// otherwise shift right by the deficit.
8953 R = DAG.getSelectCC(
8954 dl, Exponent, ExponentLoBit,
8955 DAG.getNode(ISD::SHL, dl, DstVT, R,
8956 DAG.getZExtOrTrunc(
8957 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8958 dl, IntShVT)),
8959 DAG.getNode(ISD::SRL, dl, DstVT, R,
8960 DAG.getZExtOrTrunc(
8961 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8962 dl, IntShVT)),
8963 ISD::SETGT);
8964
// Convert sign-magnitude to two's complement: (R ^ Sign) - Sign negates R
// exactly when Sign is all-ones.
8965 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8966 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8967
// A negative unbiased exponent means |Src| < 1, so the result is 0.
8968 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8969 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8970 return true;
8971}
8972
// Expands a (possibly strict) FP_TO_UINT node in terms of FP_TO_SINT plus a
// signmask offset. Writes the lowered value to Result (and the output chain
// to Chain for strict nodes) and returns true, or returns false when the
// target lacks the operations needed to do this profitably.
8974 SDValue &Chain,
8975 SelectionDAG &DAG) const {
8976 SDLoc dl(SDValue(Node, 0));
// For strict nodes operand 0 is the chain, so the FP source is operand 1.
8977 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8978 SDValue Src = Node->getOperand(OpNo);
8979
8980 EVT SrcVT = Src.getValueType();
8981 EVT DstVT = Node->getValueType(0);
8982 EVT SetCCVT =
8983 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8984 EVT DstSetCCVT =
8985 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8986
8987 // Only expand vector types if we have the appropriate vector bit operations.
8988 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8990 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8992 return false;
8993
8994 // If the maximum float value is smaller then the signed integer range,
8995 // the destination signmask can't be represented by the float, so we can
8996 // just use FP_TO_SINT directly.
8997 const fltSemantics &APFSem = SrcVT.getFltSemantics();
8998 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8999 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
9001 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
9002 if (Node->isStrictFPOpcode()) {
9003 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
9004 { Node->getOperand(0), Src });
9005 Chain = Result.getValue(1);
9006 } else
9007 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
9008 return true;
9009 }
9010
9011 // Don't expand it if there isn't cheap fsub instruction.
9013 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
9014 return false;
9015
// Cst holds the destination signmask (2^(DstBits-1)) rounded into SrcVT;
// Sel is true when Src is strictly below it, i.e. fits in signed range.
9016 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
9017 SDValue Sel;
9018
9019 if (Node->isStrictFPOpcode()) {
// Signaling compare so an sNaN input still raises invalid under strictfp.
9020 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
9021 Node->getOperand(0), /*IsSignaling*/ true);
9022 Chain = Sel.getValue(1);
9023 } else {
9024 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
9025 }
9026
9027 bool Strict = Node->isStrictFPOpcode() ||
9028 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
9029
9030 if (Strict) {
9031 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
9032 // signmask then offset (the result of which should be fully representable).
9033 // Sel = Src < 0x8000000000000000
9034 // FltOfs = select Sel, 0, 0x8000000000000000
9035 // IntOfs = select Sel, 0, 0x8000000000000000
9036 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
9037
9038 // TODO: Should any fast-math-flags be set for the FSUB?
9039 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
9040 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
9041 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
9042 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
9043 DAG.getConstant(0, dl, DstVT),
9044 DAG.getConstant(SignMask, dl, DstVT));
9045 SDValue SInt;
9046 if (Node->isStrictFPOpcode()) {
9047 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
9048 { Chain, Src, FltOfs });
9049 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
9050 { Val.getValue(1), Val });
9051 Chain = SInt.getValue(1);
9052 } else {
9053 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
9054 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
9055 }
// XOR with the signmask re-adds the offset for the out-of-range case.
9056 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
9057 } else {
9058 // Expand based on maximum range of FP_TO_SINT:
9059 // True = fp_to_sint(Src)
9060 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
9061 // Result = select (Src < 0x8000000000000000), True, False
9062
9063 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
9064 // TODO: Should any fast-math-flags be set for the FSUB?
9065 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
9066 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
9067 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
9068 DAG.getConstant(SignMask, dl, DstVT));
9069 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
9070 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
9071 }
9072 return true;
9073}
9074
// Expands a UINT_TO_FP node (scalar or vector i64 -> f64) using the
// compiler-rt __floatundidf bit trick. Writes the lowered value to Result
// and returns true, or returns false when the expansion does not apply.
9076 SDValue &Chain, SelectionDAG &DAG) const {
9077 // This transform is not correct for converting 0 when rounding mode is set
9078 // to round toward negative infinity which will produce -0.0. So disable
9079 // under strictfp.
9080 if (Node->isStrictFPOpcode())
9081 return false;
9082
9083 SDValue Src = Node->getOperand(0);
9084 EVT SrcVT = Src.getValueType();
9085 EVT DstVT = Node->getValueType(0);
9086
9087 // If the input is known to be non-negative and SINT_TO_FP is legal then use
9088 // it.
9089 if (Node->getFlags().hasNonNeg() &&
9091 Result =
9092 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
9093 return true;
9094 }
9095
9096 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
9097 return false;
9098
9099 // Only expand vector types if we have the appropriate vector bit
9100 // operations.
9101 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
9106 return false;
9107
9108 SDLoc dl(SDValue(Node, 0));
9109
9110 // Implementation of unsigned i64 to f64 following the algorithm in
9111 // __floatundidf in compiler_rt. This implementation performs rounding
9112 // correctly in all rounding modes with the exception of converting 0
9113 // when rounding toward negative infinity. In that case the fsub will
9114 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
9115 // incorrect.
// 0x433... is 2^52 and 0x453... is 2^84 as f64 bit patterns; OR-ing the
// 32-bit halves of the input into their mantissas makes exact doubles.
9116 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
9117 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
9118 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
9119 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
9120 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
9121 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
9122
// Result = (hi*2^32 as f64, exponent bias removed by the fsub) + lo as f64.
9123 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
9124 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
9125 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
9126 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
9127 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
9128 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
9129 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
9130 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
9131 return true;
9132}
9133
// Lowers a (strict) FMINNUM/FMAXNUM node as setcc+select when the nnan flag
// is present. Returns the select, or SDValue() when this lowering does not
// apply (no nnan flag, or the needed condcode/select is unavailable).
9134SDValue
9136 SelectionDAG &DAG) const {
9137 unsigned Opcode = Node->getOpcode();
9138 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
9139 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
9140 "Wrong opcode");
9141
// With no NaNs, min/max reduce to an ordinary ordered compare + select.
9142 if (Node->getFlags().hasNoNaNs()) {
9143 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
9144 EVT VT = Node->getValueType(0);
9145 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
9147 VT.isVector())
9148 return SDValue();
9149 SDValue Op1 = Node->getOperand(0);
9150 SDValue Op2 = Node->getOperand(1);
// select(Op1 Pred Op2, Op1, Op2) — propagates the node's flags.
9151 return DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred,
9152 Node->getFlags());
9153 }
9154
9155 return SDValue();
9156}
9157
// Expands FMINNUM/FMAXNUM, trying in order: splitting vectors, the _IEEE
// variant with quieted (canonicalized) inputs, FMINIMUM/FMAXIMUM when NaNs
// are provably absent, and finally a setcc+select lowering. Returns
// SDValue() if none of these is available.
9159 SelectionDAG &DAG) const {
9160 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
9161 return Expanded;
9162
9163 EVT VT = Node->getValueType(0);
9164 if (VT.isScalableVector())
9166 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
9167
9168 SDLoc dl(Node);
9169 unsigned NewOp =
9171
9172 if (isOperationLegalOrCustom(NewOp, VT)) {
9173 SDValue Quiet0 = Node->getOperand(0);
9174 SDValue Quiet1 = Node->getOperand(1);
9175
9176 if (!Node->getFlags().hasNoNaNs()) {
9177 // Insert canonicalizes if it's possible we need to quiet to get correct
9178 // sNaN behavior.
9179 if (!DAG.isKnownNeverSNaN(Quiet0)) {
9180 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
9181 Node->getFlags());
9182 }
9183 if (!DAG.isKnownNeverSNaN(Quiet1)) {
9184 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
9185 Node->getFlags());
9186 }
9187 }
9188
9189 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
9190 }
9191
9192 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
9193 // instead if there are no NaNs.
9194 if (Node->getFlags().hasNoNaNs() ||
9195 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
9196 DAG.isKnownNeverNaN(Node->getOperand(1)))) {
9197 unsigned IEEE2018Op =
9198 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
9199 if (isOperationLegalOrCustom(IEEE2018Op, VT))
9200 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
9201 Node->getOperand(1), Node->getFlags());
9202 }
9203
// Last resort: the nnan setcc+select lowering.
9205 return SelCC;
9206
9207 return SDValue();
9208}
9209
// Expands FMINIMUM/FMAXIMUM (NaN-propagating, -0.0 < +0.0) in three steps:
// 1) compute a non-NaN-propagating min/max, 2) force NaN propagation when
// either input may be NaN, 3) fix up signed-zero ordering when needed.
9211 SelectionDAG &DAG) const {
9212 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
9213 return Expanded;
9214
9215 SDLoc DL(N);
9216 SDValue LHS = N->getOperand(0);
9217 SDValue RHS = N->getOperand(1);
9218 unsigned Opc = N->getOpcode();
9219 EVT VT = N->getValueType(0);
9220 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9221 bool IsMax = Opc == ISD::FMAXIMUM;
9222 SDNodeFlags Flags = N->getFlags();
9223
9224 // First, implement comparison not propagating NaN. If no native fmin or fmax
9225 // available, use plain select with setcc instead.
9227 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9228 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
9229
9230 // FIXME: We should probably define fminnum/fmaxnum variants with correct
9231 // signed zero behavior.
9232 bool MinMaxMustRespectOrderedZero = false;
9233
9234 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
9235 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
// The _IEEE form already orders -0.0 < +0.0, so skip the zero fixup below.
9236 MinMaxMustRespectOrderedZero = true;
9237 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
9238 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
9239 } else {
9241 return DAG.UnrollVectorOp(N);
9242
9243 // NaN (if exists) will be propagated later, so orderness doesn't matter.
9244 SDValue Compare =
9245 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
9246 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
9247 }
9248
9249 // Propagate any NaN of both operands
9250 if (!N->getFlags().hasNoNaNs() &&
9251 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
9252 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
// SETUO: true iff either operand is NaN — select a canonical NaN then.
9254 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
9255 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
9256 }
9257
9258 // fminimum/fmaximum requires -0.0 less than +0.0
9259 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
9260 !DAG.isKnownNeverLogicalZero(RHS) && !DAG.isKnownNeverLogicalZero(LHS)) {
9261 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
9262 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
// For max prefer +0.0, for min prefer -0.0, whenever both compare equal 0.
9263 SDValue TestZero =
9264 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
9265 SDValue LCmp = DAG.getSelect(
9266 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
9267 MinMax, Flags);
9268 SDValue RCmp = DAG.getSelect(
9269 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
9270 LCmp, Flags);
9271 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
9272 }
9273
9274 return MinMax;
9275}
9276
// Expands FMINIMUMNUM/FMAXIMUMNUM (IEEE 754-2019 minimumNumber/maximumNumber:
// NaN loses against a number, -0.0 < +0.0). Tries progressively weaker
// native ops with compensating fixups, then falls back to selects.
9278 SelectionDAG &DAG) const {
9279 SDLoc DL(Node);
9280 SDValue LHS = Node->getOperand(0);
9281 SDValue RHS = Node->getOperand(1);
9282 unsigned Opc = Node->getOpcode();
9283 EVT VT = Node->getValueType(0);
9284 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9285 bool IsMax = Opc == ISD::FMAXIMUMNUM;
9286 SDNodeFlags Flags = Node->getFlags();
9287
9288 unsigned NewOp =
9290
9291 if (isOperationLegalOrCustom(NewOp, VT)) {
9292 if (!Flags.hasNoNaNs()) {
9293 // Insert canonicalizes if it's possible we need to quiet to get correct
9294 // sNaN behavior.
9295 if (!DAG.isKnownNeverSNaN(LHS)) {
9296 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
9297 }
9298 if (!DAG.isKnownNeverSNaN(RHS)) {
9299 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
9300 }
9301 }
9302
9303 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
9304 }
9305
9306 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
9307 // same behaviors for all of other cases: +0.0 vs -0.0 included.
9308 if (Flags.hasNoNaNs() ||
9309 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
9310 unsigned IEEE2019Op =
9312 if (isOperationLegalOrCustom(IEEE2019Op, VT))
9313 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
9314 }
9315
9316 // FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
9317 // either one for +0.0 vs -0.0.
9318 if ((Flags.hasNoNaNs() ||
9319 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
9320 (Flags.hasNoSignedZeros() || DAG.isKnownNeverLogicalZero(LHS) ||
9321 DAG.isKnownNeverLogicalZero(RHS))) {
9322 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
9323 if (isOperationLegalOrCustom(IEEE2008Op, VT))
9324 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
9325 }
9326
9327 if (VT.isVector() &&
9330 return DAG.UnrollVectorOp(Node);
9331
9332 // If only one operand is NaN, override it with another operand.
// SETUO(x, x) is true iff x is NaN, so each select replaces a NaN side
// with the other operand.
9333 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
9334 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
9335 }
9336 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
9337 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
9338 }
9339
9340 // Always prefer RHS if equal.
9341 SDValue MinMax =
9342 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
9343
9344 // TODO: We need quiet sNaN if strictfp.
9345
9346 // Fixup signed zero behavior.
9347 if (Flags.hasNoSignedZeros() || DAG.isKnownNeverLogicalZero(LHS) ||
9348 DAG.isKnownNeverLogicalZero(RHS)) {
9349 return MinMax;
9350 }
// For max the preferred zero is +0.0, for min it is -0.0.
9351 SDValue TestZero =
9352 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
9353 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
9354 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
9355 EVT IntVT = VT.changeTypeToInteger();
9356 EVT FloatVT = VT.changeElementType(*DAG.getContext(), MVT::f32);
9357 SDValue LHSTrunc = LHS;
// NOTE(review): guarded by a condition elided here; presumably narrows LHS
// so the IS_FPCLASS below runs on a legal float type — confirm upstream.
9359 LHSTrunc = DAG.getNode(ISD::FP_ROUND, DL, FloatVT, LHS,
9360 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
9361 }
9362 // It's OK to select from LHS and MinMax, with only one ISD::IS_FPCLASS, as
9363 // we preferred RHS when generate MinMax, if the operands are equal.
9364 SDValue RetZero = DAG.getSelect(
9365 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHSTrunc, TestZero), LHS,
9366 MinMax, Flags);
9367 return DAG.getSelect(DL, VT, IsZero, RetZero, MinMax, Flags);
9368}
9369
9370/// Returns a true value if if this FPClassTest can be performed with an ordered
9371/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
9372/// std::nullopt if it cannot be performed as a compare with 0.
9373static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
9374 const fltSemantics &Semantics,
9375 const MachineFunction &MF) {
9376 FPClassTest OrderedMask = Test & ~fcNan;
9377 FPClassTest NanTest = Test & fcNan;
9378 bool IsOrdered = NanTest == fcNone;
9379 bool IsUnordered = NanTest == fcNan;
9380
9381 // Skip cases that are testing for only a qnan or snan.
9382 if (!IsOrdered && !IsUnordered)
9383 return std::nullopt;
9384
9385 if (OrderedMask == fcZero &&
9386 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
9387 return IsOrdered;
9388 if (OrderedMask == (fcZero | fcSubnormal) &&
9389 MF.getDenormalMode(Semantics).inputsAreZero())
9390 return IsOrdered;
9391 return std::nullopt;
9392}
9393
// Expands an IS_FPCLASS test of Op against OrigTestMask into a ResultVT
// boolean. Strategy: handle degenerate masks, then try cheap FP-compare
// forms (legal only when FP exceptions can be ignored), and finally fall
// back to integer bit tests on the bitcast representation.
9395 const FPClassTest OrigTestMask,
9396 SDNodeFlags Flags, const SDLoc &DL,
9397 SelectionDAG &DAG) const {
9398 EVT OperandVT = Op.getValueType();
9399 assert(OperandVT.isFloatingPoint());
9400 FPClassTest Test = OrigTestMask;
9401
9402 // Degenerated cases.
9403 if (Test == fcNone)
9404 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
9405 if (Test == fcAllFlags)
9406 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
9407
9408 // PPC double double is a pair of doubles, of which the higher part determines
9409 // the value class.
9410 if (OperandVT == MVT::ppcf128) {
9411 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
9412 DAG.getConstant(1, DL, MVT::i32));
9413 OperandVT = MVT::f64;
9414 }
9415
9416 // Floating-point type properties.
9417 EVT ScalarFloatVT = OperandVT.getScalarType();
9418 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
9419 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
// x87 80-bit has an explicit integer bit in the mantissa that the generic
// bit tests below must special-case.
9420 bool IsF80 = (ScalarFloatVT == MVT::f80);
9421
9422 // Some checks can be implemented using float comparisons, if floating point
9423 // exceptions are ignored.
9424 if (Flags.hasNoFPExcept() &&
9426 FPClassTest FPTestMask = Test;
9427 bool IsInvertedFP = false;
9428
9429 if (FPClassTest InvertedFPCheck =
9430 invertFPClassTestIfSimpler(FPTestMask, true)) {
9431 FPTestMask = InvertedFPCheck;
9432 IsInvertedFP = true;
9433 }
9434
9435 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
9436 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
9437
9438 // See if we can fold an | fcNan into an unordered compare.
9439 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
9440
9441 // Can't fold the ordered check if we're only testing for snan or qnan
9442 // individually.
9443 if ((FPTestMask & fcNan) != fcNan)
9444 OrderedFPTestMask = FPTestMask;
9445
9446 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
9447
9448 if (std::optional<bool> IsCmp0 =
9449 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
9450 IsCmp0 && (isCondCodeLegalOrCustom(
9451 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
9452 OperandVT.getScalarType().getSimpleVT()))) {
9453
9454 // If denormals could be implicitly treated as 0, this is not equivalent
9455 // to a compare with 0 since it will also be true for denormals.
9456 return DAG.getSetCC(DL, ResultVT, Op,
9457 DAG.getConstantFP(0.0, DL, OperandVT),
9458 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
9459 }
9460
// isnan(x) --> x != x (unordered self-compare); inverted form uses SETO.
9461 if (FPTestMask == fcNan &&
9463 OperandVT.getScalarType().getSimpleVT()))
9464 return DAG.getSetCC(DL, ResultVT, Op, Op,
9465 IsInvertedFP ? ISD::SETO : ISD::SETUO);
9466
9467 bool IsOrderedInf = FPTestMask == fcInf;
9468 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
9469 isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
9470 : UnorderedCmpOpcode,
9471 OperandVT.getScalarType().getSimpleVT()) &&
9474 (OperandVT.isVector() &&
9476 // isinf(x) --> fabs(x) == inf
9477 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9478 SDValue Inf =
9479 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9480 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
9481 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
9482 }
9483
9484 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
9485 isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
9486 : UnorderedCmpOpcode,
9487 OperandVT.getSimpleVT())) {
9488 // isposinf(x) --> x == inf
9489 // isneginf(x) --> x == -inf
9490 // isposinf(x) || nan --> x u== inf
9491 // isneginf(x) || nan --> x u== -inf
9492
9493 SDValue Inf = DAG.getConstantFP(
9494 APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
9495 OperandVT);
9496 return DAG.getSetCC(DL, ResultVT, Op, Inf,
9497 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
9498 }
9499
9500 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
9501 // TODO: Could handle ordered case, but it produces worse code for
9502 // x86. Maybe handle ordered if fabs is free?
9503
9504 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9505 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
9506
9507 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
9508 OperandVT.getScalarType().getSimpleVT())) {
9509 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
9510
9511 // TODO: Maybe only makes sense if fabs is free. Integer test of
9512 // exponent bits seems better for x86.
9513 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9514 SDValue SmallestNormal = DAG.getConstantFP(
9515 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9516 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
9517 IsOrdered ? OrderedOp : UnorderedOp);
9518 }
9519 }
9520
9521 if (FPTestMask == fcNormal) {
9522 // TODO: Handle unordered
9523 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9524 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9525
9526 if (isCondCodeLegalOrCustom(IsFiniteOp,
9527 OperandVT.getScalarType().getSimpleVT()) &&
9528 isCondCodeLegalOrCustom(IsNormalOp,
9529 OperandVT.getScalarType().getSimpleVT()) &&
9530 isFAbsFree(OperandVT)) {
9531 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9532 SDValue Inf =
9533 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9534 SDValue SmallestNormal = DAG.getConstantFP(
9535 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9536
9537 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9538 SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
9539 SDValue IsNormal =
9540 DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
9541 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9542 return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
9543 }
9544 }
9545 }
9546
9547 // Some checks may be represented as inversion of simpler check, for example
9548 // "inf|normal|subnormal|zero" => !"nan".
9549 bool IsInverted = false;
9550
9551 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
9552 Test = InvertedCheck;
9553 IsInverted = true;
9554 }
9555
9556 // In the general case use integer operations.
9557 unsigned BitSize = OperandVT.getScalarSizeInBits();
9558 EVT IntVT = OperandVT.changeElementType(
9559 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), BitSize));
9560 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
9561
9562 // Various masks.
9563 APInt SignBit = APInt::getSignMask(BitSize);
9564 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9565 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9566 const unsigned ExplicitIntBitInF80 = 63;
9567 APInt ExpMask = Inf;
9568 if (IsF80)
9569 ExpMask.clearBit(ExplicitIntBitInF80);
9570 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9571 APInt QNaNBitMask =
9572 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9573 APInt InversionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
9574
9575 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
9576 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
9577 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
9578 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
9579 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
9580 SDValue ResultInversionMask = DAG.getConstant(InversionMask, DL, ResultVT);
9581
// Accumulates the OR of all class sub-tests into Res.
9582 SDValue Res;
9583 const auto appendResult = [&](SDValue PartialRes) {
9584 if (PartialRes) {
9585 if (Res)
9586 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
9587 else
9588 Res = PartialRes;
9589 }
9590 };
9591
// Lazily materialized (and cached) test of the f80 explicit integer bit.
9592 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
9593 const auto getIntBitIsSet = [&]() -> SDValue {
9594 if (!IntBitIsSetV) {
9595 APInt IntBitMask(BitSize, 0);
9596 IntBitMask.setBit(ExplicitIntBitInF80);
9597 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
9598 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
9599 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
9600 }
9601 return IntBitIsSetV;
9602 };
9603
9604 // Split the value into sign bit and absolute value.
9605 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
9606 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
9607 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
9608
9609 // Tests that involve more than one class should be processed first.
9610 SDValue PartialRes;
9611
9612 if (IsF80)
9613 ; // Detect finite numbers of f80 by checking individual classes because
9614 // they have different settings of the explicit integer bit.
9615 else if ((Test & fcFinite) == fcFinite) {
9616 // finite(V) ==> (a << 1) < (inf << 1)
9617 //
9618 // See https://github.com/llvm/llvm-project/issues/169270, this is slightly
9619 // shorter than the `finite(V) ==> abs(V) < exp_mask` formula used before.
9620
9622 "finite check requires IEEE-like FP");
9623
// Shifting left by one drops the sign bit from both sides of the compare.
9624 SDValue One = DAG.getShiftAmountConstant(1, IntVT, DL);
9625 SDValue TwiceOp = DAG.getNode(ISD::SHL, DL, IntVT, OpAsInt, One);
9626 SDValue TwiceInf = DAG.getNode(ISD::SHL, DL, IntVT, ExpMaskV, One);
9627
9628 PartialRes = DAG.getSetCC(DL, ResultVT, TwiceOp, TwiceInf, ISD::SETULT);
9629 Test &= ~fcFinite;
9630 } else if ((Test & fcFinite) == fcPosFinite) {
9631 // finite(V) && V > 0 ==> V < exp_mask
9632 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
9633 Test &= ~fcPosFinite;
9634 } else if ((Test & fcFinite) == fcNegFinite) {
9635 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9636 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9637 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9638 Test &= ~fcNegFinite;
9639 }
9640 appendResult(PartialRes);
9641
9642 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
9643 // fcZero | fcSubnormal => test all exponent bits are 0
9644 // TODO: Handle sign bit specific cases
9645 if (PartialCheck == (fcZero | fcSubnormal)) {
9646 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
9647 SDValue ExpIsZero =
9648 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9649 appendResult(ExpIsZero);
9650 Test &= ~PartialCheck & fcAllFlags;
9651 }
9652 }
9653
9654 // Check for individual classes.
9655
9656 if (unsigned PartialCheck = Test & fcZero) {
9657 if (PartialCheck == fcPosZero)
9658 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
9659 else if (PartialCheck == fcZero)
9660 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
9661 else // ISD::fcNegZero
9662 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
9663 appendResult(PartialRes);
9664 }
9665
9666 if (unsigned PartialCheck = Test & fcSubnormal) {
9667 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
9668 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
9669 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
9670 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
// The -1 turns the zero case into an unsigned wraparound, excluding it.
9671 SDValue VMinusOneV =
9672 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
9673 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
9674 if (PartialCheck == fcNegSubnormal)
9675 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9676 appendResult(PartialRes);
9677 }
9678
9679 if (unsigned PartialCheck = Test & fcInf) {
9680 if (PartialCheck == fcPosInf)
9681 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
9682 else if (PartialCheck == fcInf)
9683 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
9684 else { // ISD::fcNegInf
9685 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9686 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
9687 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
9688 }
9689 appendResult(PartialRes);
9690 }
9691
9692 if (unsigned PartialCheck = Test & fcNan) {
9693 APInt InfWithQnanBit = Inf | QNaNBitMask;
9694 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
9695 if (PartialCheck == fcNan) {
9696 // isnan(V) ==> abs(V) > int(inf)
9697 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9698 if (IsF80) {
9699 // Recognize unsupported values as NaNs for compatibility with glibc.
9700 // In them (exp(V)==0) == int_bit.
9701 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
9702 SDValue ExpIsZero =
9703 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9704 SDValue IsPseudo =
9705 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
9706 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
9707 }
9708 } else if (PartialCheck == fcQNan) {
9709 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
9710 PartialRes =
9711 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
9712 } else { // ISD::fcSNan
9713 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
9714 // abs(V) < (unsigned(Inf) | quiet_bit)
9715 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9716 SDValue IsNotQnan =
9717 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
9718 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
9719 }
9720 appendResult(PartialRes);
9721 }
9722
9723 if (unsigned PartialCheck = Test & fcNormal) {
9724 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
9725 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9726 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
9727 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
9728 APInt ExpLimit = ExpMask - ExpLSB;
9729 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
9730 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
9731 if (PartialCheck == fcNegNormal)
9732 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9733 else if (PartialCheck == fcPosNormal) {
// XOR with the all-ones mask is a boolean NOT of the sign test.
9734 SDValue PosSignV =
9735 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInversionMask);
9736 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
9737 }
9738 if (IsF80)
9739 PartialRes =
9740 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
9741 appendResult(PartialRes);
9742 }
9743
// No sub-test fired: the (possibly inverted) answer is a constant.
9744 if (!Res)
9745 return DAG.getConstant(IsInverted, DL, ResultVT);
9746 if (IsInverted)
9747 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInversionMask);
9748 return Res;
9749}
9750
9751// Only expand vector types if we have the appropriate vector bit operations.
9752static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9753 assert(VT.isVector() && "Expected vector type");
9754 unsigned Len = VT.getScalarSizeInBits();
9755 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9758 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9760}
9761
9763 SDLoc dl(Node);
9764 EVT VT = Node->getValueType(0);
9765 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9766 SDValue Op = Node->getOperand(0);
9767 unsigned Len = VT.getScalarSizeInBits();
9768 assert(VT.isInteger() && "CTPOP not implemented for this type.");
9769
9770 // TODO: Add support for irregular type lengths.
9771 if (!(Len <= 128 && Len % 8 == 0))
9772 return SDValue();
9773
9774 // Only expand vector types if we have the appropriate vector bit operations.
9775 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
9776 return SDValue();
9777
9778 // This is the "best" algorithm from
9779 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9780 SDValue Mask55 =
9781 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9782 SDValue Mask33 =
9783 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9784 SDValue Mask0F =
9785 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9786
9787 // v = v - ((v >> 1) & 0x55555555...)
9788 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
9789 DAG.getNode(ISD::AND, dl, VT,
9790 DAG.getNode(ISD::SRL, dl, VT, Op,
9791 DAG.getConstant(1, dl, ShVT)),
9792 Mask55));
9793 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9794 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
9795 DAG.getNode(ISD::AND, dl, VT,
9796 DAG.getNode(ISD::SRL, dl, VT, Op,
9797 DAG.getConstant(2, dl, ShVT)),
9798 Mask33));
9799 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9800 Op = DAG.getNode(ISD::AND, dl, VT,
9801 DAG.getNode(ISD::ADD, dl, VT, Op,
9802 DAG.getNode(ISD::SRL, dl, VT, Op,
9803 DAG.getConstant(4, dl, ShVT))),
9804 Mask0F);
9805
9806 if (Len <= 8)
9807 return Op;
9808
9809 // Avoid the multiply if we only have 2 bytes to add.
9810 // TODO: Only doing this for scalars because vectors weren't as obviously
9811 // improved.
9812 if (Len == 16 && !VT.isVector()) {
9813 // v = (v + (v >> 8)) & 0x00FF;
9814 return DAG.getNode(ISD::AND, dl, VT,
9815 DAG.getNode(ISD::ADD, dl, VT, Op,
9816 DAG.getNode(ISD::SRL, dl, VT, Op,
9817 DAG.getConstant(8, dl, ShVT))),
9818 DAG.getConstant(0xFF, dl, VT));
9819 }
9820
9821 // v = (v * 0x01010101...) >> (Len - 8)
9822 SDValue V;
9825 SDValue Mask01 =
9826 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9827 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
9828 } else {
9829 V = Op;
9830 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9831 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9832 V = DAG.getNode(ISD::ADD, dl, VT, V,
9833 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
9834 }
9835 }
9836 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
9837}
9838
9840 SDLoc dl(Node);
9841 EVT VT = Node->getValueType(0);
9842 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9843 SDValue Op = Node->getOperand(0);
9844 SDValue Mask = Node->getOperand(1);
9845 SDValue VL = Node->getOperand(2);
9846 unsigned Len = VT.getScalarSizeInBits();
9847 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9848
9849 // TODO: Add support for irregular type lengths.
9850 if (!(Len <= 128 && Len % 8 == 0))
9851 return SDValue();
9852
9853 // This is same algorithm of expandCTPOP from
9854 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9855 SDValue Mask55 =
9856 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9857 SDValue Mask33 =
9858 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9859 SDValue Mask0F =
9860 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9861
9862 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9863
9864 // v = v - ((v >> 1) & 0x55555555...)
9865 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9866 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9867 DAG.getConstant(1, dl, ShVT), Mask, VL),
9868 Mask55, Mask, VL);
9869 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9870
9871 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9872 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9873 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9874 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9875 DAG.getConstant(2, dl, ShVT), Mask, VL),
9876 Mask33, Mask, VL);
9877 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9878
9879 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9880 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9881 Mask, VL),
9882 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9883 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9884
9885 if (Len <= 8)
9886 return Op;
9887
9888 // v = (v * 0x01010101...) >> (Len - 8)
9889 SDValue V;
9891 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9892 SDValue Mask01 =
9893 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9894 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9895 } else {
9896 V = Op;
9897 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9898 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9899 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9900 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9901 Mask, VL);
9902 }
9903 }
9904 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9905 Mask, VL);
9906}
9907
9909 SDLoc dl(Node);
9910 EVT VT = Node->getValueType(0);
9911 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9912 SDValue Op = Node->getOperand(0);
9913 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9914
9915 // If the non-ZERO_UNDEF version is supported we can use that instead.
9916 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
9918 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
9919
9920 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9922 EVT SetCCVT =
9923 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9924 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
9925 SDValue Zero = DAG.getConstant(0, dl, VT);
9926 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9927 return DAG.getSelect(dl, VT, SrcIsZero,
9928 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
9929 }
9930
9931 // Only expand vector types if we have the appropriate vector bit operations.
9932 // This includes the operations needed to expand CTPOP if it isn't supported.
9933 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9935 !canExpandVectorCTPOP(*this, VT)) ||
9938 return SDValue();
9939
9940 // for now, we do this:
9941 // x = x | (x >> 1);
9942 // x = x | (x >> 2);
9943 // ...
9944 // x = x | (x >>16);
9945 // x = x | (x >>32); // for 64-bit input
9946 // return popcount(~x);
9947 //
9948 // Ref: "Hacker's Delight" by Henry Warren
9949 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9950 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9951 Op = DAG.getNode(ISD::OR, dl, VT, Op,
9952 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
9953 }
9954 Op = DAG.getNOT(dl, Op, VT);
9955 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
9956}
9957
// Expand a vector-predicated count-leading-zeros node (VP_CTLZ /
// VP_CTLZ_ZERO_UNDEF) into VP shift/or/xor operations followed by VP_CTPOP:
// smear the highest set bit into all lower positions, invert, then count the
// set bits. All intermediate ops carry the same (Mask, VL) predication as the
// original node, so inactive lanes are never computed.
// NOTE(review): the opening signature line (original line 9958) was lost in
// extraction; presumably `SDValue TargetLowering::expandVPCTLZ(SDNode *Node,
// SelectionDAG &DAG) const {` -- confirm against the original file.
9959 SDLoc dl(Node);
9960 EVT VT = Node->getValueType(0);
9961 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9962 SDValue Op = Node->getOperand(0);
9963 SDValue Mask = Node->getOperand(1);
9964 SDValue VL = Node->getOperand(2);
9965 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9966
9967 // do this:
9968 // x = x | (x >> 1);
9969 // x = x | (x >> 2);
9970 // ...
9971 // x = x | (x >>16);
9972 // x = x | (x >>32); // for 64-bit input
9973 // return popcount(~x);
// log2(BitWidth) masked OR/SRL steps propagate the leading one bit downward.
9974 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9975 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9976 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9977 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9978 VL);
9979 }
// Bitwise NOT under the mask, expressed as XOR with all-ones (there is no
// dedicated VP_NOT node).
9980 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9981 Mask, VL);
9982 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9983}
9984
// Expand a count-leading-sign-bits node in terms of CTLZ_ZERO_UNDEF:
// XOR with the arithmetic-shift-copied sign bit turns redundant sign bits
// into leading zeros; the SHL-by-1 + OR-with-1 guarantees the value is
// nonzero (so the _ZERO_UNDEF form is safe) while dropping one bit so the
// count comes out right.
// NOTE(review): the opening signature line (original line 9985) was lost in
// extraction; presumably `SDValue TargetLowering::expandCTLS(SDNode *Node,
// SelectionDAG &DAG) const {` -- confirm against the original file.
9986 SDLoc dl(Node);
9987 EVT VT = Node->getValueType(0);
// Freeze the operand: it is used twice below (directly and via SRA), and a
// poison input must not let the two uses diverge.
9988 SDValue Op = DAG.getFreeze(Node->getOperand(0));
9989 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9990
9991 // CTLS(x) = CTLZ(OR(SHL(XOR(x, SRA(x, BW-1)), 1), 1))
9992 // This transforms the sign bits into leading zeros that can be counted.
9993 SDValue ShiftAmt = DAG.getShiftAmountConstant(NumBitsPerElt - 1, VT, dl);
// SRA by BW-1 yields 0 for non-negative inputs, all-ones for negative ones.
9994 SDValue SignBit = DAG.getNode(ISD::SRA, dl, VT, Op, ShiftAmt);
9995 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, SignBit);
9996 SDValue Shl =
9997 DAG.getNode(ISD::SHL, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
9998 SDValue Or = DAG.getNode(ISD::OR, dl, VT, Shl, DAG.getConstant(1, dl, VT));
9999 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Or);
10000}
10001
10003 const SDLoc &DL, EVT VT, SDValue Op,
10004 unsigned BitWidth) const {
10005 if (BitWidth != 32 && BitWidth != 64)
10006 return SDValue();
10007
10008 const DataLayout &TD = DAG.getDataLayout();
10010 return SDValue();
10011
10012 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
10013 : APInt(64, 0x0218A392CD3D5DBFULL);
10014 MachinePointerInfo PtrInfo =
10016 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
10017 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
10018 SDValue Lookup = DAG.getNode(
10019 ISD::SRL, DL, VT,
10020 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
10021 DAG.getConstant(DeBruijn, DL, VT)),
10022 DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
10024
10026 for (unsigned i = 0; i < BitWidth; i++) {
10027 APInt Shl = DeBruijn.shl(i);
10028 APInt Lshr = Shl.lshr(ShiftAmt);
10029 Table[Lshr.getZExtValue()] = i;
10030 }
10031
10032 // Create a ConstantArray in Constant Pool
10033 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
10034 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
10035 TD.getPrefTypeAlign(CA->getType()));
10036 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
10037 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
10038 PtrInfo, MVT::i8);
10039 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
10040 return ExtLoad;
10041
10042 EVT SetCCVT =
10043 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10044 SDValue Zero = DAG.getConstant(0, DL, VT);
10045 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
10046 return DAG.getSelect(DL, VT, SrcIsZero,
10047 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
10048}
10049
10051 SDLoc dl(Node);
10052 EVT VT = Node->getValueType(0);
10053 SDValue Op = Node->getOperand(0);
10054 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10055
10056 // If the non-ZERO_UNDEF version is supported we can use that instead.
10057 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
10059 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
10060
10061 // If the ZERO_UNDEF version is supported use that and handle the zero case.
10063 EVT SetCCVT =
10064 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10065 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
10066 SDValue Zero = DAG.getConstant(0, dl, VT);
10067 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
10068 return DAG.getSelect(dl, VT, SrcIsZero,
10069 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
10070 }
10071
10072 // Only expand vector types if we have the appropriate vector bit operations.
10073 // This includes the operations needed to expand CTPOP if it isn't supported.
10074 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
10077 !canExpandVectorCTPOP(*this, VT)) ||
10081 return SDValue();
10082
10083 // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
10084 // to be expanded or converted to a libcall.
10087 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
10088 return V;
10089
10090 // for now, we use: { return popcount(~x & (x - 1)); }
10091 // unless the target has ctlz but not ctpop, in which case we use:
10092 // { return 32 - nlz(~x & (x-1)); }
10093 // Ref: "Hacker's Delight" by Henry Warren
10094 SDValue Tmp = DAG.getNode(
10095 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
10096 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
10097
10098 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
10100 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
10101 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
10102 }
10103
10104 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
10105}
10106
// Expand a vector-predicated count-trailing-zeros node using the classic
// identity cttz(x) = popcount(~x & (x - 1)): the AND isolates a mask of the
// bits strictly below the lowest set bit, and VP_CTPOP counts them. Every
// intermediate op is predicated with the node's own (Mask, VL).
// NOTE(review): the opening signature line (original line 10107) was lost in
// extraction; presumably `SDValue TargetLowering::expandVPCTTZ(SDNode *Node,
// SelectionDAG &DAG) const {` -- confirm against the original file.
10108 SDValue Op = Node->getOperand(0);
10109 SDValue Mask = Node->getOperand(1);
10110 SDValue VL = Node->getOperand(2);
10111 SDLoc dl(Node);
10112 EVT VT = Node->getValueType(0);
10113
10114 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
// ~x expressed as a masked XOR with all-ones (no dedicated VP_NOT node).
10115 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
10116 DAG.getAllOnesConstant(dl, VT), Mask, VL);
10117 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
10118 DAG.getConstant(1, dl, VT), Mask, VL);
10119 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
10120 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
10121}
10122
// Expand a vp.cttz.elts-style node: return the index of the first active
// (true) element of the source vector, or EVL if none is set. Implemented by
// selecting, per lane, between a step vector (lane index) and a splat of EVL,
// then taking the unsigned-minimum reduction under the node's mask.
// NOTE(review): the first half of the signature (original line 10123) was
// lost in extraction; presumably
// `SDValue TargetLowering::expandVPCTTZElements(SDNode *N,` -- confirm
// against the original file.
10124 SelectionDAG &DAG) const {
10125 // %cond = to_bool_vec %source
10126 // %splat = splat /*val=*/VL
10127 // %tz = step_vector
10128 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
10129 // %r = vp.reduce.umin %v
10130 SDLoc DL(N);
10131 SDValue Source = N->getOperand(0);
10132 SDValue Mask = N->getOperand(1);
10133 SDValue EVL = N->getOperand(2);
10134 EVT SrcVT = Source.getValueType();
10135 EVT ResVT = N->getValueType(0);
// Vector of result-typed elements, one per source lane, for the step/splat.
10136 EVT ResVecVT =
10137 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
10138
10139 // Convert to boolean vector.
// A non-i1 source is reduced to i1 lanes via a predicated compare-not-equal
// against zero, so "active" means "nonzero element".
10140 if (SrcVT.getScalarType() != MVT::i1) {
10141 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
10142 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
10143 SrcVT.getVectorElementCount());
10144 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
10145 DAG.getCondCode(ISD::SETNE), Mask, EVL);
10146 }
10147
// EVL splat acts as the "not found" sentinel; it also upper-bounds every
// valid lane index, so UMIN picks the first active lane when one exists.
10148 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
10149 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
10150 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
10151 SDValue Select =
10152 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
10153 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
10154}
10155
10156/// Returns a type-legalized version of \p Mask as the first item in the
10157/// pair. The second item contains a type-legalized step vector that's
10158/// guaranteed to fit the number of elements in \p Mask.
10159/// If the stepvector would require splitting, returns an empty SDValue
10160/// as the second item to signal that the operation should be split instead.
10161static std::pair<SDValue, SDValue>
10163 SelectionDAG &DAG) {
10164 EVT MaskVT = Mask.getValueType();
10165 EVT BoolVT = MaskVT.getScalarType();
10166
10167 // Find a suitable type for a stepvector.
10168 // If zero is poison, we can assume the upper limit of the result is VF-1.
10169 ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
10170 if (MaskVT.isScalableVector())
10171 VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
10172 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10173 uint64_t EltWidth = TLI.getBitWidthForCttzElements(
10174 EVT(TLI.getVectorIdxTy(DAG.getDataLayout())),
10175 MaskVT.getVectorElementCount(), ZeroIsPoison, &VScaleRange);
10176 // If the step vector element type is smaller than the mask element type,
10177 // use the mask type directly to avoid widening issues.
10178 EltWidth = std::max(EltWidth, BoolVT.getFixedSizeInBits());
10179 EVT StepVT = MVT::getIntegerVT(EltWidth);
10180 EVT StepVecVT = MaskVT.changeVectorElementType(*DAG.getContext(), StepVT);
10181
10182 // If promotion or widening is required to make the type legal, do it here.
10183 // Promotion of integers within LegalizeVectorOps is looking for types of
10184 // the same size but with a smaller number of larger elements, not the usual
10185 // larger size with the same number of larger elements.
10187 TLI.getTypeAction(*DAG.getContext(), StepVecVT);
10188 SDValue StepVec;
10189 if (TypeAction == TargetLowering::TypePromoteInteger) {
10190 StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
10191 StepVec = DAG.getStepVector(DL, StepVecVT);
10192 } else if (TypeAction == TargetLowering::TypeWidenVector) {
10193 // For widening, the element count changes. Create a step vector with only
10194 // the original elements valid and zeros for padding. Also widen the mask.
10195 EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
10196 unsigned WideNumElts = WideVecVT.getVectorNumElements();
10197
10198 // Build widened step vector: <0, 1, ..., OrigNumElts-1, poison, poison, ..>
10199 SDValue OrigStepVec = DAG.getStepVector(DL, StepVecVT);
10200 SDValue UndefStep = DAG.getPOISON(WideVecVT);
10201 StepVec = DAG.getInsertSubvector(DL, UndefStep, OrigStepVec, 0);
10202
10203 // Widen mask: pad with zeros.
10204 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), BoolVT, WideNumElts);
10205 SDValue ZeroMask = DAG.getConstant(0, DL, WideMaskVT);
10206 Mask = DAG.getInsertSubvector(DL, ZeroMask, Mask, 0);
10207 } else if (TypeAction == TargetLowering::TypeSplitVector) {
10208 // The stepvector type would require splitting. Signal to the caller
10209 // that the operation should be split instead of expanded.
10210 return {Mask, SDValue()};
10211 } else {
10212 StepVec = DAG.getStepVector(DL, StepVecVT);
10213 }
10214
10215 return {Mask, StepVec};
10216}
10217
10219 SelectionDAG &DAG) const {
10220 SDLoc DL(N);
10221 auto [Mask, StepVec] = getLegalMaskAndStepVector(
10222 N->getOperand(0), /*ZeroIsPoison=*/true, DL, DAG);
10223
10224 // If StepVec is empty, the stepvector would require splitting.
10225 // Split the operation instead and let it be recursively legalized.
10226 if (!StepVec) {
10227 EVT MaskVT = N->getOperand(0).getValueType();
10228 EVT ResVT = N->getValueType(0);
10229
10230 // Split the mask
10231 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(MaskVT);
10232 auto [MaskLo, MaskHi] = DAG.SplitVector(N->getOperand(0), DL);
10233
10234 // Create split VECTOR_FIND_LAST_ACTIVE operations
10235 SDValue LoResult =
10236 DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, ResVT, MaskLo);
10237 SDValue HiResult =
10238 DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, ResVT, MaskHi);
10239
10240 // Check if any lane is active in the high mask.
10241 SDValue AnyHiActive = DAG.getNode(ISD::VECREDUCE_OR, DL, MVT::i1, MaskHi);
10243 AnyHiActive, DL,
10244 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i1),
10245 MVT::i1);
10246
10247 // Adjust HiResult by adding the number of elements in Lo
10248 SDValue LoNumElts =
10249 DAG.getElementCount(DL, ResVT, LoVT.getVectorElementCount());
10250 SDValue AdjustedHiResult =
10251 DAG.getNode(ISD::ADD, DL, ResVT, HiResult, LoNumElts);
10252
10253 // Return: AnyHiActive ? AdjustedHiResult : LoResult;
10254 return DAG.getNode(ISD::SELECT, DL, ResVT, Cond, AdjustedHiResult,
10255 LoResult);
10256 }
10257
10258 EVT StepVecVT = StepVec.getValueType();
10259 EVT StepVT = StepVec.getValueType().getVectorElementType();
10260
10261 // Zero out lanes with inactive elements, then find the highest remaining
10262 // value from the stepvector.
10263 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
10264 SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
10265 SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
10266 return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
10267}
10268
10270 bool IsNegative) const {
10271 SDLoc dl(N);
10272 EVT VT = N->getValueType(0);
10273 SDValue Op = N->getOperand(0);
10274
10275 // abs(x) -> smax(x,sub(0,x))
10276 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
10278 SDValue Zero = DAG.getConstant(0, dl, VT);
10279 Op = DAG.getFreeze(Op);
10280 return DAG.getNode(ISD::SMAX, dl, VT, Op,
10281 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10282 }
10283
10284 // abs(x) -> umin(x,sub(0,x))
10285 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
10287 SDValue Zero = DAG.getConstant(0, dl, VT);
10288 Op = DAG.getFreeze(Op);
10289 return DAG.getNode(ISD::UMIN, dl, VT, Op,
10290 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10291 }
10292
10293 // 0 - abs(x) -> smin(x, sub(0,x))
10294 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
10296 SDValue Zero = DAG.getConstant(0, dl, VT);
10297 Op = DAG.getFreeze(Op);
10298 return DAG.getNode(ISD::SMIN, dl, VT, Op,
10299 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10300 }
10301
10302 // Only expand vector types if we have the appropriate vector operations.
10303 if (VT.isVector() &&
10305 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
10306 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
10308 return SDValue();
10309
10310 Op = DAG.getFreeze(Op);
10311 SDValue Shift = DAG.getNode(
10312 ISD::SRA, dl, VT, Op,
10313 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
10314 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
10315
10316 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
10317 if (!IsNegative)
10318 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
10319
10320 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
10321 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
10322}
10323
10325 SDLoc dl(N);
10326 EVT VT = N->getValueType(0);
10327 SDValue LHS = N->getOperand(0);
10328 SDValue RHS = N->getOperand(1);
10329 bool IsSigned = N->getOpcode() == ISD::ABDS;
10330
10331 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
10332 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
10333 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
10334 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
10335 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
10336 LHS = DAG.getFreeze(LHS);
10337 RHS = DAG.getFreeze(RHS);
10338 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
10339 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
10340 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
10341 }
10342
10343 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
10344 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) {
10345 LHS = DAG.getFreeze(LHS);
10346 RHS = DAG.getFreeze(RHS);
10347 return DAG.getNode(ISD::OR, dl, VT,
10348 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
10349 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
10350 }
10351
10352 // If the subtract doesn't overflow then just use abs(sub())
10353 bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
10354
10355 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
10356 return DAG.getNode(ISD::ABS, dl, VT,
10357 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
10358
10359 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
10360 return DAG.getNode(ISD::ABS, dl, VT,
10361 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
10362
10363 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10365 LHS = DAG.getFreeze(LHS);
10366 RHS = DAG.getFreeze(RHS);
10367 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
10368
10369 // Branchless expansion iff cmp result is allbits:
10370 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
10371 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
10372 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10373 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
10374 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
10375 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
10376 }
10377
10378 // Similar to the branchless expansion, if we don't prefer selects, use the
10379 // (sign-extended) usubo overflow flag if the (scalar) type is illegal as this
10380 // is more likely to legalize cleanly: abdu(lhs, rhs) -> sub(xor(sub(lhs,
10381 // rhs), uof(lhs, rhs)), uof(lhs, rhs))
10382 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
10384 SDValue USubO =
10385 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
10386 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
10387 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
10388 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
10389 }
10390
10391 // FIXME: Should really try to split the vector in case it's legal on a
10392 // subvector.
10394 return DAG.UnrollVectorOp(N);
10395
10396 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10397 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10398 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
10399 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
10400}
10401
10403 SDLoc dl(N);
10404 EVT VT = N->getValueType(0);
10405 SDValue LHS = N->getOperand(0);
10406 SDValue RHS = N->getOperand(1);
10407
10408 unsigned Opc = N->getOpcode();
10409 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
10410 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
10411 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
10412 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
10413 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
10414 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10416 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
10417 "Unknown AVG node");
10418
10419 // If the operands are already extended, we can add+shift.
10420 bool IsExt =
10421 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
10422 DAG.ComputeNumSignBits(RHS) >= 2) ||
10423 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
10424 DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
10425 if (IsExt) {
10426 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
10427 if (!IsFloor)
10428 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
10429 return DAG.getNode(ShiftOpc, dl, VT, Sum,
10430 DAG.getShiftAmountConstant(1, VT, dl));
10431 }
10432
10433 // For scalars, see if we can efficiently extend/truncate to use add+shift.
10434 if (VT.isScalarInteger()) {
10435 EVT ExtVT = VT.widenIntegerElementType(*DAG.getContext());
10436 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
10437 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
10438 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
10439 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
10440 if (!IsFloor)
10441 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
10442 DAG.getConstant(1, dl, ExtVT));
10443 // Just use SRL as we will be truncating away the extended sign bits.
10444 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
10445 DAG.getShiftAmountConstant(1, ExtVT, dl));
10446 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
10447 }
10448 }
10449
10450 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
10451 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT) &&
10454 SDValue UAddWithOverflow =
10455 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
10456
10457 SDValue Sum = UAddWithOverflow.getValue(0);
10458 SDValue Overflow = UAddWithOverflow.getValue(1);
10459
10460 // Right shift the sum by 1
10461 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
10462 DAG.getShiftAmountConstant(1, VT, dl));
10463
10464 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
10465 SDValue OverflowShl = DAG.getNode(
10466 ISD::SHL, dl, VT, ZeroExtOverflow,
10467 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
10468
10469 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
10470 }
10471
10472 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
10473 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
10474 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
10475 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
10476 LHS = DAG.getFreeze(LHS);
10477 RHS = DAG.getFreeze(RHS);
10478 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
10479 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
10480 SDValue Shift =
10481 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
10482 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
10483}
10484
// Expand a BSWAP (byte-reverse) node for simple i16/i32/i64 scalar element
// types into rotates or shift/and/or sequences; returns an empty SDValue for
// any other type so the caller can fall back to a libcall or other lowering.
// NOTE(review): the opening signature line (original line 10485) was lost in
// extraction; presumably `SDValue TargetLowering::expandBSWAP(SDNode *N,
// SelectionDAG &DAG) const {` -- confirm against the original file.
10486 SDLoc dl(N);
10487 EVT VT = N->getValueType(0);
10488 SDValue Op = N->getOperand(0);
10489
10490 if (!VT.isSimple())
10491 return SDValue();
10492
10493 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10494 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10495 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10496 default:
10497 return SDValue();
10498 case MVT::i16:
10499 // Use a rotate by 8. This can be further expanded if necessary.
10500 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10501 case MVT::i32:
10502 // This is meant for ARM specifically, which has ROTR but no ROTL.
// NOTE(review): the guarding `if (...)` that opens this braced ROTR fast
// path (original line 10503) was lost in extraction -- presumably a check
// that ROTR is legal/custom while ROTL is not; confirm against the
// original file.
10504 SDValue Mask = DAG.getConstant(0x00FF00FF, dl, VT);
10505 // (x & 0x00FF00FF) rotr 8 | (x rotl 8) & 0x00FF00FF
10506 SDValue And = DAG.getNode(ISD::AND, dl, VT, Op, Mask);
10507 SDValue Rotr =
10508 DAG.getNode(ISD::ROTR, dl, VT, And, DAG.getConstant(8, dl, SHVT));
// "rotl 8" is emitted as ROTR by 24, since this path assumes ROTR only.
10509 SDValue Rotl =
10510 DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10511 SDValue And2 = DAG.getNode(ISD::AND, dl, VT, Rotl, Mask);
10512 return DAG.getNode(ISD::OR, dl, VT, Rotr, And2);
10513 }
// Generic i32 path: move each of the four bytes into place with SHL/SRL and
// AND masks, then OR the partial results together.
10514 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10515 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
10516 DAG.getConstant(0xFF00, dl, VT));
10517 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
10518 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10519 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
10520 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10521 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
10522 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
10523 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
10524 case MVT::i64:
// i64: isolate each of the eight bytes (Tmp1..Tmp8, numbered low-to-high in
// the result) and OR them pairwise, then pairwise again, into the answer.
10525 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
10526 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
10527 DAG.getConstant(255ULL<<8, dl, VT));
10528 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
10529 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
10530 DAG.getConstant(255ULL<<16, dl, VT));
10531 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
10532 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
10533 DAG.getConstant(255ULL<<24, dl, VT));
10534 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
10535 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10536 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
10537 DAG.getConstant(255ULL<<24, dl, VT));
10538 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10539 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
10540 DAG.getConstant(255ULL<<16, dl, VT));
10541 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
10542 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
10543 DAG.getConstant(255ULL<<8, dl, VT));
10544 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
10545 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
10546 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
10547 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
10548 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
10549 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
10550 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
10551 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
10552 }
10553}
10554
// Body of TargetLowering::expandVPBSWAP. NOTE(review): the signature line
// (original source line 10555) was dropped by the documentation extractor —
// presumably `SDValue TargetLowering::expandVPBSWAP(SDNode *N,
// SelectionDAG &DAG) const {`; confirm against the checked-in source.
//
// Expands a vector-predicated byte swap (ISD::VP_BSWAP) into VP shift /
// mask / OR nodes, threading the node's Mask and EVL operands through every
// generated VP node so predication semantics are preserved.
// Returns SDValue() (empty) when the type is not simple or the scalar width
// is not one of i16/i32/i64, letting the caller try other lowering.
10556 SDLoc dl(N);
10557 EVT VT = N->getValueType(0);
10558 SDValue Op = N->getOperand(0);
10559 SDValue Mask = N->getOperand(1);
10560 SDValue EVL = N->getOperand(2);
10561
// Only MVT-representable types are matched by the switch below.
10562 if (!VT.isSimple())
10563 return SDValue();
10564
10565 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10566 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10567 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10568 default:
10569 return SDValue();
10570 case MVT::i16:
// i16: swap the two bytes with a shl-8 / srl-8 pair OR'd together.
10571 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10572 Mask, EVL);
10573 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10574 Mask, EVL);
10575 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
10576 case MVT::i32:
// i32: outer bytes move via 24-bit shifts; middle bytes are isolated with
// the 0xFF00 lane mask before/after an 8-bit shift, then all four lanes
// are OR-combined.
10577 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10578 Mask, EVL);
10579 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
10580 Mask, EVL);
10581 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
10582 Mask, EVL);
10583 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10584 Mask, EVL);
10585 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10586 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
10587 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10588 Mask, EVL);
10589 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10590 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10591 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10592 case MVT::i64:
// i64: same mask-and-shift scheme extended to eight byte lanes
// (Tmp1..Tmp8), reduced at the end with a balanced OR tree.
10593 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10594 Mask, EVL);
10595 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10596 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10597 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
10598 Mask, EVL);
10599 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10600 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10601 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
10602 Mask, EVL);
10603 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10604 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10605 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
10606 Mask, EVL);
10607 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10608 Mask, EVL);
10609 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
10610 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10611 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10612 Mask, EVL);
10613 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
10614 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10615 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
10616 Mask, EVL);
10617 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10618 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10619 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10620 Mask, EVL);
10621 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
10622 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
10623 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10624 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10625 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
10626 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10627 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
10628 }
10629}
10630
// Body of TargetLowering::expandBITREVERSE. NOTE(review): the signature line
// (original source line 10631) was dropped by the documentation extractor;
// confirm against the checked-in source.
//
// Expands ISD::BITREVERSE into shift/mask/or nodes. For power-of-two scalar
// widths >= 8 bits it byte-swaps first and then mirrors within each byte by
// successively swapping nibbles, bit-pairs, and single bits using the classic
// splat masks 0x0F / 0x33 / 0x55. For other widths it falls back to a per-bit
// loop that places every source bit at its mirrored position.
10632 SDLoc dl(N);
10633 EVT VT = N->getValueType(0);
10634 SDValue Op = N->getOperand(0);
10635 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10636 unsigned Sz = VT.getScalarSizeInBits();
10637
10638 SDValue Tmp, Tmp2, Tmp3;
10639
10640 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10641 // and finally the i1 pairs.
10642 // TODO: We can easily support i4/i2 legal types if any target ever does.
10643 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10644 // Create the masks - repeating the pattern every byte.
10645 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10646 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10647 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10648
10649 // BSWAP if the type is wider than a single byte.
10650 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
10651
10652 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10653 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
10654 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
10655 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
10656 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
10657 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10658
10659 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10660 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
10661 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
10662 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
10663 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
10664 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10665
10666 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10667 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
10668 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
10669 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
10670 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
10671 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10672 return Tmp;
10673 }
10674
// Generic fallback: O(Sz) nodes. For bit I (reading J = Sz-1-I), shift the
// source so bit I lands at position J, isolate it with a one-bit mask, and
// OR it into the accumulator.
10675 Tmp = DAG.getConstant(0, dl, VT);
10676 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
10677 if (I < J)
10678 Tmp2 =
10679 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
10680 else
10681 Tmp2 =
10682 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
10683
10684 APInt Shift = APInt::getOneBitSet(Sz, J);
10685 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
10686 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
10687 }
10688
10689 return Tmp;
10690}
10691
// Body of TargetLowering::expandVPBITREVERSE. NOTE(review): the signature
// line (original source line 10692) was dropped by the documentation
// extractor; confirm against the checked-in source.
//
// Vector-predicated variant of expandBITREVERSE: the same BSWAP-then-swap
// (nibbles, bit-pairs, bits) scheme, but every node is the VP_* form carrying
// the Mask and EVL operands. Unlike the non-VP expansion, there is no per-bit
// fallback: non-power-of-two / sub-byte widths return SDValue() (empty).
10693 assert(N->getOpcode() == ISD::VP_BITREVERSE);
10694
10695 SDLoc dl(N);
10696 EVT VT = N->getValueType(0);
10697 SDValue Op = N->getOperand(0);
10698 SDValue Mask = N->getOperand(1);
10699 SDValue EVL = N->getOperand(2);
10700 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10701 unsigned Sz = VT.getScalarSizeInBits();
10702
10703 SDValue Tmp, Tmp2, Tmp3;
10704
10705 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10706 // and finally the i1 pairs.
10707 // TODO: We can easily support i4/i2 legal types if any target ever does.
10708 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10709 // Create the masks - repeating the pattern every byte.
10710 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10711 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10712 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10713
10714 // BSWAP if the type is wider than a single byte.
10715 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
10716
10717 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10718 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
10719 Mask, EVL);
10720 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10721 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
10722 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
10723 Mask, EVL);
10724 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
10725 Mask, EVL);
10726 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10727
10728 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10729 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
10730 Mask, EVL);
10731 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10732 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
10733 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
10734 Mask, EVL);
10735 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
10736 Mask, EVL);
10737 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10738
10739 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10740 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
10741 Mask, EVL);
10742 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10743 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
10744 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
10745 Mask, EVL);
10746 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
10747 Mask, EVL);
10748 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10749 return Tmp;
10750 }
10751 return SDValue();
10752}
10753
// TargetLowering::scalarizeVectorLoad — turn a vector load into scalar work.
// NOTE(review): the extractor dropped original source lines 10755 (first
// signature line, presumably `TargetLowering::scalarizeVectorLoad(LoadSDNode
// *LD,`), 10796 and 10822 (each presumably `SmallVector<SDValue, 8> Vals;`
// feeding the `Vals.push_back` calls below) — confirm against the checked-in
// source.
//
// Returns {loaded value, output chain}. Two strategies:
//  * Non-byte-sized elements: do ONE integer load of the whole vector's
//    storage, then shift/mask/truncate each element out (endian-aware), and
//    rebuild the result with a BUILD_VECTOR.
//  * Byte-sized elements: emit one scalar (ext)load per element at
//    consecutive byte offsets and TokenFactor the chains together.
// Aborts (report_fatal_error) on scalable vectors, which cannot be
// enumerated element-by-element here.
10754std::pair<SDValue, SDValue>
10756 SelectionDAG &DAG) const {
10757 SDLoc SL(LD);
10758 SDValue Chain = LD->getChain();
10759 SDValue BasePTR = LD->getBasePtr();
10760 EVT SrcVT = LD->getMemoryVT();
10761 EVT DstVT = LD->getValueType(0);
10762 ISD::LoadExtType ExtType = LD->getExtensionType();
10763
10764 if (SrcVT.isScalableVector())
10765 report_fatal_error("Cannot scalarize scalable vector loads");
10766
10767 unsigned NumElem = SrcVT.getVectorNumElements();
10768
10769 EVT SrcEltVT = SrcVT.getScalarType();
10770 EVT DstEltVT = DstVT.getScalarType();
10771
10772 // A vector must always be stored in memory as-is, i.e. without any padding
10773 // between the elements, since various code depend on it, e.g. in the
10774 // handling of a bitcast of a vector type to int, which may be done with a
10775 // vector store followed by an integer load. A vector that does not have
10776 // elements that are byte-sized must therefore be stored as an integer
10777 // built out of the extracted vector elements.
10778 if (!SrcEltVT.isByteSized()) {
10779 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
10780 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
10781
10782 unsigned NumSrcBits = SrcVT.getSizeInBits();
10783 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
10784
10785 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
10786 SDValue SrcEltBitMask = DAG.getConstant(
10787 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
10788
10789 // Load the whole vector and avoid masking off the top bits as it makes
10790 // the codegen worse.
10791 SDValue Load =
10792 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
10793 LD->getPointerInfo(), SrcIntVT, LD->getBaseAlign(),
10794 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10795
// Extract each element: shift it down to bit 0 (big-endian reverses the
// lane order), mask to the element width, truncate, and optionally extend
// to the destination element type per the original load's extension kind.
10797 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10798 unsigned ShiftIntoIdx =
10799 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10800 SDValue ShiftAmount = DAG.getShiftAmountConstant(
10801 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
10802 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
10803 SDValue Elt =
10804 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
10805 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
10806
10807 if (ExtType != ISD::NON_EXTLOAD) {
10808 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
10809 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
10810 }
10811
10812 Vals.push_back(Scalar);
10813 }
10814
10815 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10816 return std::make_pair(Value, Load.getValue(1));
10817 }
10818
// Byte-sized elements: one scalar extload per element, advancing the base
// pointer by the element's store size each iteration.
10819 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
10820 assert(SrcEltVT.isByteSized());
10821
10823 SmallVector<SDValue, 8> LoadChains;
10824
10825 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10826 SDValue ScalarLoad = DAG.getExtLoad(
10827 ExtType, SL, DstEltVT, Chain, BasePTR,
10828 LD->getPointerInfo().getWithOffset(Idx * Stride), SrcEltVT,
10829 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10830
10831 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
10832
10833 Vals.push_back(ScalarLoad.getValue(0));
10834 LoadChains.push_back(ScalarLoad.getValue(1));
10835 }
10836
// All element loads are independent; merge their chains.
10837 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
10838 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10839
10840 return std::make_pair(Value, NewChain);
10841}
10842
// TargetLowering::scalarizeVectorStore — turn a vector store into scalar
// work. NOTE(review): the extractor dropped original source lines 10843
// (first signature line, presumably `SDValue
// TargetLowering::scalarizeVectorStore(StoreSDNode *ST,`) and 10899
// (presumably `SmallVector<SDValue, 8> Stores;`) — confirm against the
// checked-in source.
//
// Mirror of scalarizeVectorLoad:
//  * Non-byte-sized elements: pack all elements into one integer value with
//    zext + shift + or (endian-aware lane placement) and emit a single
//    integer store.
//  * Byte-sized elements: emit one (possibly truncating) scalar store per
//    element at consecutive byte offsets and TokenFactor the chains.
// Aborts (report_fatal_error) on scalable vectors.
10844 SelectionDAG &DAG) const {
10845 SDLoc SL(ST);
10846
10847 SDValue Chain = ST->getChain();
10848 SDValue BasePtr = ST->getBasePtr();
10849 SDValue Value = ST->getValue();
10850 EVT StVT = ST->getMemoryVT();
10851
10852 if (StVT.isScalableVector())
10853 report_fatal_error("Cannot scalarize scalable vector stores");
10854
10855 // The type of the data we want to save
10856 EVT RegVT = Value.getValueType();
10857 EVT RegSclVT = RegVT.getScalarType();
10858
10859 // The type of data as saved in memory.
10860 EVT MemSclVT = StVT.getScalarType();
10861
10862 unsigned NumElem = StVT.getVectorNumElements();
10863
10864 // A vector must always be stored in memory as-is, i.e. without any padding
10865 // between the elements, since various code depend on it, e.g. in the
10866 // handling of a bitcast of a vector type to int, which may be done with a
10867 // vector store followed by an integer load. A vector that does not have
10868 // elements that are byte-sized must therefore be stored as an integer
10869 // built out of the extracted vector elements.
10870 if (!MemSclVT.isByteSized()) {
10871 unsigned NumBits = StVT.getSizeInBits();
10872 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
10873
10874 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
10875
// Pack each truncated element into its bit lane (big-endian reverses the
// lane order) and OR it into the accumulated integer.
10876 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10877 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10878 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
10879 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
10880 unsigned ShiftIntoIdx =
10881 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10882 SDValue ShiftAmount =
10883 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
10884 SDValue ShiftedElt =
10885 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
10886 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
10887 }
10888
10889 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
10890 ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10891 ST->getAAInfo());
10892 }
10893
10894 // Store Stride in bytes
10895 unsigned Stride = MemSclVT.getSizeInBits() / 8;
10896 assert(Stride && "Zero stride!");
10897 // Extract each of the elements from the original vector and save them into
10898 // memory individually.
10900 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10901 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10902
10903 SDValue Ptr =
10904 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
10905
10906 // This scalar TruncStore may be illegal, but we legalize it later.
10907 SDValue Store = DAG.getTruncStore(
10908 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
10909 MemSclVT, ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10910 ST->getAAInfo());
10911
10912 Stores.push_back(Store);
10913 }
10914
// Element stores are independent of each other; merge their chains.
10915 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
10916}
10917
// TargetLowering::expandUnalignedLoad — lower a load the target cannot
// perform at its natural alignment. NOTE(review): the extractor dropped
// original source lines 10919 (first signature line, presumably
// `TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG)
// const {`) and 10960 (presumably `SmallVector<SDValue, 8> Stores;` feeding
// the `Stores.push_back` calls below) — confirm against the checked-in
// source.
//
// Returns {result value, output chain}. Strategies, in order:
//  * FP/vector type with a legal same-sized integer type: reload as a
//    (misaligned) integer and BITCAST back (scalarizing instead when integer
//    loads of that width aren't legal/custom and the type is a vector).
//  * Otherwise for FP/vector: copy the bytes to an aligned stack slot with
//    register-width integer loads/stores, then do the original load from the
//    slot.
//  * Plain integer: split into two half-width loads (lo zext, hi keeps the
//    original extension kind) and recombine with SHL + OR.
10918std::pair<SDValue, SDValue>
10920 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
10921 "unaligned indexed loads not implemented!");
10922 SDValue Chain = LD->getChain();
10923 SDValue Ptr = LD->getBasePtr();
10924 EVT VT = LD->getValueType(0);
10925 EVT LoadedVT = LD->getMemoryVT();
10926 SDLoc dl(LD);
10927 auto &MF = DAG.getMachineFunction();
10928
10929 if (VT.isFloatingPoint() || VT.isVector()) {
10930 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
10931 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
10932 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
10933 LoadedVT.isVector()) {
10934 // Scalarize the load and let the individual components be handled.
10935 return scalarizeVectorLoad(LD, DAG);
10936 }
10937
10938 // Expand to a (misaligned) integer load of the same size,
10939 // then bitconvert to floating point or vector.
10940 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
10941 LD->getMemOperand());
10942 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
10943 if (LoadedVT != VT)
10944 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
10945 ISD::ANY_EXTEND, dl, VT, Result);
10946
10947 return std::make_pair(Result, newLoad.getValue(1));
10948 }
10949
10950 // Copy the value to a (aligned) stack slot using (unaligned) integer
10951 // loads and stores, then do a (aligned) load from the stack slot.
10952 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
10953 unsigned LoadedBytes = LoadedVT.getStoreSize();
10954 unsigned RegBytes = RegVT.getSizeInBits() / 8;
// Round up so a partial trailing chunk still gets its own copy iteration.
10955 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
10956
10957 // Make sure the stack slot is also aligned for the register type.
10958 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
10959 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
10961 SDValue StackPtr = StackBase;
10962 unsigned Offset = 0;
10963
10964 EVT PtrVT = Ptr.getValueType();
10965 EVT StackPtrVT = StackPtr.getValueType();
10966
10967 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10968 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10969
10970 // Do all but one copies using the full register width.
10971 for (unsigned i = 1; i < NumRegs; i++) {
10972 // Load one integer register's worth from the original location.
10973 SDValue Load = DAG.getLoad(
10974 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
10975 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10976 // Follow the load with a store to the stack slot. Remember the store.
10977 Stores.push_back(DAG.getStore(
10978 Load.getValue(1), dl, Load, StackPtr,
10979 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
10980 // Increment the pointers.
10981 Offset += RegBytes;
10982
10983 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10984 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10985 }
10986
10987 // The last copy may be partial. Do an extending load.
10988 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
10989 8 * (LoadedBytes - Offset));
10990 SDValue Load = DAG.getExtLoad(
10991 ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
10992 LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->getBaseAlign(),
10993 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10994 // Follow the load with a store to the stack slot. Remember the store.
10995 // On big-endian machines this requires a truncating store to ensure
10996 // that the bits end up in the right place.
10997 Stores.push_back(DAG.getTruncStore(
10998 Load.getValue(1), dl, Load, StackPtr,
10999 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
11000
11001 // The order of the stores doesn't matter - say it with a TokenFactor.
11002 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
11003
11004 // Finally, perform the original load only redirected to the stack slot.
11005 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
11006 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
11007 LoadedVT);
11008
11009 // Callers expect a MERGE_VALUES node.
11010 return std::make_pair(Load, TF);
11011 }
11012
11013 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
11014 "Unaligned load of unsupported type.");
11015
11016 // Compute the new VT that is half the size of the old one. This is an
11017 // integer MVT.
11018 unsigned NumBits = LoadedVT.getSizeInBits();
11019 EVT NewLoadedVT;
11020 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
11021 NumBits >>= 1;
11022
11023 Align Alignment = LD->getBaseAlign();
11024 unsigned IncrementSize = NumBits / 8;
11025 ISD::LoadExtType HiExtType = LD->getExtensionType();
11026
11027 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
11028 if (HiExtType == ISD::NON_EXTLOAD)
11029 HiExtType = ISD::ZEXTLOAD;
11030
11031 // Load the value in two parts
// Which half sits at the lower address depends on endianness; the low half
// is always zero-extended so the OR below doesn't smear sign bits.
11032 SDValue Lo, Hi;
11033 if (DAG.getDataLayout().isLittleEndian()) {
11034 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
11035 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11036 LD->getAAInfo());
11037
11038 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11039 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
11040 LD->getPointerInfo().getWithOffset(IncrementSize),
11041 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11042 LD->getAAInfo());
11043 } else {
11044 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
11045 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11046 LD->getAAInfo());
11047
11048 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11049 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
11050 LD->getPointerInfo().getWithOffset(IncrementSize),
11051 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11052 LD->getAAInfo());
11053 }
11054
11055 // aggregate the two parts
11056 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
11057 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
11058 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
11059
11060 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
11061 Hi.getValue(1));
11062
11063 return std::make_pair(Result, TF);
11064}
11065
// TargetLowering::expandUnalignedStore — lower a store the target cannot
// perform at its natural alignment. NOTE(review): the extractor dropped
// original source lines 11066 (first signature line, presumably `SDValue
// TargetLowering::expandUnalignedStore(StoreSDNode *ST,`) and 11119
// (presumably `SmallVector<SDValue, 8> Stores;`) — confirm against the
// checked-in source.
//
// Returns the new output chain. Strategies, in order:
//  * FP/vector memory type with a legal same-sized integer type: BITCAST the
//    value and emit a (misaligned) integer store (scalarizing instead when
//    integer stores of that width aren't legal/custom and the type is a
//    vector).
//  * Otherwise for FP/vector: store to an aligned stack slot, then copy out
//    with register-width integer loads/stores.
//  * Plain integer: split the value into two half-width parts with SRL and
//    emit two truncating stores (endian-aware order).
11067 SelectionDAG &DAG) const {
11068 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
11069 "unaligned indexed stores not implemented!");
11070 SDValue Chain = ST->getChain();
11071 SDValue Ptr = ST->getBasePtr();
11072 SDValue Val = ST->getValue();
11073 EVT VT = Val.getValueType();
11074 Align Alignment = ST->getBaseAlign();
11075 auto &MF = DAG.getMachineFunction();
11076 EVT StoreMemVT = ST->getMemoryVT();
11077
11078 SDLoc dl(ST);
11079 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
11080 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
11081 if (isTypeLegal(intVT)) {
11082 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
11083 StoreMemVT.isVector()) {
11084 // Scalarize the store and let the individual components be handled.
11085 SDValue Result = scalarizeVectorStore(ST, DAG);
11086 return Result;
11087 }
11088 // Expand to a bitconvert of the value to the integer type of the
11089 // same size, then a (misaligned) int store.
11090 // FIXME: Does not handle truncating floating point stores!
11091 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
11092 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
11093 Alignment, ST->getMemOperand()->getFlags());
11094 return Result;
11095 }
11096 // Do a (aligned) store to a stack slot, then copy from the stack slot
11097 // to the final destination using (unaligned) integer loads and stores.
11098 MVT RegVT = getRegisterType(
11099 *DAG.getContext(),
11100 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
11101 EVT PtrVT = Ptr.getValueType();
11102 unsigned StoredBytes = StoreMemVT.getStoreSize();
11103 unsigned RegBytes = RegVT.getSizeInBits() / 8;
// Round up so a partial trailing chunk still gets its own copy iteration.
11104 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
11105
11106 // Make sure the stack slot is also aligned for the register type.
11107 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
11108 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11109
11110 // Perform the original store, only redirected to the stack slot.
11111 SDValue Store = DAG.getTruncStore(
11112 Chain, dl, Val, StackPtr,
11113 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
11114
11115 EVT StackPtrVT = StackPtr.getValueType();
11116
11117 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
11118 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
11120 unsigned Offset = 0;
11121
11122 // Do all but one copies using the full register width.
11123 for (unsigned i = 1; i < NumRegs; i++) {
11124 // Load one integer register's worth from the stack slot.
11125 SDValue Load = DAG.getLoad(
11126 RegVT, dl, Store, StackPtr,
11127 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
11128 // Store it to the final location. Remember the store.
11129 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
11130 ST->getPointerInfo().getWithOffset(Offset),
11131 ST->getBaseAlign(),
11132 ST->getMemOperand()->getFlags()));
11133 // Increment the pointers.
11134 Offset += RegBytes;
11135 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
11136 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
11137 }
11138
11139 // The last store may be partial. Do a truncating store. On big-endian
11140 // machines this requires an extending load from the stack slot to ensure
11141 // that the bits are in the right place.
11142 EVT LoadMemVT =
11143 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
11144
11145 // Load from the stack slot.
11146 SDValue Load = DAG.getExtLoad(
11147 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
11148 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
11149
11150 Stores.push_back(DAG.getTruncStore(
11151 Load.getValue(1), dl, Load, Ptr,
11152 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
11153 ST->getBaseAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()));
11154 // The order of the stores doesn't matter - say it with a TokenFactor.
11155 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
11156 return Result;
11157 }
11158
11159 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
11160 "Unaligned store of unknown type.");
11161 // Get the half-size VT
11162 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
11163 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
11164 unsigned IncrementSize = NumBits / 8;
11165
11166 // Divide the stored value in two parts.
11167 SDValue ShiftAmount =
11168 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
11169 SDValue Lo = Val;
11170 // If Val is a constant, replace the upper bits with 0. The SRL will constant
11171 // fold and not use the upper bits. A smaller constant may be easier to
11172 // materialize.
11173 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
11174 Lo = DAG.getNode(
11175 ISD::AND, dl, VT, Lo,
11176 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
11177 VT));
11178 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
11179
11180 // Store the two parts
// Endianness selects which half goes at the lower address.
11181 SDValue Store1, Store2;
11182 Store1 = DAG.getTruncStore(Chain, dl,
11183 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
11184 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
11185 ST->getMemOperand()->getFlags());
11186
11187 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11188 Store2 = DAG.getTruncStore(
11189 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
11190 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
11191 ST->getMemOperand()->getFlags(), ST->getAAInfo());
11192
11193 SDValue Result =
11194 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
11195 return Result;
11196}
11197
// TargetLowering::IncrementMemoryAddress — advance Addr past one
// masked-load/store element group. NOTE(review): the extractor dropped
// original source lines 11199 (first signature line, presumably
// `TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,`) and
// 11203 (presumably `SDValue Increment;`, the variable assigned below) —
// confirm against the checked-in source.
//
// For normal (expanded) memory, the increment is simply DataVT's store size.
// For compressed memory, only active lanes occupy storage, so the increment
// is popcount(Mask) * element-size-in-bytes: for scalable vectors the count
// comes from a VECREDUCE_ADD of the zero-extended mask; for fixed vectors
// the mask is bitcast to an integer (widened to at least i32) and CTPOP'd.
11198SDValue
11200 const SDLoc &DL, EVT DataVT,
11201 SelectionDAG &DAG,
11202 bool IsCompressedMemory) const {
11204 EVT AddrVT = Addr.getValueType();
11205 EVT MaskVT = Mask.getValueType();
11206 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
11207 "Incompatible types of Data and Mask");
11208 if (IsCompressedMemory) {
11209 // Incrementing the pointer according to number of '1's in the mask.
11210 if (DataVT.isScalableVector()) {
11211 EVT MaskExtVT = MaskVT.changeElementType(*DAG.getContext(), MVT::i32);
11212 SDValue MaskExt = DAG.getNode(ISD::ZERO_EXTEND, DL, MaskExtVT, Mask);
11213 Increment = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, MaskExt);
11214 } else {
11215 EVT MaskIntVT =
11216 EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
11217 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
// CTPOP is done on at least an i32 operand; widen narrow masks first.
11218 if (MaskIntVT.getSizeInBits() < 32) {
11219 MaskInIntReg =
11220 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
11221 MaskIntVT = MVT::i32;
11222 }
11223 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
11224 }
11225 // Scale is an element size in bytes.
11226 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
11227 AddrVT);
11228 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
11229 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
11230 } else
11231 Increment = DAG.getTypeSize(DL, AddrVT, DataVT.getStoreSize());
11232
11233 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
11234}
11235
// clampDynamicVectorIndex — clamp a dynamic vector index so that a SubEC-
// element access starting at Idx stays inside VecVT. NOTE(review): the
// extractor dropped original source line 11236 (first signature line,
// presumably `static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
// SDValue Idx,`) — confirm against the checked-in source.
//
// Cases:
//  * Fixed subvector within a scalable vector: constant indices provably in
//    range pass through; otherwise clamp with UMIN against
//    vscale*NElts - NumSubElts (USUBSAT guards against underflow when the
//    subvector could be larger than the minimum vector size).
//  * Single element of a power-of-two-sized vector: cheaper AND with
//    NElts-1 instead of UMIN.
//  * General case: UMIN against the largest valid start index.
11237 EVT VecVT, const SDLoc &dl,
11238 ElementCount SubEC) {
11239 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
11240 "Cannot index a scalable vector within a fixed-width vector");
11241
11242 unsigned NElts = VecVT.getVectorMinNumElements();
11243 unsigned NumSubElts = SubEC.getKnownMinValue();
11244 EVT IdxVT = Idx.getValueType();
11245
11246 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
11247 // If this is a constant index and we know the value plus the number of the
11248 // elements in the subvector minus one is less than the minimum number of
11249 // elements then it's safe to return Idx.
11250 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
11251 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
11252 return Idx;
11253 SDValue VS =
11254 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
11255 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
11256 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
11257 DAG.getConstant(NumSubElts, dl, IdxVT));
11258 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
11259 }
11260 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
11261 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
11262 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
11263 DAG.getConstant(Imm, dl, IdxVT));
11264 }
11265 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
11266 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
11267 DAG.getConstant(MaxIndex, dl, IdxVT));
11268}
11269
// TargetLowering::getVectorElementPointer — compute the in-memory address of
// a single dynamically-indexed element of a vector, by delegating to
// getVectorSubVecPointer with a one-element subvector type.
// NOTE(review): the extractor dropped original source lines 11271 (second
// signature line), 11274 (presumably `return getVectorSubVecPointer(`), and
// 11276 (presumably the one-element
// `EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1)`
// argument) — confirm against the checked-in source.
11270SDValue
11272 EVT VecVT, SDValue Index,
11273 const SDNodeFlags PtrArithFlags) const {
11275 DAG, VecPtr, VecVT,
11277 Index, PtrArithFlags);
11278}
11279
// TargetLowering::getVectorSubVecPointer — compute the in-memory address of
// a dynamically-indexed subvector of a vector: clamp the index so the
// subvector access stays in bounds, convert it to a byte offset, and add it
// to the base pointer. NOTE(review): the extractor dropped original source
// line 11281 (second signature line, presumably
// `TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG, SDValue
// VecPtr,`) — confirm against the checked-in source.
11280SDValue
11282 EVT VecVT, EVT SubVecVT, SDValue Index,
11283 const SDNodeFlags PtrArithFlags) const {
11284 SDLoc dl(Index);
11285 // Make sure the index type is big enough to compute in.
11286 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
11287
11288 EVT EltVT = VecVT.getVectorElementType();
11289
11290 // Calculate the element offset and add it to the pointer.
11291 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
11292 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
11293 "Converting bits to bytes lost precision");
11294 assert(SubVecVT.getVectorElementType() == EltVT &&
11295 "Sub-vector must be a vector with matching element type");
11296 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
11297 SubVecVT.getVectorElementCount());
11298
// For a scalable subvector the element index scales with vscale as well.
11299 EVT IdxVT = Index.getValueType();
11300 if (SubVecVT.isScalableVector())
11301 Index =
11302 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
11303 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
11304
// Scale the (clamped) element index by the element size to get bytes.
11305 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
11306 DAG.getConstant(EltSize, dl, IdxVT));
11307 return DAG.getMemBasePlusOffset(VecPtr, Index, dl, PtrArithFlags);
11308}
11309
11310//===----------------------------------------------------------------------===//
11311// Implementation of Emulated TLS Model
11312//===----------------------------------------------------------------------===//
11313
// Lower an access to a TLS global under the emulated-TLS model: replace the
// address computation with a libcall
//   __emutls_get_address(&__emutls_v.<name>)
// returning the variable's address in the current thread.
// NOTE(review): several hyperlinked lines were dropped by the extraction —
// the method name/first parameter (original line 11314), the initializer of
// GV (11324), the CallLoweringInfo declaration (11335), and the
// MachineFrameInfo reference (11342). The surviving code is byte-identical
// to upstream.
11315 SelectionDAG &DAG) const {
11316 // Access to address of TLS variable xyz is lowered to a function call:
11317 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
11318 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11319 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
11320 SDLoc dl(GA);
11321
11322 ArgListTy Args;
11323 const GlobalValue *GV =
// Look up the control variable the compiler/linker emitted alongside the
// TLS global; it must already exist in the module.
11325 SmallString<32> NameString("__emutls_v.");
11326 NameString += GV->getName();
11327 StringRef EmuTlsVarName(NameString);
11328 const GlobalVariable *EmuTlsVar =
11329 GV->getParent()->getNamedGlobal(EmuTlsVarName);
11330 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
11331 Args.emplace_back(DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT), VoidPtrType);
11332
11333 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
11334
11336 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
11337 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
11338 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
11339
11340 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
11341 // At least for X86 targets, maybe good for other targets too?
11343 MFI.setAdjustsStack(true); // Is this only for X86 target?
11344 MFI.setHasCalls(true);
11345
// Emulated TLS cannot fold a constant offset into the libcall; callers must
// have split it out before reaching here.
11346 assert((GA->getOffset() == 0) &&
11347 "Emulated TLS must have zero offset in GlobalAddressSDNode");
11348 return CallResult.first;
11349}
11350
// Lower (setcc X, 0, eq) to (srl (ctlz X), log2(bitwidth)):
// ctlz(X) has its top bit set iff X == 0, so shifting right by
// log2(bitwidth) yields exactly the 0/1 comparison result. Only profitable
// when the target reports CTLZ as fast (isCtlzFast()).
// NOTE(review): the line carrying the method name and the SDValue Op
// parameter (original line 11351) was dropped by the extraction.
11352 SelectionDAG &DAG) const {
11353 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
11354 if (!isCtlzFast())
11355 return SDValue();
11356 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11357 SDLoc dl(Op);
// Only the (X == 0) form is handled; anything else falls through and
// returns the empty SDValue.
11358 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
11359 EVT VT = Op.getOperand(0).getValueType();
11360 SDValue Zext = Op.getOperand(0);
// Widen sub-i32 operands so the CTLZ/SRL sequence operates on at least i32.
11361 if (VT.bitsLT(MVT::i32)) {
11362 VT = MVT::i32;
11363 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
11364 }
11365 unsigned Log2b = Log2_32(VT.getSizeInBits());
11366 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
11367 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
11368 DAG.getConstant(Log2b, dl, MVT::i32));
11369 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
11370 }
11371 return SDValue();
11372}
11373
// Expand an integer SMIN/SMAX/UMIN/UMAX node when it is not natively legal.
// Strategies, tried in order: flip signedness when both sign bits are known
// zero; special-case umax(x,1) and umin/umax via USUBSAT; unroll illegal
// vectors; finally build setcc+select, preferring a condition code for which
// a SETCC node already exists in the DAG so it can be reused.
// NOTE(review): the signature line (original line 11374 — this is
// TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG)) was
// dropped by the extraction, as were the trailing halves of some conditions
// (e.g. original lines 11390, 11400, 11408, 11415).
11375 SDValue Op0 = Node->getOperand(0);
11376 SDValue Op1 = Node->getOperand(1);
11377 EVT VT = Op0.getValueType();
11378 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11379 unsigned Opcode = Node->getOpcode();
11380 SDLoc DL(Node);
11381
11382 // If both sign bits are zero, flip UMIN/UMAX <-> SMIN/SMAX if legal.
11383 unsigned AltOpcode = ISD::getOppositeSignednessMinMaxOpcode(Opcode);
11384 if (isOperationLegal(AltOpcode, VT) && DAG.SignBitIsZero(Op0) &&
11385 DAG.SignBitIsZero(Op1))
11386 return DAG.getNode(AltOpcode, DL, VT, Op0, Op1);
11387
11388 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
// Freeze Op0 because it is used twice (once in the sub, once in the setcc).
11389 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
11391 Op0 = DAG.getFreeze(Op0);
11392 SDValue Zero = DAG.getConstant(0, DL, VT);
11393 return DAG.getNode(ISD::SUB, DL, VT, Op0,
11394 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
11395 }
11396
11397 // umin(x,y) -> sub(x,usubsat(x,y))
11398 // TODO: Missing freeze(Op0)?
11399 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
11401 return DAG.getNode(ISD::SUB, DL, VT, Op0,
11402 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
11403 }
11404
11405 // umax(x,y) -> add(x,usubsat(y,x))
11406 // TODO: Missing freeze(Op0)?
11407 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
11409 return DAG.getNode(ISD::ADD, DL, VT, Op0,
11410 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
11411 }
11412
11413 // FIXME: Should really try to split the vector in case it's legal on a
11414 // subvector.
11416 return DAG.UnrollVectorOp(Node);
11417
11418 // Attempt to find an existing SETCC node that we can reuse.
11419 // TODO: Do we need a generic doesSETCCNodeExist?
11420 // TODO: Missing freeze(Op0)/freeze(Op1)?
// Helper: emit setcc+select. First scan for an existing SETCC in either the
// direct orientation (select Op0, Op1) or the commuted one (select Op1,
// Op0); if none exists, create a fresh one with the preferred predicate.
11421 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
11422 ISD::CondCode PrefCommuteCC,
11423 ISD::CondCode AltCommuteCC) {
11424 SDVTList BoolVTList = DAG.getVTList(BoolVT);
11425 for (ISD::CondCode CC : {PrefCC, AltCC}) {
11426 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
11427 {Op0, Op1, DAG.getCondCode(CC)})) {
11428 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
11429 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
11430 }
11431 }
11432 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
11433 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
11434 {Op0, Op1, DAG.getCondCode(CC)})) {
11435 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
11436 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
11437 }
11438 }
11439 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
11440 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
11441 };
11442
11443 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
11444 // -> Y = (A < B) ? B : A
11445 // -> Y = (A >= B) ? A : B
11446 // -> Y = (A <= B) ? B : A
11447 switch (Opcode) {
11448 case ISD::SMAX:
11449 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
11450 case ISD::SMIN:
11451 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
11452 case ISD::UMAX:
11453 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
11454 case ISD::UMIN:
11455 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
11456 }
11457
11458 llvm_unreachable("How did we get here?");
11459}
11460
// Expand a saturating integer add/sub (SADDSAT/UADDSAT/SSUBSAT/USUBSAT) into
// operations the target supports. Cheap rewrites via UMAX/UMIN are tried
// first; the general path computes the overflow flag with [SU]ADDO/[SU]SUBO
// and selects the clamped value when overflow occurred.
// NOTE(review): the signature line (original line 11461 —
// TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG)) was
// dropped by the extraction, along with a few hyperlinked condition lines
// (e.g. 11517, 11529, 11539).
11462 unsigned Opcode = Node->getOpcode();
11463 SDValue LHS = Node->getOperand(0);
11464 SDValue RHS = Node->getOperand(1);
11465 EVT VT = LHS.getValueType();
11466 SDLoc dl(Node);
11467
11468 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
11469 assert(VT.isInteger() && "Expected operands to be integers");
11470
11471 // usub.sat(a, b) -> umax(a, b) - b
11472 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
11473 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
11474 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
11475 }
11476
11477 // usub.sat(a, 1) -> sub(a, zext(a != 0))
// LHS is frozen because it feeds both the compare and the subtract.
11478 if (Opcode == ISD::USUBSAT && isOneOrOneSplat(RHS)) {
11479 LHS = DAG.getFreeze(LHS);
11480 SDValue Zero = DAG.getConstant(0, dl, VT);
11481 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11482 SDValue IsNonZero = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETNE);
11483 SDValue Subtrahend = DAG.getBoolExtOrTrunc(IsNonZero, dl, VT, BoolVT);
// Mask to a single bit in case the boolean extension produced all-ones.
11484 Subtrahend =
11485 DAG.getNode(ISD::AND, dl, VT, Subtrahend, DAG.getConstant(1, dl, VT));
11486 return DAG.getNode(ISD::SUB, dl, VT, LHS, Subtrahend);
11487 }
11488
11489 // uadd.sat(a, b) -> umin(a, ~b) + b
11490 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
11491 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
11492 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
11493 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
11494 }
11495
// Map the saturating opcode to its overflow-reporting counterpart for the
// general expansion below.
11496 unsigned OverflowOp;
11497 switch (Opcode) {
11498 case ISD::SADDSAT:
11499 OverflowOp = ISD::SADDO;
11500 break;
11501 case ISD::UADDSAT:
11502 OverflowOp = ISD::UADDO;
11503 break;
11504 case ISD::SSUBSAT:
11505 OverflowOp = ISD::SSUBO;
11506 break;
11507 case ISD::USUBSAT:
11508 OverflowOp = ISD::USUBO;
11509 break;
11510 default:
11511 llvm_unreachable("Expected method to receive signed or unsigned saturation "
11512 "addition or subtraction node.");
11513 }
11514
11515 // FIXME: Should really try to split the vector in case it's legal on a
11516 // subvector.
11518 return DAG.UnrollVectorOp(Node);
11519
11520 unsigned BitWidth = LHS.getScalarValueSizeInBits();
11521 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11522 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11523 SDValue SumDiff = Result.getValue(0);
11524 SDValue Overflow = Result.getValue(1);
11525 SDValue Zero = DAG.getConstant(0, dl, VT);
11526 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
11527
// Unsigned add saturates only towards all-ones; unsigned sub only towards
// zero, so a single select (or mask trick) per opcode suffices.
11528 if (Opcode == ISD::UADDSAT) {
11530 // (LHS + RHS) | OverflowMask
11531 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
11532 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
11533 }
11534 // Overflow ? 0xffff.... : (LHS + RHS)
11535 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
11536 }
11537
11538 if (Opcode == ISD::USUBSAT) {
11540 // (LHS - RHS) & ~OverflowMask
11541 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
11542 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
11543 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
11544 }
11545 // Overflow ? 0 : (LHS - RHS)
11546 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
11547 }
11548
11549 assert((Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) &&
11550 "Expected signed saturating add/sub opcode");
11551
11552 const APInt MinVal = APInt::getSignedMinValue(BitWidth);
11553 const APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
11554
11555 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
11556 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
11557
11558 // If either of the operand signs are known, then they are guaranteed to
11559 // only saturate in one direction. If non-negative they will saturate
11560 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
11561 //
11562 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
11563 // sign of 'y' has to be flipped.
11564
11565 bool LHSIsNonNegative = KnownLHS.isNonNegative();
11566 bool RHSIsNonNegative =
11567 Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative() : KnownRHS.isNegative();
11568 if (LHSIsNonNegative || RHSIsNonNegative) {
11569 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11570 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
11571 }
11572
11573 bool LHSIsNegative = KnownLHS.isNegative();
11574 bool RHSIsNegative =
11575 Opcode == ISD::SADDSAT ? KnownRHS.isNegative() : KnownRHS.isNonNegative();
11576 if (LHSIsNegative || RHSIsNegative) {
11577 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11578 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
11579 }
11580
// General signed case: the saturation value depends on the sign of the
// (wrapped) result — shifting SumDiff arithmetically by BW-1 gives all-ones
// for negative / zero for non-negative, and XOR with MinVal turns that into
// SignedMax / SignedMin respectively.
11581 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
11582 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11583 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
11584 DAG.getConstant(BitWidth - 1, dl, VT));
11585 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
11586 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
11587}
11588
// Expand a three-way compare (ISD::UCMP/SCMP): produce -1 / 0 / +1 in ResVT
// for LHS < / == / > RHS. Uses two selects when boolean arithmetic is not
// profitable, otherwise computes sext(IsGT) - sext(IsLT) in the boolean type
// and extends/truncates to the result type.
// NOTE(review): the signature line (original line 11589 —
// TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG)) and the
// hyperlinked halves of the conditions at original lines 11609, 11611 and
// 11619 were dropped by the extraction.
11590 unsigned Opcode = Node->getOpcode();
11591 SDValue LHS = Node->getOperand(0);
11592 SDValue RHS = Node->getOperand(1);
11593 EVT VT = LHS.getValueType();
11594 EVT ResVT = Node->getValueType(0);
11595 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11596 SDLoc dl(Node);
11597
// UCMP compares unsigned, SCMP signed; the structure is otherwise identical.
11598 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
11599 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
11600 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
11601 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
11602
11603 // We can't perform arithmetic on i1 values. Extending them would
11604 // probably result in worse codegen, so let's just use two selects instead.
11605 // Some targets are also just better off using selects rather than subtraction
11606 // because one of the conditions can be merged with one of the selects.
11607 // And finally, if we don't know the contents of high bits of a boolean value
11608 // we can't perform any arithmetic either.
11610 BoolVT.getScalarSizeInBits() == 1 ||
11612 SDValue SelectZeroOrOne =
11613 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
11614 DAG.getConstant(0, dl, ResVT));
11615 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
11616 SelectZeroOrOne);
11617 }
11618
// The swap below compensates for a boolean-contents convention checked by
// the (extraction-dropped) condition on original line 11619.
11620 std::swap(IsGT, IsLT);
11621 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
11622 ResVT);
11623}
11624
// Expand a saturating shift-left (ISD::SSHLSAT/USHLSAT): perform the shift,
// shift back, and if the round-trip does not reproduce LHS the shift
// overflowed, so select the saturation value instead. For the signed form
// the saturation value follows the sign of LHS.
// NOTE(review): the signature line (original line 11625 —
// TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG)) and the
// vector-legality condition at original line 11638 were dropped by the
// extraction.
11626 unsigned Opcode = Node->getOpcode();
11627 bool IsSigned = Opcode == ISD::SSHLSAT;
11628 SDValue LHS = Node->getOperand(0);
11629 SDValue RHS = Node->getOperand(1);
11630 EVT VT = LHS.getValueType();
11631 SDLoc dl(Node);
11632
11633 assert((Node->getOpcode() == ISD::SSHLSAT ||
11634 Node->getOpcode() == ISD::USHLSAT) &&
11635 "Expected a SHLSAT opcode");
11636 assert(VT.isInteger() && "Expected operands to be integers");
11637
11639 return DAG.UnrollVectorOp(Node);
11640
11641 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
11642
11643 unsigned BW = VT.getScalarSizeInBits();
11644 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11645 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
// The shift back must match signedness so sign bits are regenerated for the
// signed form.
11646 SDValue Orig =
11647 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
11648
11649 SDValue SatVal;
11650 if (IsSigned) {
// Negative LHS saturates to SignedMin, non-negative to SignedMax.
11651 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
11652 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
11653 SDValue Cond =
11654 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
11655 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
11656 } else {
11657 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
11658 }
11659 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
11660 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
11661}
11662
// Brute-force double-width multiply: split each operand into half-width
// halves and combine the four partial products (Knuth's Algorithm M /
// Hacker's Delight), writing the low/high halves of LHS*RHS into Lo/Hi.
// Optional HiLHS/HiRHS extend the computation to a full (unsigned)
// 2*VT x 2*VT -> 2*VT multiply by folding the cross products into Hi.
// NOTE(review): the line with the method name and leading parameters
// (original line 11663 — TargetLowering::forceExpandMultiply(SelectionDAG
// &DAG, const SDLoc &dl, ...)) was dropped by the extraction.
11664 bool Signed, SDValue &Lo, SDValue &Hi,
11665 SDValue LHS, SDValue RHS,
11666 SDValue HiLHS, SDValue HiRHS) const {
11667 EVT VT = LHS.getValueType();
11668 assert(RHS.getValueType() == VT && "Mismatching operand types");
11669
11670 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
// NOTE(review): "RiRHS" in the assert message below is a typo for "HiRHS";
// the string is runtime text and is left byte-identical here.
11671 assert((!Signed || !HiLHS) &&
11672 "Signed flag should only be set when HiLHS and RiRHS are null");
11673
11674 // We'll expand the multiplication by brute force because we have no other
11675 // options. This is a trivially-generalized version of the code from
11676 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
11677 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
11678 // sign bits while calculating the Hi half.
11679 unsigned Bits = VT.getSizeInBits();
11680 unsigned HalfBits = Bits / 2;
11681 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
11682 SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
11683 SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
11684
// T = LL*RL: low partial product; TL/TH are its low/high halves.
11685 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
11686 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
11687
11688 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
11689 // This is always an unsigned shift.
11690 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
11691
// Signed mode extracts the upper halves with SRA so sign bits propagate
// into the Hi computation.
11692 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
11693 SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
11694 SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
11695
// U = LH*RL + TH, V = LL*RH + UL: the two cross products plus carries.
11696 SDValue U =
11697 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
11698 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
11699 SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
11700
11701 SDValue V =
11702 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
11703 SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
11704
11705 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
11706 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
11707
11708 Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
11709 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
11710
11711 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
11712 // the products to Hi.
11713 if (HiLHS) {
11714 SDValue RHLL = DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS);
11715 SDValue RLLH = DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS);
11716 Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
11717 DAG.getNode(ISD::ADD, dl, VT, RHLL, RLLH));
11718 }
11719}
11720
// Expand LHS*RHS into double-width Lo/Hi halves: prefer a MUL_I16/I32/I64/
// I128 libcall when the target provides one for the widened type, otherwise
// fall back to forceExpandMultiply. For the libcall, high halves are
// materialized (sign bits for signed, zeros for unsigned) and the argument
// pair order depends on endianness because the type legalizer cannot defer
// to the C calling convention here.
// NOTE(review): the line with the method name and leading parameters
// (original line 11721 — TargetLowering::forceExpandWideMUL(SelectionDAG
// &DAG, const SDLoc &dl, ...)), the CallOptions declaration (11761), the
// endianness condition (11764) and the assert head (11775) were dropped by
// the extraction.
11722 bool Signed, const SDValue LHS,
11723 const SDValue RHS, SDValue &Lo,
11724 SDValue &Hi) const {
11725 EVT VT = LHS.getValueType();
11726 assert(RHS.getValueType() == VT && "Mismatching operand types");
11727 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
11728 // We can fall back to a libcall with an illegal type for the MUL if we
11729 // have a libcall big enough.
11730 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
11731 if (WideVT == MVT::i16)
11732 LC = RTLIB::MUL_I16;
11733 else if (WideVT == MVT::i32)
11734 LC = RTLIB::MUL_I32;
11735 else if (WideVT == MVT::i64)
11736 LC = RTLIB::MUL_I64;
11737 else if (WideVT == MVT::i128)
11738 LC = RTLIB::MUL_I128;
11739
// No suitable libcall: emit the inline shift-and-add expansion instead.
11740 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
11741 if (LibcallImpl == RTLIB::Unsupported) {
11742 forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
11743 return;
11744 }
11745
11746 SDValue HiLHS, HiRHS;
11747 if (Signed) {
11748 // The high part is obtained by SRA'ing all but one of the bits of low
11749 // part.
11750 unsigned LoSize = VT.getFixedSizeInBits();
11751 SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
11752 HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
11753 HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
11754 } else {
11755 HiLHS = DAG.getConstant(0, dl, VT);
11756 HiRHS = DAG.getConstant(0, dl, VT);
11757 }
11758
11759 // Attempt a libcall.
11760 SDValue Ret;
11762 CallOptions.setIsSigned(Signed);
11763 CallOptions.setIsPostTypeLegalization(true);
11765 // Halves of WideVT are packed into registers in different order
11766 // depending on platform endianness. This is usually handled by
11767 // the C calling convention, but we can't defer to it in
11768 // the legalizer.
11769 SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
11770 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11771 } else {
11772 SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
11773 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11774 }
11776 "Ret value is a collection of constituent nodes holding result.");
// The libcall result comes back as a BUILD_PAIR-style node; unpack the two
// halves in endianness order.
11777 if (DAG.getDataLayout().isLittleEndian()) {
11778 // Same as above.
11779 Lo = Ret.getOperand(0);
11780 Hi = Ret.getOperand(1);
11781 } else {
11782 Lo = Ret.getOperand(1);
11783 Hi = Ret.getOperand(0);
11784 }
11785}
11786
// Expand a fixed-point multiply ([SU]MULFIX / [SU]MULFIXSAT): compute the
// double-width product of LHS and RHS, shift right by Scale, and (for the
// SAT forms) clamp on overflow. Scale==0 gets cheap MUL/[SU]MULO-based
// special cases; the general path obtains Lo/Hi via [SU]MUL_LOHI, MULH*, a
// widened MUL, or forceExpandWideMUL.
// NOTE(review): the signature line (original line 11788 —
// TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG)) and
// the MUL-legality condition at original line 11810 were dropped by the
// extraction.
11789 assert((Node->getOpcode() == ISD::SMULFIX ||
11790 Node->getOpcode() == ISD::UMULFIX ||
11791 Node->getOpcode() == ISD::SMULFIXSAT ||
11792 Node->getOpcode() == ISD::UMULFIXSAT) &&
11793 "Expected a fixed point multiplication opcode");
11794
11795 SDLoc dl(Node);
11796 SDValue LHS = Node->getOperand(0);
11797 SDValue RHS = Node->getOperand(1);
11798 EVT VT = LHS.getValueType();
11799 unsigned Scale = Node->getConstantOperandVal(2);
11800 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
11801 Node->getOpcode() == ISD::UMULFIXSAT);
11802 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
11803 Node->getOpcode() == ISD::SMULFIXSAT);
11804 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11805 unsigned VTSize = VT.getScalarSizeInBits();
11806
11807 if (!Scale) {
11808 // [us]mul.fix(a, b, 0) -> mul(a, b)
11809 if (!Saturating) {
11811 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11812 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
11813 SDValue Result =
11814 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11815 SDValue Product = Result.getValue(0);
11816 SDValue Overflow = Result.getValue(1);
11817 SDValue Zero = DAG.getConstant(0, dl, VT);
11818
11819 APInt MinVal = APInt::getSignedMinValue(VTSize);
11820 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
11821 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11822 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11823 // Xor the inputs, if resulting sign bit is 0 the product will be
11824 // positive, else negative.
11825 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
11826 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
11827 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
11828 return DAG.getSelect(dl, VT, Overflow, Result, Product);
11829 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
11830 SDValue Result =
11831 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11832 SDValue Product = Result.getValue(0);
11833 SDValue Overflow = Result.getValue(1);
11834
// Unsigned fixed-point mul can only overflow upward; clamp to max.
11835 APInt MaxVal = APInt::getMaxValue(VTSize);
11836 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11837 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
11838 }
11839 }
11840
11841 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
11842 "Expected scale to be less than the number of bits if signed or at "
11843 "most the number of bits if unsigned.");
11844 assert(LHS.getValueType() == RHS.getValueType() &&
11845 "Expected both operands to be the same type");
11846
11847 // Get the upper and lower bits of the result.
11848 SDValue Lo, Hi;
11849 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11850 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
11851 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
11852 if (isOperationLegalOrCustom(LoHiOp, VT)) {
11853 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
11854 Lo = Result.getValue(0);
11855 Hi = Result.getValue(1);
11856 } else if (isOperationLegalOrCustom(HiOp, VT)) {
11857 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11858 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
11859 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
11860 // Try for a multiplication using a wider type.
11861 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11862 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
11863 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
11864 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
11865 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
11866 SDValue Shifted =
11867 DAG.getNode(ISD::SRA, dl, WideVT, Res,
11868 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
11869 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
11870 } else if (VT.isVector()) {
// Let the caller handle vectors some other way (e.g. by splitting).
11871 return SDValue();
11872 } else {
11873 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
11874 }
11875
11876 if (Scale == VTSize)
11877 // Result is just the top half since we'd be shifting by the width of the
11878 // operand. Overflow impossible so this works for both UMULFIX and
11879 // UMULFIXSAT.
11880 return Hi;
11881
11882 // The result will need to be shifted right by the scale since both operands
11883 // are scaled. The result is given to us in 2 halves, so we only want part of
11884 // both in the result.
11885 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
11886 DAG.getShiftAmountConstant(Scale, VT, dl));
11887 if (!Saturating)
11888 return Result;
11889
11890 if (!Signed) {
11891 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
11892 // widened multiplication) aren't all zeroes.
11893
11894 // Saturate to max if ((Hi >> Scale) != 0),
11895 // which is the same as if (Hi > ((1 << Scale) - 1))
11896 APInt MaxVal = APInt::getMaxValue(VTSize);
11897 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
11898 dl, VT);
11899 Result = DAG.getSelectCC(dl, Hi, LowMask,
11900 DAG.getConstant(MaxVal, dl, VT), Result,
11901 ISD::SETUGT);
11902
11903 return Result;
11904 }
11905
11906 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
11907 // widened multiplication) aren't all ones or all zeroes.
11908
11909 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
11910 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
11911
11912 if (Scale == 0) {
// With Scale==0 the entire fractional info is in Lo; overflow is detected
// by comparing Hi against Lo's sign extension.
11913 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
11914 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
11915 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
11916 // Saturated to SatMin if wide product is negative, and SatMax if wide
11917 // product is positive ...
11918 SDValue Zero = DAG.getConstant(0, dl, VT);
11919 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
11920 ISD::SETLT);
11921 // ... but only if we overflowed.
11922 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
11923 }
11924
11925 // We handled Scale==0 above so all the bits to examine is in Hi.
11926
11927 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
11928 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
11929 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
11930 dl, VT);
11931 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
11932 // Saturate to min if (Hi >> (Scale - 1)) < -1),
11933 // which is the same as if (HI < (-1 << (Scale - 1))
11934 SDValue HighMask =
11935 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
11936 dl, VT);
11937 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
11938 return Result;
11939}
11940
// Expand a fixed-point divide ([SU]DIVFIX / [SU]DIVFIXSAT) in the operand
// type by pre-shifting: shift LHS up and/or RHS down so a plain integer
// division yields the Scale-scaled quotient. Returns the empty SDValue when
// there is not enough headroom to do this without overflow (caller must
// widen instead). Signed division is additionally rounded toward negative
// infinity by subtracting 1 when the remainder is nonzero and the quotient
// is negative.
// NOTE(review): the leading half of the signature (original line 11942 —
// TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,...)),
// the unsigned branch of the LHSLead initializer (11960) and the SDIVREM
// legality test (11998) were dropped by the extraction.
11943 SDValue LHS, SDValue RHS,
11944 unsigned Scale, SelectionDAG &DAG) const {
11945 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
11946 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
11947 "Expected a fixed point division opcode");
11948
11949 EVT VT = LHS.getValueType();
11950 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
11951 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
11952 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11953
11954 // If there is enough room in the type to upscale the LHS or downscale the
11955 // RHS before the division, we can perform it in this type without having to
11956 // resize. For signed operations, the LHS headroom is the number of
11957 // redundant sign bits, and for unsigned ones it is the number of zeroes.
11958 // The headroom for the RHS is the number of trailing zeroes.
11959 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
11961 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11962
11963 // For signed saturating operations, we need to be able to detect true integer
11964 // division overflow; that is, when you have MIN / -EPS. However, this
11965 // is undefined behavior and if we emit divisions that could take such
11966 // values it may cause undesired behavior (arithmetic exceptions on x86, for
11967 // example).
11968 // Avoid this by requiring an extra bit so that we never get this case.
11969 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11970 // signed saturating division, we need to emit a whopping 32-bit division.
11971 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11972 return SDValue();
11973
// Prefer shifting the LHS up; only shift the RHS down for whatever part of
// Scale the LHS headroom cannot absorb.
11974 unsigned LHSShift = std::min(LHSLead, Scale);
11975 unsigned RHSShift = Scale - LHSShift;
11976
11977 // At this point, we know that if we shift the LHS up by LHSShift and the
11978 // RHS down by RHSShift, we can emit a regular division with a final scaling
11979 // factor of Scale.
11980
11981 if (LHSShift)
11982 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
11983 DAG.getShiftAmountConstant(LHSShift, VT, dl));
11984 if (RHSShift)
11985 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
11986 DAG.getShiftAmountConstant(RHSShift, VT, dl));
11987
11988 SDValue Quot;
11989 if (Signed) {
11990 // For signed operations, if the resulting quotient is negative and the
11991 // remainder is nonzero, subtract 1 from the quotient to round towards
11992 // negative infinity.
11993 SDValue Rem;
11994 // FIXME: Ideally we would always produce an SDIVREM here, but if the
11995 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
11996 // we couldn't just form a libcall, but the type legalizer doesn't do it.
11997 if (isTypeLegal(VT) &&
11999 Quot = DAG.getNode(ISD::SDIVREM, dl,
12000 DAG.getVTList(VT, VT),
12001 LHS, RHS);
12002 Rem = Quot.getValue(1);
12003 Quot = Quot.getValue(0);
12004 } else {
12005 Quot = DAG.getNode(ISD::SDIV, dl, VT,
12006 LHS, RHS);
12007 Rem = DAG.getNode(ISD::SREM, dl, VT,
12008 LHS, RHS);
12009 }
12010 SDValue Zero = DAG.getConstant(0, dl, VT);
12011 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
12012 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
12013 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
// Quotient is negative exactly when the operand signs differ.
12014 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
12015 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
12016 DAG.getConstant(1, dl, VT));
12017 Quot = DAG.getSelect(dl, VT,
12018 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
12019 Sub1, Quot);
12020 } else
12021 Quot = DAG.getNode(ISD::UDIV, dl, VT,
12022 LHS, RHS);
12023
12024 return Quot;
12025}
12026
// Expand UADDO/USUBO into a plain ADD/SUB plus an explicit overflow flag.
// Prefers the legal carry form (UADDO_CARRY/USUBO_CARRY with a zero carry-in)
// when the target has one; otherwise derives overflow from an unsigned
// compare of the result against LHS, with cheap special cases for +1 and -1.
// NOTE(review): the line carrying the method name (original line 12027 —
// TargetLowering::expandUADDSUBO) was dropped by the extraction.
12028 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
12029 SDLoc dl(Node);
12030 SDValue LHS = Node->getOperand(0);
12031 SDValue RHS = Node->getOperand(1);
12032 bool IsAdd = Node->getOpcode() == ISD::UADDO;
12033
12034 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
12035 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
12036 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
12037 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
12038 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
12039 { LHS, RHS, CarryIn });
12040 Result = SDValue(NodeCarry.getNode(), 0);
12041 Overflow = SDValue(NodeCarry.getNode(), 1);
12042 return;
12043 }
12044
12045 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
12046 LHS.getValueType(), LHS, RHS);
12047
12048 EVT ResultType = Node->getValueType(1);
12049 EVT SetCCType = getSetCCResultType(
12050 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
12051 SDValue SetCC;
12052 if (IsAdd && isOneConstant(RHS)) {
12053 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
12054 // the live range of X. We assume comparing with 0 is cheap.
12055 // The general case (X + C) < C is not necessarily beneficial. Although we
12056 // reduce the live range of X, we may introduce the materialization of
12057 // constant C.
12058 SetCC =
12059 DAG.getSetCC(dl, SetCCType, Result,
12060 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
12061 } else if (IsAdd && isAllOnesConstant(RHS)) {
12062 // Special case: uaddo X, -1 overflows if X != 0.
12063 SetCC =
12064 DAG.getSetCC(dl, SetCCType, LHS,
12065 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
12066 } else {
// General case: unsigned add overflows iff Result < LHS; unsigned sub
// borrows iff Result > LHS.
12067 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
12068 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
12069 }
12070 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
12071}
12072
12074 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
// Expand signed add/sub-with-overflow (SADDO/SSUBO). If the saturating form
// is legal, overflow is detected by comparing the wrapping result against the
// saturated result; otherwise overflow is derived from sign comparisons.
12075 SDLoc dl(Node);
12076 SDValue LHS = Node->getOperand(0);
12077 SDValue RHS = Node->getOperand(1);
12078 bool IsAdd = Node->getOpcode() == ISD::SADDO;
12079
12080 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
12081 LHS.getValueType(), LHS, RHS);
12082
12083 EVT ResultType = Node->getValueType(1);
12084 EVT OType = getSetCCResultType(
12085 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
12086
12087 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
12088 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
12089 if (isOperationLegal(OpcSat, LHS.getValueType())) {
// The saturated value differs from the wrapped value exactly when the
// signed operation overflowed.
12090 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
12091 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
12092 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
12093 return;
12094 }
12095
12096 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
12097
12098 // For an addition, the result should be less than one of the operands (LHS)
12099 // if and only if the other operand (RHS) is negative, otherwise there will
12100 // be overflow.
12101 // For a subtraction, the result should be less than one of the operands
12102 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
12103 // otherwise there will be overflow.
12104 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
12105 SDValue ConditionRHS =
12106 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
12107
// Overflow iff the two conditions above disagree (XOR of the booleans).
12108 Overflow = DAG.getBoolExtOrTrunc(
12109 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
12110 ResultType, ResultType);
12111}
12112
12114 SDValue &Overflow, SelectionDAG &DAG) const {
// Expand SMULO/UMULO. Strategy, in order of preference:
//   1. power-of-two constant RHS -> shift and check the inverse shift,
//   2. MUL_LOHI / MULH* if legal -> top half gives the overflow test,
//   3. widen, multiply, and split into halves if the wide type is legal,
//   4. scalar fallback via forceExpandWideMUL (vectors bail out here).
// Returns true on success (assumed from the bool returns below).
12115 SDLoc dl(Node);
12116 EVT VT = Node->getValueType(0);
12117 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12118 SDValue LHS = Node->getOperand(0);
12119 SDValue RHS = Node->getOperand(1);
12120 bool isSigned = Node->getOpcode() == ISD::SMULO;
12121
12122 // For power-of-two multiplications we can use a simpler shift expansion.
12123 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
12124 const APInt &C = RHSC->getAPIntValue();
12125 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
12126 if (C.isPowerOf2()) {
12127 // smulo(x, signed_min) is same as umulo(x, signed_min).
12128 bool UseArithShift = isSigned && !C.isMinSignedValue();
12129 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
12130 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
// Overflow iff shifting back does not reproduce the original value.
12131 Overflow = DAG.getSetCC(dl, SetCCVT,
12132 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
12133 dl, VT, Result, ShiftAmt),
12134 LHS, ISD::SETNE);
12135 return true;
12136 }
12137 }
12138
12139 SDValue BottomHalf;
12140 SDValue TopHalf;
12141 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
12142
// Ops[isSigned] selects {MUL_LOHI, MULH, extend} opcodes for the
// unsigned/signed cases (initializer elided in this view).
12143 static const unsigned Ops[2][3] =
12146 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
12147 BottomHalf = DAG.getNode(Ops[isSigned][0], dl, DAG.getVTList(VT, VT), LHS,
12148 RHS);
12149 TopHalf = BottomHalf.getValue(1);
12150 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
12151 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12152 TopHalf = DAG.getNode(Ops[isSigned][1], dl, VT, LHS, RHS);
12153 } else if (isTypeLegal(WideVT)) {
// Extend both operands, multiply wide, then split into low/high halves.
12154 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
12155 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
12156 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
12157 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
12158 SDValue ShiftAmt =
12159 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
12160 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
12161 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
12162 } else {
12163 if (VT.isVector())
12164 return false;
12165
12166 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
12167 }
12168
12169 Result = BottomHalf;
12170 if (isSigned) {
// Signed overflow: the top half must equal the sign-extension of the
// bottom half (i.e. bottom >> (bits-1)).
12171 SDValue ShiftAmt = DAG.getShiftAmountConstant(
12172 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
12173 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
12174 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
12175 } else {
// Unsigned overflow: any nonzero top half means the product wrapped.
12176 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
12177 DAG.getConstant(0, dl, VT), ISD::SETNE);
12178 }
12179
12180 // Truncate the result if SetCC returns a larger type than needed.
12181 EVT RType = Node->getValueType(1);
12182 if (RType.bitsLT(Overflow.getValueType()))
12183 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
12184
12185 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
12186 "Unexpected result type for S/UMULO legalization");
12187 return true;
12188}
12189
// Expand a VECREDUCE_* node: repeatedly halve power-of-two vectors with the
// base binary opcode while profitable/legal, then fall back to extracting all
// elements and folding them sequentially with the base opcode.
12191 SDLoc dl(Node);
12192 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
12193 SDValue Op = Node->getOperand(0);
12194 EVT VT = Op.getValueType();
12195
12196 // Try to use a shuffle reduction for power of two vectors.
12197 if (VT.isPow2VectorType()) {
12199 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
12200 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
12201 break;
12202
// Combine the two halves elementwise, shrinking the problem by half.
12203 SDValue Lo, Hi;
12204 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
12205 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
12206 VT = HalfVT;
12207
12208 // Stop if splitting is enough to make the reduction legal.
12209 if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
12210 return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
12211 Node->getFlags());
12212 }
12213 }
12214
12215 if (VT.isScalableVector())
12217 "Expanding reductions for scalable vectors is undefined.");
12218
12219 EVT EltVT = VT.getVectorElementType();
12220 unsigned NumElts = VT.getVectorNumElements();
12221
12223 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
12224
// Sequentially fold all extracted scalars: Res = op(op(e0, e1), e2) ...
12225 SDValue Res = Ops[0];
12226 for (unsigned i = 1; i < NumElts; i++)
12227 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
12228
12229 // Result type may be wider than element type.
12230 if (EltVT != Node->getValueType(0))
12231 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
12232 return Res;
12233}
12234
// Expand a sequential (ordered) vector reduction: starting from the scalar
// accumulator operand, fold each vector element in order with the base
// opcode. Ordering is preserved, so this is valid for strict FP reductions.
12236 SDLoc dl(Node);
12237 SDValue AccOp = Node->getOperand(0);
12238 SDValue VecOp = Node->getOperand(1);
12239 SDNodeFlags Flags = Node->getFlags();
12240
12241 EVT VT = VecOp.getValueType();
12242 EVT EltVT = VT.getVectorElementType();
12243
12244 if (VT.isScalableVector())
12246 "Expanding reductions for scalable vectors is undefined.");
12247
12248 unsigned NumElts = VT.getVectorNumElements();
12249
12251 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
12252
12253 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
12254
// In-order left fold: Res = op(...op(op(Acc, e0), e1)..., eN-1).
12255 SDValue Res = AccOp;
12256 for (unsigned i = 0; i < NumElts; i++)
12257 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
12258
12259 return Res;
12260}
12261
12263 SelectionDAG &DAG) const {
// Expand SREM/UREM. Prefer the combined DIVREM node (remainder is result 1);
// otherwise, if plain division is available, compute X - (X/Y)*Y.
// Returns false if neither expansion is possible for this type.
12264 EVT VT = Node->getValueType(0);
12265 SDLoc dl(Node);
12266 bool isSigned = Node->getOpcode() == ISD::SREM;
12267 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
12268 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
12269 SDValue Dividend = Node->getOperand(0);
12270 SDValue Divisor = Node->getOperand(1);
12271 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
12272 SDVTList VTs = DAG.getVTList(VT, VT);
12273 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
12274 return true;
12275 }
12276 if (isOperationLegalOrCustom(DivOpc, VT)) {
12277 // X % Y -> X-X/Y*Y
12278 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
12279 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
12280 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
12281 return true;
12282 }
12283 return false;
12284}
12285
12287 SelectionDAG &DAG) const {
// Expand FP_TO_SINT_SAT/FP_TO_UINT_SAT: convert Src to an integer, saturating
// to the [MinInt, MaxInt] range implied by the SatVT operand, with NaN
// mapping to zero. Uses FP min/max clamping when the integer bounds are
// exactly representable in the source FP type; otherwise falls back to
// compare-and-select around a plain FP_TO_SINT/FP_TO_UINT.
12288 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
12289 SDLoc dl(SDValue(Node, 0));
12290 SDValue Src = Node->getOperand(0);
12291
12292 // DstVT is the result type, while SatVT is the size to which we saturate
12293 EVT SrcVT = Src.getValueType();
12294 EVT DstVT = Node->getValueType(0);
12295
12296 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
12297 unsigned SatWidth = SatVT.getScalarSizeInBits();
12298 unsigned DstWidth = DstVT.getScalarSizeInBits();
12299 assert(SatWidth <= DstWidth &&
12300 "Expected saturation width smaller than result width");
12301
12302 // Determine minimum and maximum integer values and their corresponding
12303 // floating-point values.
12304 APInt MinInt, MaxInt;
12305 if (IsSigned) {
12306 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
12307 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
12308 } else {
12309 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
12310 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
12311 }
12312
12313 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
12314 // libcall emission cannot handle this. Large result types will fail.
12315 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
12316 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
12317 SrcVT = Src.getValueType();
12318 }
12319
12320 const fltSemantics &Sem = SrcVT.getFltSemantics();
12321 APFloat MinFloat(Sem);
12322 APFloat MaxFloat(Sem);
12323
// Convert the integer bounds to FP, tracking whether the conversion was
// exact; inexact bounds rule out the min/max clamping strategy.
12324 APFloat::opStatus MinStatus =
12325 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
12326 APFloat::opStatus MaxStatus =
12327 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
12328 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
12329 !(MaxStatus & APFloat::opStatus::opInexact);
12330
12331 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
12332 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
12333
12334 // If the integer bounds are exactly representable as floats and min/max are
12335 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
12336 // of comparisons and selects.
12337 auto EmitMinMax = [&](unsigned MinOpcode, unsigned MaxOpcode,
12338 bool MayPropagateNaN) {
12339 bool MinMaxLegal = isOperationLegalOrCustom(MinOpcode, SrcVT) &&
12340 isOperationLegalOrCustom(MaxOpcode, SrcVT);
12341 if (!MinMaxLegal)
12342 return SDValue();
12343
12344 SDValue Clamped = Src;
12345
12346 // Clamp Src by MinFloat from below. If !MayPropagateNaN and Src is NaN
12347 // then the result is MinFloat.
12348 Clamped = DAG.getNode(MaxOpcode, dl, SrcVT, Clamped, MinFloatNode);
12349 // Clamp by MaxFloat from above. If !MayPropagateNaN then NaN cannot occur.
12350 Clamped = DAG.getNode(MinOpcode, dl, SrcVT, Clamped, MaxFloatNode);
12351 // Convert clamped value to integer.
12352 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
12353 dl, DstVT, Clamped);
12354
12355 // If !MayPropagateNaN and the conversion is unsigned we're done,
12356 // because we mapped NaN to MinFloat, which will cast to zero.
12357 if (!MayPropagateNaN && !IsSigned)
12358 return FpToInt;
12359
12360 // Otherwise, select 0 if Src is NaN.
12361 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
12362 EVT SetCCVT =
12363 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
// SETUO (unordered) is true exactly when Src is NaN.
12364 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
12365 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
12366 };
12367 if (AreExactFloatBounds) {
// Try min/max flavors in order of decreasing NaN-friendliness.
12368 if (SDValue Res = EmitMinMax(ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM,
12369 /*MayPropagateNaN=*/false))
12370 return Res;
12371 // These may propagate NaN for sNaN operands.
12372 if (SDValue Res =
12373 EmitMinMax(ISD::FMINNUM, ISD::FMAXNUM, /*MayPropagateNaN=*/true))
12374 return Res;
12375 // These always propagate NaN.
12376 if (SDValue Res =
12377 EmitMinMax(ISD::FMINIMUM, ISD::FMAXIMUM, /*MayPropagateNaN=*/true))
12378 return Res;
12379 }
12380
12381 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
12382 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
12383
12384 // Result of direct conversion. The assumption here is that the operation is
12385 // non-trapping and it's fine to apply it to an out-of-range value if we
12386 // select it away later.
12387 SDValue FpToInt =
12388 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
12389
12390 SDValue Select = FpToInt;
12391
12392 EVT SetCCVT =
12393 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
12394
12395 // If Src ULT MinFloat, select MinInt. In particular, this also selects
12396 // MinInt if Src is NaN.
12397 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
12398 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
12399 // If Src OGT MaxFloat, select MaxInt.
12400 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
12401 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
12402
12403 // In the unsigned case we are done, because we mapped NaN to MinInt, which
12404 // is already zero.
12405 if (!IsSigned)
12406 return Select;
12407
12408 // Otherwise, select 0 if Src is NaN.
12409 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
12410 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
12411 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
12412}
12413
12415 const SDLoc &dl,
12416 SelectionDAG &DAG) const {
// Round Op to ResultVT using round-to-odd, which avoids double-rounding
// errors when the result will be rounded again to a narrower type.
// Technique from Boldo & Melquiond, "When double rounding is odd" (see
// comment below).
12417 EVT OperandVT = Op.getValueType();
12418 if (OperandVT.getScalarType() == ResultVT.getScalarType())
12419 return Op;
12420 EVT ResultIntVT = ResultVT.changeTypeToInteger();
12421 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
12422 // can induce double-rounding which may alter the results. We can
12423 // correct for this using a trick explained in: Boldo, Sylvie, and
12424 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
12425 // World Congress. 2005.
12426 SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
12427 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);
12428
12429 // We can keep the narrow value as-is if narrowing was exact (no
12430 // rounding error), the wide value was NaN (the narrow value is also
12431 // NaN and should be preserved) or if we rounded to the odd value.
12432 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
12433 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
12434 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
// Low bit of the narrow value's bit pattern decides "odd".
12435 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
12436 EVT ResultIntVTCCVT = getSetCCResultType(
12437 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
12438 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
12439 // The result is already odd so we don't need to do anything.
12440 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
12441
12442 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12443 Op.getValueType());
12444 // We keep results which are exact, odd or NaN.
// SETUEQ is true if Op == NarrowAsWide (rounding was exact) or unordered
// (Op was NaN).
12445 SDValue KeepNarrow =
12446 DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
12447 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
12448 // We morally performed a round-down if AbsNarrow is smaller than
12449 // AbsWide.
12450 SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
12451 SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
12452 SDValue NarrowIsRd =
12453 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
12454 // If the narrow value is odd or exact, pick it.
12455 // Otherwise, narrow is even and corresponds to either the rounded-up
12456 // or rounded-down value. If narrow is the rounded-down value, we want
12457 // the rounded-up value as it will be odd.
// Nudge the even bit pattern toward the neighboring odd representation:
// +1 if we rounded down in magnitude, -1 if we rounded up.
12458 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
12459 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
12460 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
12461 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
12462}
12463
// Expand FP_ROUND when the destination scalar type is bf16: round to f32 via
// round-to-odd (to avoid double rounding), then round f32 -> bf16 with
// round-to-nearest-even performed in integer arithmetic on the f32 bits.
// Returns an empty SDValue for non-bf16 destinations (no expansion done).
12465 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
12466 SDValue Op = Node->getOperand(0);
12467 EVT VT = Node->getValueType(0);
12468 SDLoc dl(Node);
12469 if (VT.getScalarType() == MVT::bf16) {
// Operand 1 == 1 means the input is known already-rounded (truncating
// FP_ROUND), so a direct FP_TO_BF16 is sufficient.
12470 if (Node->getConstantOperandVal(1) == 1) {
12471 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
12472 }
12473 EVT OperandVT = Op.getValueType();
12474 SDValue IsNaN = DAG.getSetCC(
12475 dl,
12476 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
12477 Op, Op, ISD::SETUO);
12478
12479 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
12480 // can induce double-rounding which may alter the results. We can
12481 // correct for this using a trick explained in: Boldo, Sylvie, and
12482 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
12483 // World Congress. 2005.
12484 EVT F32 = VT.changeElementType(*DAG.getContext(), MVT::f32);
12485 EVT I32 = F32.changeTypeToInteger();
12486 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
12487 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
12488
12489 // Conversions should set NaN's quiet bit. This also prevents NaNs from
12490 // turning into infinities.
12491 SDValue NaN =
12492 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
12493
12494 // Factor in the contribution of the low 16 bits.
// Round-to-nearest-even: add 0x7fff plus the LSB of the kept portion, so
// exact ties round toward the even bf16 value.
12495 SDValue One = DAG.getConstant(1, dl, I32);
12496 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
12497 DAG.getShiftAmountConstant(16, I32, dl));
12498 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
12499 SDValue RoundingBias =
12500 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
12501 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
12502
12503 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
12504 // 0x80000000.
12505 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
12506
12507 // Now that we have rounded, shift the bits into position.
12508 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
12509 DAG.getShiftAmountConstant(16, I32, dl));
12510 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
12511 EVT I16 = I32.changeElementType(*DAG.getContext(), MVT::i16);
12512 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
12513 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
12514 }
12515 return SDValue();
12516}
12517
12519 SelectionDAG &DAG) const {
// Expand VECTOR_SPLICE_LEFT/RIGHT through the stack: store V1 and V2
// contiguously into a temporary slot, then load the result from an offset
// computed from the (runtime) splice offset.
12520 assert((Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT ||
12521 Node->getOpcode() == ISD::VECTOR_SPLICE_RIGHT) &&
12522 "Unexpected opcode!");
12523 assert((Node->getValueType(0).isScalableVector() ||
12524 !isa<ConstantSDNode>(Node->getOperand(2))) &&
12525 "Fixed length vector types with constant offsets expected to use "
12526 "SHUFFLE_VECTOR!");
12527
12528 EVT VT = Node->getValueType(0);
12529 SDValue V1 = Node->getOperand(0);
12530 SDValue V2 = Node->getOperand(1);
12531 SDValue Offset = Node->getOperand(2);
12532 SDLoc DL(Node);
12533
12534 // Expand through memory thusly:
12535 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
12536 // Store V1, Ptr
12537 // Store V2, Ptr + sizeof(V1)
12538 // if (VECTOR_SPLICE_LEFT)
12539 // Ptr = Ptr + (Offset * sizeof(VT.Elt))
12540 // else
12541 // Ptr = Ptr + sizeof(V1) - (Offset * size(VT.Elt))
12542 // Res = Load Ptr
12543
12544 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
12545
// Stack slot big enough for both vectors back-to-back (2x element count).
12547 VT.getVectorElementCount() * 2);
12548 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12549 EVT PtrVT = StackPtr.getValueType();
12550 auto &MF = DAG.getMachineFunction();
12551 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12552 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12553
12554 // Store the lo part of CONCAT_VECTORS(V1, V2)
12555 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
12556 // Store the hi part of CONCAT_VECTORS(V1, V2)
12557 SDValue VTBytes = DAG.getTypeSize(DL, PtrVT, VT.getStoreSize());
12558 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, VTBytes);
12559 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
12560
12561 // NOTE: TrailingBytes must be clamped so as not to read outside of V1:V2.
12562 SDValue EltByteSize =
12563 DAG.getTypeSize(DL, PtrVT, VT.getVectorElementType().getStoreSize());
12564 Offset = DAG.getZExtOrTrunc(Offset, DL, PtrVT);
12565 SDValue TrailingBytes = DAG.getNode(ISD::MUL, DL, PtrVT, Offset, EltByteSize);
12566
// Clamp the byte offset to at most sizeof(V1) so the load stays in bounds.
12567 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VTBytes);
12568
12569 if (Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT)
12570 StackPtr = DAG.getMemBasePlusOffset(StackPtr, TrailingBytes, DL);
12571 else
12572 StackPtr = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
12573
12574 // Load the spliced result
12575 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
12577}
12578
12580 SelectionDAG &DAG) const {
// Expand a masked compress through the stack: optionally pre-store the
// passthru vector, then store each selected element of Vec at the next
// output position, and finally reload the whole vector. The last slot needs
// special care so a conditional final store doesn't clobber passthru data.
12581 SDLoc DL(Node);
12582 SDValue Vec = Node->getOperand(0);
12583 SDValue Mask = Node->getOperand(1);
12584 SDValue Passthru = Node->getOperand(2);
12585
12586 EVT VecVT = Vec.getValueType();
12587 EVT ScalarVT = VecVT.getScalarType();
12588 EVT MaskVT = Mask.getValueType();
12589 EVT MaskScalarVT = MaskVT.getScalarType();
12590
12591 // Needs to be handled by targets that have scalable vector types.
12592 if (VecVT.isScalableVector())
12593 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
12594
12595 SDValue StackPtr = DAG.CreateStackTemporary(
12596 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
12597 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12598 MachinePointerInfo PtrInfo =
12600
12601 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
12602 SDValue Chain = DAG.getEntryNode();
12603 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
12604
12605 bool HasPassthru = !Passthru.isUndef();
12606
12607 // If we have a passthru vector, store it on the stack, overwrite the matching
12608 // positions and then re-write the last element that was potentially
12609 // overwritten even though mask[i] = false.
12610 if (HasPassthru)
12611 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
12612
12613 SDValue LastWriteVal;
12614 APInt PassthruSplatVal;
12615 bool IsSplatPassthru =
12616 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
12617
12618 if (IsSplatPassthru) {
12619 // As we do not know which position we wrote to last, we cannot simply
12620 // access that index from the passthru vector. So we first check if passthru
12621 // is a splat vector, to use any element ...
12622 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
12623 } else if (HasPassthru) {
12624 // ... if it is not a splat vector, we need to get the passthru value at
12625 // position = popcount(mask) and re-load it from the stack before it is
12626 // overwritten in the loop below.
12627 EVT PopcountVT = ScalarVT.changeTypeToInteger();
// Count the set mask lanes: truncate to i1, widen, then VECREDUCE_ADD.
12628 SDValue Popcount = DAG.getNode(
12630 MaskVT.changeVectorElementType(*DAG.getContext(), MVT::i1), Mask);
12631 Popcount = DAG.getNode(
12633 MaskVT.changeVectorElementType(*DAG.getContext(), PopcountVT),
12634 Popcount);
12635 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
12636 SDValue LastElmtPtr =
12637 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
12638 LastWriteVal = DAG.getLoad(
12639 ScalarVT, DL, Chain, LastElmtPtr,
12641 Chain = LastWriteVal.getValue(1);
12642 }
12643
12644 unsigned NumElms = VecVT.getVectorNumElements();
12645 for (unsigned I = 0; I < NumElms; I++) {
// Unconditionally store element I at the current output slot; OutPos only
// advances when the mask lane is set, so unselected stores get overwritten.
12646 SDValue ValI = DAG.getExtractVectorElt(DL, ScalarVT, Vec, I);
12647 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12648 Chain = DAG.getStore(
12649 Chain, DL, ValI, OutPtr,
12651
12652 // Get the mask value and add it to the current output position. This
12653 // either increments by 1 if MaskI is true or adds 0 otherwise.
12654 // Freeze in case we have poison/undef mask entries.
12655 SDValue MaskI = DAG.getExtractVectorElt(DL, MaskScalarVT, Mask, I);
12656 MaskI = DAG.getFreeze(MaskI);
12657 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
12658 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
12659 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
12660
12661 if (HasPassthru && I == NumElms - 1) {
12662 SDValue EndOfVector =
12663 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
12664 SDValue AllLanesSelected =
12665 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
// Clamp so the final store stays inside the stack slot.
12666 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
12667 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12668
12669 // Re-write the last ValI if all lanes were selected. Otherwise,
12670 // overwrite the last written value with the passthru value.
12671 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
12672 LastWriteVal, SDNodeFlags::Unpredictable);
12673 Chain = DAG.getStore(
12674 Chain, DL, LastWriteVal, OutPtr,
12676 }
12677 }
12678
12679 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12680}
12681
// Expand CTTZ_ELTS / CTTZ_ELTS_ZERO_POISON: count the trailing inactive
// lanes of a vector mask. Computed as VL - max(VL - step for active lanes),
// which yields the index of the first active lane (or VL if none — behavior
// for the all-false case with ZeroIsPoison is the caller's contract).
12683 SDLoc DL(Node);
12684 EVT VT = Node->getValueType(0);
12685
12686 bool ZeroIsPoison = Node->getOpcode() == ISD::CTTZ_ELTS_ZERO_POISON;
12687 auto [Mask, StepVec] =
12688 getLegalMaskAndStepVector(Node->getOperand(0), ZeroIsPoison, DL, DAG);
12689 EVT StepVecVT = StepVec.getValueType();
12690 EVT StepVT = StepVecVT.getVectorElementType();
12691
12692 // Promote the scalar result type early to avoid redundant zexts.
12694 StepVT = getTypeToTransformTo(*DAG.getContext(), StepVT);
12695
12696 SDValue VL =
12697 DAG.getElementCount(DL, StepVT, StepVecVT.getVectorElementCount());
12698 SDValue SplatVL = DAG.getSplat(StepVecVT, DL, VL);
// Active lane i contributes VL - i; inactive lanes contribute 0, so the
// reduction picks out the earliest active lane.
12699 StepVec = DAG.getNode(ISD::SUB, DL, StepVecVT, SplatVL, StepVec);
12700 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
12701 SDValue Select = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
12703 StepVecVT.getVectorElementType(), Select);
12704 SDValue Sub = DAG.getNode(ISD::SUB, DL, StepVT, VL,
12705 DAG.getZExtOrTrunc(Max, DL, StepVT));
12706
12707 return DAG.getZExtOrTrunc(Sub, DL, VT);
12708}
12709
12711 SelectionDAG &DAG) const {
// Expand a PARTIAL_REDUCE_*MLA node: extend and multiply the operands, then
// split the wide product into Acc-sized subvectors and add them all (plus
// the accumulator) together with a balanced tree of ADD/FADD nodes.
12712 SDLoc DL(N);
12713 SDValue Acc = N->getOperand(0);
12714 SDValue MulLHS = N->getOperand(1);
12715 SDValue MulRHS = N->getOperand(2);
12716 EVT AccVT = Acc.getValueType();
12717 EVT MulOpVT = MulLHS.getValueType();
12718
12719 EVT ExtMulOpVT =
12721 MulOpVT.getVectorElementCount());
12722
// Extension kind follows the node flavor: unsigned, signed, or FP.
12723 unsigned ExtOpcLHS, ExtOpcRHS;
12724 switch (N->getOpcode()) {
12725 default:
12726 llvm_unreachable("Unexpected opcode");
12728 ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
12729 break;
12731 ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
12732 break;
12734 ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
12735 break;
12736 }
12737
12738 if (ExtMulOpVT != MulOpVT) {
12739 MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
12740 MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
12741 }
// Skip the multiply entirely when RHS is a splat of one.
12742 SDValue Input = MulLHS;
12743 if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
12744 if (!llvm::isOneOrOneSplatFP(MulRHS))
12745 Input = DAG.getNode(ISD::FMUL, DL, ExtMulOpVT, MulLHS, MulRHS);
12746 } else if (!llvm::isOneOrOneSplat(MulRHS)) {
12747 Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
12748 }
12749
12750 unsigned Stride = AccVT.getVectorMinNumElements();
12751 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
12752
12753 // Collect all of the subvectors
12754 std::deque<SDValue> Subvectors = {Acc};
12755 for (unsigned I = 0; I < ScaleFactor; I++)
12756 Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
12757
12758 unsigned FlatNode =
12759 N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;
12760
12761 // Flatten the subvector tree
// Pairwise combine from the front, pushing sums to the back; this builds a
// balanced addition tree rather than a sequential chain.
12762 while (Subvectors.size() > 1) {
12763 Subvectors.push_back(
12764 DAG.getNode(FlatNode, DL, AccVT, {Subvectors[0], Subvectors[1]}));
12765 Subvectors.pop_front();
12766 Subvectors.pop_front();
12767 }
12768
12769 assert(Subvectors.size() == 1 &&
12770 "There should only be one subvector after tree flattening");
12771
12772 return Subvectors[0];
12774
12775/// Given a store node \p StoreNode, return true if it is safe to fold that node
12776/// into \p FPNode, which expands to a library call with output pointers.
12778 SDNode *FPNode) {
// Walk the operands of StoreNode (except the FPNode use being folded),
// checking that FPNode is not a transitive predecessor (which would create a
// cycle) and that the store is not inside a call sequence (which would nest
// call sequences). CALLSEQ_END nodes are deferred to the final predecessor
// check rather than traversed immediately.
12780 SmallVector<const SDNode *, 8> DeferredNodes;
12782
12783 // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
12784 for (SDValue Op : StoreNode->ops())
12785 if (Op.getNode() != FPNode)
12786 Worklist.push_back(Op.getNode());
12787
12789 while (!Worklist.empty()) {
12790 const SDNode *Node = Worklist.pop_back_val();
12791 auto [_, Inserted] = Visited.insert(Node);
12792 if (!Inserted)
12793 continue;
12794
// Bail out conservatively (answer "not safe") if the search gets too big.
12795 if (MaxSteps > 0 && Visited.size() >= MaxSteps)
12796 return false;
12797
12798 // Reached the FPNode (would result in a cycle).
12799 // OR Reached CALLSEQ_START (would result in nested call sequences).
12800 if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
12801 return false;
12802
12803 if (Node->getOpcode() == ISD::CALLSEQ_END) {
12804 // Defer looking into call sequences (so we can check we're outside one).
12805 // We still need to look through these for the predecessor check.
12806 DeferredNodes.push_back(Node);
12807 continue;
12808 }
12809
12810 for (SDValue Op : Node->ops())
12811 Worklist.push_back(Op.getNode());
12812 }
12813
12814 // True if we're outside a call sequence and don't have the FPNode as a
12815 // predecessor. No cycles or nested call sequences possible.
12816 return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes,
12817 MaxSteps);
12818}
12819
12821 SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
12823 std::optional<unsigned> CallRetResNo) const {
12824 if (LC == RTLIB::UNKNOWN_LIBCALL)
12825 return false;
12826
12827 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
12828 if (LibcallImpl == RTLIB::Unsupported)
12829 return false;
12830
12831 LLVMContext &Ctx = *DAG.getContext();
12832 EVT VT = Node->getValueType(0);
12833 unsigned NumResults = Node->getNumValues();
12834
12835 // Find users of the node that store the results (and share input chains). The
12836 // destination pointers can be used instead of creating stack allocations.
12837 SDValue StoresInChain;
12838 SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
12839 for (SDNode *User : Node->users()) {
12841 continue;
12842 auto *ST = cast<StoreSDNode>(User);
12843 SDValue StoreValue = ST->getValue();
12844 unsigned ResNo = StoreValue.getResNo();
12845 // Ensure the store corresponds to an output pointer.
12846 if (CallRetResNo == ResNo)
12847 continue;
12848 // Ensure the store to the default address space and not atomic or volatile.
12849 if (!ST->isSimple() || ST->getAddressSpace() != 0)
12850 continue;
12851 // Ensure all store chains are the same (so they don't alias).
12852 if (StoresInChain && ST->getChain() != StoresInChain)
12853 continue;
12854 // Ensure the store is properly aligned.
12855 Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx);
12856 if (ST->getAlign() <
12857 DAG.getDataLayout().getABITypeAlign(StoreType->getScalarType()))
12858 continue;
12859 // Avoid:
12860 // 1. Creating cyclic dependencies.
12861 // 2. Expanding the node to a call within a call sequence.
12863 continue;
12864 ResultStores[ResNo] = ST;
12865 StoresInChain = ST->getChain();
12866 }
12867
12868 ArgListTy Args;
12869
12870 // Pass the arguments.
12871 for (const SDValue &Op : Node->op_values()) {
12872 EVT ArgVT = Op.getValueType();
12873 Type *ArgTy = ArgVT.getTypeForEVT(Ctx);
12874 Args.emplace_back(Op, ArgTy);
12875 }
12876
12877 // Pass the output pointers.
12878 SmallVector<SDValue, 2> ResultPtrs(NumResults);
12880 for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) {
12881 if (ResNo == CallRetResNo)
12882 continue;
12883 EVT ResVT = Node->getValueType(ResNo);
12884 SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(ResVT);
12885 ResultPtrs[ResNo] = ResultPtr;
12886 Args.emplace_back(ResultPtr, PointerTy);
12887 }
12888
12889 SDLoc DL(Node);
12890
12892 // Pass the vector mask (if required).
12893 EVT MaskVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
12894 SDValue Mask = DAG.getBoolConstant(true, DL, MaskVT, VT);
12895 Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
12896 }
12897
12898 Type *RetType = CallRetResNo.has_value()
12899 ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
12900 : Type::getVoidTy(Ctx);
12901 SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
12902 SDValue Callee =
12903 DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
12905 CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
12906 getLibcallImplCallingConv(LibcallImpl), RetType, Callee, std::move(Args));
12907
12908 auto [Call, CallChain] = LowerCallTo(CLI);
12909
12910 for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
12911 if (ResNo == CallRetResNo) {
12912 Results.push_back(Call);
12913 continue;
12914 }
12915 MachinePointerInfo PtrInfo;
12916 SDValue LoadResult = DAG.getLoad(Node->getValueType(ResNo), DL, CallChain,
12917 ResultPtr, PtrInfo);
12918 SDValue OutChain = LoadResult.getValue(1);
12919
12920 if (StoreSDNode *ST = ResultStores[ResNo]) {
12921 // Replace store with the library call.
12922 DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
12923 PtrInfo = ST->getPointerInfo();
12924 } else {
12926 DAG.getMachineFunction(),
12927 cast<FrameIndexSDNode>(ResultPtr)->getIndex());
12928 }
12929
12930 Results.push_back(LoadResult);
12931 }
12932
12933 return true;
12934}
12935
// NOTE(review): Doxygen extraction dropped the hyperlinked lines of this
// definition (original line 12936 with the function name — presumably
// TargetLowering::LegalizeSetCCCondCode — plus 12950/12953-12954 with the
// Legal/Expand case labels and the InvCC declaration, 13018 with the CC1/CC2
// declarations, and the assert heads 13030/13035). Verify against the
// upstream TargetLowering.cpp before acting on anything below.
//
// Purpose (from the visible code): legalize the condition code of a setcc-like
// operation. Strategies tried in order: swap operands, invert the condition
// (setting NeedInvert for the caller), expand i1 comparisons into XOR/AND/OR
// logic, or split the comparison into two setccs combined with AND/OR (with VP
// variants when Mask/EVL are present). Returns true when LHS/RHS/CC were
// rewritten; false when the condition was already legal.
12937 SDValue &LHS, SDValue &RHS,
12938 SDValue &CC, SDValue Mask,
12939 SDValue EVL, bool &NeedInvert,
12940 const SDLoc &dl, SDValue &Chain,
12941 bool IsSignaling) const {
12942 MVT OpVT = LHS.getSimpleValueType();
12943 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
12944 NeedInvert = false;
// Mask and EVL travel together: both set (VP form) or both unset.
12945 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
12946 bool IsNonVP = !EVL;
12947 switch (getCondCodeAction(CCCode, OpVT)) {
12948 default:
12949 llvm_unreachable("Unknown condition code action!");
// NOTE(review): the 'case ISD::Legal:' label (original line 12950) and the
// 'case ISD::Expand:' label plus InvCC declaration (12953-12954) were dropped.
12951 // Nothing to do.
12952 break;
// First attempt: swap the operands if the swapped condition is legal.
12955 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12956 std::swap(LHS, RHS);
12957 CC = DAG.getCondCode(InvCC);
12958 return true;
12959 }
12960 // Swapping operands didn't work. Try inverting the condition.
12961 bool NeedSwap = false;
12962 InvCC = getSetCCInverse(CCCode, OpVT);
12963 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
12964 // If inverting the condition is not enough, try swapping operands
12965 // on top of it.
12966 InvCC = ISD::getSetCCSwappedOperands(InvCC);
12967 NeedSwap = true;
12968 }
12969 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12970 CC = DAG.getCondCode(InvCC);
12971 NeedInvert = true;
12972 if (NeedSwap)
12973 std::swap(LHS, RHS);
12974 return true;
12975 }
12976
12977 // Special case: expand i1 comparisons using logical operations.
12978 if (OpVT == MVT::i1) {
12979 SDValue Ret;
12980 switch (CCCode) {
12981 default:
12982 llvm_unreachable("Unknown integer setcc!");
12983 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
12984 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
12985 MVT::i1);
12986 break;
12987 case ISD::SETNE: // X != Y --> (X ^ Y)
12988 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
12989 break;
12990 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
12991 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
12992 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
12993 DAG.getNOT(dl, LHS, MVT::i1));
12994 break;
12995 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12996 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12997 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
12998 DAG.getNOT(dl, RHS, MVT::i1));
12999 break;
13000 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
13001 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
13002 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
13003 DAG.getNOT(dl, LHS, MVT::i1));
13004 break;
13005 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
13006 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
13007 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
13008 DAG.getNOT(dl, RHS, MVT::i1));
13009 break;
13010 }
13011
13012 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
13013 RHS = SDValue();
13014 CC = SDValue();
13015 return true;
13016 }
13017
// NOTE(review): original line 13018 (presumably declaring CC1/CC2 as
// ISD::SETCC_INVALID) was dropped; CC1/CC2 are assigned below.
13019 unsigned Opc = 0;
13020 switch (CCCode) {
13021 default:
13022 llvm_unreachable("Don't know how to expand this condition!");
13023 case ISD::SETUO:
13024 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
13025 CC1 = ISD::SETUNE;
13026 CC2 = ISD::SETUNE;
13027 Opc = ISD::OR;
13028 break;
13029 }
// NOTE(review): the head of this assert (original line 13030) was dropped.
13031 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
13032 NeedInvert = true;
13033 [[fallthrough]];
13034 case ISD::SETO:
// NOTE(review): the head of this assert (original line 13035) was dropped.
13036 "If SETO is expanded, SETOEQ must be legal!");
13037 CC1 = ISD::SETOEQ;
13038 CC2 = ISD::SETOEQ;
13039 Opc = ISD::AND;
13040 break;
13041 case ISD::SETONE:
13042 case ISD::SETUEQ:
13043 // If the SETUO or SETO CC isn't legal, we might be able to use
13044 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
13045 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
13046 // the operands.
13047 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
13048 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
13049 isCondCodeLegal(ISD::SETOLT, OpVT))) {
13050 CC1 = ISD::SETOGT;
13051 CC2 = ISD::SETOLT;
13052 Opc = ISD::OR;
13053 NeedInvert = ((unsigned)CCCode & 0x8U);
13054 break;
13055 }
13056 [[fallthrough]];
13057 case ISD::SETOEQ:
13058 case ISD::SETOGT:
13059 case ISD::SETOGE:
13060 case ISD::SETOLT:
13061 case ISD::SETOLE:
13062 case ISD::SETUNE:
13063 case ISD::SETUGT:
13064 case ISD::SETUGE:
13065 case ISD::SETULT:
13066 case ISD::SETULE:
13067 // If we are floating point, assign and break, otherwise fall through.
13068 if (!OpVT.isInteger()) {
13069 // We can use the 4th bit to tell if we are the unordered
13070 // or ordered version of the opcode.
13071 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
13072 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
13073 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
13074 break;
13075 }
13076 // Fallthrough if we are unsigned integer.
13077 [[fallthrough]];
13078 case ISD::SETLE:
13079 case ISD::SETGT:
13080 case ISD::SETGE:
13081 case ISD::SETLT:
13082 case ISD::SETNE:
13083 case ISD::SETEQ:
13084 // If all combinations of inverting the condition and swapping operands
13085 // didn't work then we have no means to expand the condition.
13086 llvm_unreachable("Don't know how to expand this condition!");
13087 }
13088
13089 SDValue SetCC1, SetCC2;
13090 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
13091 // If we aren't the ordered or unorder operation,
13092 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
13093 if (IsNonVP) {
13094 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
13095 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
13096 } else {
13097 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
13098 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
13099 }
13100 } else {
13101 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
13102 if (IsNonVP) {
13103 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
13104 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
13105 } else {
13106 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
13107 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
13108 }
13109 }
// For strict (chained) comparisons, merge the two output chains.
13110 if (Chain)
13111 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
13112 SetCC2.getValue(1));
13113 if (IsNonVP)
13114 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
13115 else {
13116 // Transform the binary opcode to the VP equivalent.
13117 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
13118 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
13119 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
13120 }
13121 RHS = SDValue();
13122 CC = SDValue();
13123 return true;
13124 }
13125 }
13126 return false;
13127}
13128
// NOTE(review): the signature line of this definition (original line 13129,
// containing the function name and the SDNode *Node parameter) was dropped by
// the Doxygen extractor; judging by the body it is presumably
// TargetLowering::expandVectorNaryOpBySplitting — confirm upstream.
//
// Purpose (from the visible code): expand a vector n-ary operation by
// splitting each operand in half, performing the operation on both halves,
// and concatenating the results. Returns SDValue() (no expansion) when the
// type cannot be split evenly, the half type is illegal, or the operation is
// not legal/custom/promoted on the half type.
13130 SelectionDAG &DAG) const {
13131 EVT VT = Node->getValueType(0);
13132 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
13133 // split into two equal parts.
13134 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
13135 return SDValue();
13136
13137 // Restrict expansion to cases where both parts can be concatenated.
13138 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
13139 if (LoVT != HiVT || !isTypeLegal(LoVT))
13140 return SDValue();
13141
13142 SDLoc DL(Node);
13143 unsigned Opcode = Node->getOpcode();
13144
13145 // Don't expand if the result is likely to be unrolled anyway.
13146 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
13147 return SDValue();
13148
// Split every operand into its low and high halves.
13149 SmallVector<SDValue, 4> LoOps, HiOps;
13150 for (const SDValue &V : Node->op_values()) {
13151 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
13152 LoOps.push_back(Lo);
13153 HiOps.push_back(Hi);
13154 }
13155
// Apply the operation per half and stitch the halves back together.
13156 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
13157 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
13158 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
13159}
13160
// NOTE(review): the signature line of this definition (original line 13161,
// containing the function name and the EVT ResultVT parameter) was dropped by
// the Doxygen extractor, as were lines 13183 (MPI declaration), 13208 and
// 13220. Presumably this is TargetLowering::scalarizeExtractedVectorLoad —
// confirm against the upstream TargetLowering.cpp.
//
// Purpose (from the visible code): replace (extract_elt (load vec), idx) with
// a narrow scalar load of just the selected element, provided the element is
// byte-sized, the target allows narrowing the load, and the resulting access
// is legal and fast. Returns SDValue() when scalarizing is not profitable or
// not possible.
13162 const SDLoc &DL,
13163 EVT InVecVT, SDValue EltNo,
13164 LoadSDNode *OriginalLoad,
13165 SelectionDAG &DAG) const {
13166 assert(OriginalLoad->isSimple());
13167
13168 EVT VecEltVT = InVecVT.getVectorElementType();
13169
13170 // If the vector element type is not a multiple of a byte then we are unable
13171 // to correctly compute an address to load only the extracted element as a
13172 // scalar.
13173 if (!VecEltVT.isByteSized())
13174 return SDValue();
13175
13176 ISD::LoadExtType ExtTy =
13177 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
13178 if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
13179 return SDValue();
13180
13181 std::optional<unsigned> ByteOffset;
13182 Align Alignment = OriginalLoad->getAlign();
// NOTE(review): original line 13183 (presumably "MachinePointerInfo MPI;")
// was dropped; MPI is assigned in both branches below.
13184 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
// Constant index: the exact byte offset (and tightened alignment) is known.
13185 int Elt = ConstEltNo->getZExtValue();
13186 ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
13187 MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
13188 Alignment = commonAlignment(Alignment, *ByteOffset);
13189 } else {
13190 // Discard the pointer info except the address space because the memory
13191 // operand can't represent this new access since the offset is variable.
13192 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
13193 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
13194 }
13195
13196 if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
13197 return SDValue();
13198
// Only scalarize when the target reports the narrow access as allowed and
// fast at this alignment/address space.
13199 unsigned IsFast = 0;
13200 if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
13201 OriginalLoad->getAddressSpace(), Alignment,
13202 OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
13203 !IsFast)
13204 return SDValue();
13205
13206 // The original DAG loaded the entire vector from memory, so arithmetic
13207 // within it must be inbounds.
// NOTE(review): original line 13208 (presumably "SDValue NewPtr =
// getVectorElementPointer(" or similar) was dropped; NewPtr is used below.
13209 DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
13210
13211 // We are replacing a vector load with a scalar load. The new load must have
13212 // identical memory op ordering to the original.
13213 SDValue Load;
13214 if (ResultVT.bitsGT(VecEltVT)) {
13215 // If the result type of vextract is wider than the load, then issue an
13216 // extending load instead.
13217 ISD::LoadExtType ExtType =
13218 isLoadLegal(ResultVT, VecEltVT, Alignment,
13219 OriginalLoad->getAddressSpace(), ISD::ZEXTLOAD, false)
// NOTE(review): original line 13220 (presumably "? ISD::ZEXTLOAD") was
// dropped from this conditional expression.
13221 : ISD::EXTLOAD;
13222 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
13223 NewPtr, MPI, VecEltVT, Alignment,
13224 OriginalLoad->getMemOperand()->getFlags(),
13225 OriginalLoad->getAAInfo());
13226 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
13227 } else {
13228 // The result type is narrower or the same width as the vector element
13229 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
13230 Alignment, OriginalLoad->getMemOperand()->getFlags(),
13231 OriginalLoad->getAAInfo());
13232 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
13233 if (ResultVT.bitsLT(VecEltVT))
13234 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
13235 else
13236 Load = DAG.getBitcast(ResultVT, Load);
13237 }
13238
13239 return Load;
13240}
13241
// NOTE(review): the signature line of this definition (original line 13247,
// carrying the function name) and the two option-check condition lines
// (13251-13252) were dropped by the Doxygen extractor; the body below only
// shows the CB checks and the CSInfo assignment. Confirm the full condition
// against the upstream TargetLowering.cpp.
13242// Set type id for call site info and metadata 'call_target'.
13243// We are filtering for:
13244// a) The call-graph-section use case that wants to know about indirect
13245// calls, or
13246// b) We want to annotate indirect calls.
13248 const CallBase *CB, MachineFunction &MF,
13249 MachineFunction::CallSiteInfo &CSInfo) const {
// Only indirect calls are recorded; direct calls need no type-id annotation.
13250 if (CB && CB->isIndirectCall() &&
13253 CSInfo = MachineFunction::CallSiteInfo(*CB);
13254}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
#define _
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
#define T
#define T1
uint64_t High
#define P(N)
Function const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static std::pair< SDValue, SDValue > getLegalMaskAndStepVector(SDValue Mask, bool ZeroIsPoison, SDLoc DL, SelectionDAG &DAG)
Returns a type-legalized version of Mask as the first item in the pair.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static bool canNarrowCLMULToLegal(const TargetLowering &TLI, LLVMContext &Ctx, EVT VT, unsigned HalveDepth=0, unsigned TotalDepth=0)
Check if CLMUL on VT can eventually reach a type with legal CLMUL through a chain of halving decompos...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for choosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if this FPClassTest can be performed with an ordered fcmp to 0,...
static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, SDNode *FPNode)
Given a store node StoreNode, return true if it is safe to fold that node into FPNode,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:255
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1402
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.h:1213
APInt bitcastToAPInt() const
Definition APFloat.h:1408
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1193
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1153
void changeSign()
Definition APFloat.h:1352
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1164
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1615
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1809
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1429
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1054
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:424
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1414
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1075
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:967
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1708
void setSignBit()
Set the sign bit to 1.
Definition APInt.h:1363
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1256
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1419
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:841
void negate()
Negate this APInt in place.
Definition APInt.h:1491
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1554
unsigned countLeadingZeros() const
Definition APInt.h:1629
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
Definition APInt.h:1458
unsigned logBase2() const
Definition APInt.h:1784
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:834
void setAllBits()
Set every bit to 1.
Definition APInt.h:1342
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1316
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:406
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1157
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1027
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1390
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:880
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
void clearBits(unsigned LoBit, unsigned HiBit)
Clear the bits from LoBit (inclusive) to HiBit (exclusive) to 0.
Definition APInt.h:1440
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1411
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition APInt.h:1465
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition APInt.h:1366
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:859
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
This class represents a range of values.
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:215
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
std::vector< std::string > ConstraintCodeVector
Definition InlineAsm.h:104
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Flags getFlags() const
Return the raw flags of the source value,.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MCRegister getLiveInPhysReg(Register VReg) const
getLiveInPhysReg - If VReg is a live-in virtual register, return the corresponding live-in physical r...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition Module.h:445
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
iterator end() const
Definition ArrayRef.h:343
iterator begin() const
Definition ArrayRef.h:342
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC)
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI bool isKnownNeverLogicalZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Test whether the given floating point SDValue (or all elements of it, if it is a vector) is known to ...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl, SDNodeFlags Flags={})
Constant fold a setcc to true or false.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
LLVM_ABI std::optional< unsigned > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI bool shouldOptForSize() const
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS)
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, bool OrZero=false, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:137
iterator end() const
Definition StringRef.h:115
Class to represent struct types.
LLVM_ABI void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
unsigned getBitWidthForCttzElements(EVT RetVT, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool preferSelectsOverBooleanArithmetic(EVT VT) const
Should we prefer selects to doing arithmetic on boolean types.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
EVT getLegalTypeToTransformTo(LLVMContext &Context, EVT VT) const
Perform getTypeToTransformTo repeatedly until a legal type is obtained.
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const
Get the CallingConv that should be used for the specified libcall implementation.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
ISD::CondCode getSoftFloatCmpLibcallPredicate(RTLIB::LibcallImpl Call) const
Get the comparison predicate that's to be used to test the result of the comparison libcall against z...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
TargetLoweringBase(const TargetMachine &TM, const TargetSubtargetInfo &STI)
NOTE: The TargetMachine owns TLOF.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
virtual EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
bool isLoadLegal(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return true if the specified load with extension is legal on this target.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
bool expandMultipleResultFPLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl< SDValue > &Results, std::optional< unsigned > CallRetResNo={}) const
Expands a node with multiple results to an FP or vector libcall.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
SmallVector< ConstraintPair > ConstraintGroup
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandFCANONICALIZE(SDNode *Node, SelectionDAG &DAG) const
Expand FCANONICALIZE to FMUL with 1.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandCLMUL(SDNode *N, SelectionDAG &DAG) const
Expand carryless multiply.
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue expandCttzElts(SDNode *Node, SelectionDAG &DAG) const
Expand a CTTZ_ELTS or CTTZ_ELTS_ZERO_POISON by calculating (VL - i) for each active lane (i),...
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual SDValue unwrapAddress(SDValue N) const
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparisons with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product twice the width of LHS and RHS.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
~TargetLowering() override
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparisons with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
virtual bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const
For most targets, an LLVM type must be broken down into multiple smaller types.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode, SDNodeFlags Flags={}) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual void computeKnownFPClassForTargetNode(const SDValue Op, KnownFPClass &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine floating-point class information for a target node.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis, Register R, KnownFPClass &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false, returns true if Op is known to never be any NaN; if SNaN is true, returns true if Op is known to never be a signaling NaN.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
SDValue expandCTLS(SDNode *N, SelectionDAG &DAG) const
Expand CTLS (count leading sign bits) nodes.
void setTypeIdForCallsiteInfo(const CallBase *CB, MachineFunction &MF, MachineFunction::CallSiteInfo &CSInfo) const
Primary interface to the complete machine description for the target machine.
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
unsigned EmitCallSiteInfo
The flag enables call site info production.
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:775
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition Type.h:313
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:286
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:328
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:110
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:713
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
Definition APInt.cpp:3060
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:788
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:538
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:394
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:522
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:400
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ VECTOR_FIND_LAST_ACTIVE
Finds the index of the last active mask element Operands: Mask.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:910
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:528
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ CLMUL
Carry-less multiplication operations.
Definition ISDOpcodes.h:774
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:407
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:787
@ PARTIAL_REDUCE_FMLA
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BRIND
BRIND - Indirect branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:386
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:356
@ VECTOR_SPLICE_LEFT
VECTOR_SPLICE_LEFT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1, VEC2) left by OFFSET elements an...
Definition ISDOpcodes.h:653
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:899
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:413
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ VECTOR_SPLICE_RIGHT
VECTOR_SPLICE_RIGHT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1,VEC2) right by OFFSET elements a...
Definition ISDOpcodes.h:657
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:921
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:945
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ CTTZ_ELTS_ZERO_POISON
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI NodeType getOppositeSignednessMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns the corresponding opcode with the opposi...
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Returns true if N is a constant splat vector, i.e. a BUILD_VECTOR/SPLAT_VECTOR whose defined elements all share one constant value; on success the splat value is written to SplatValue. (The brief "Node predicates." was a section header mis-attached by the documentation generator.)
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
NUses_match< 1, Value_match > m_OneUse()
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
void stable_sort(R &&Range)
Definition STLExtras.h:2116
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1548
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
LLVM_ABI bool isOneOrOneSplatFP(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant floating-point value, or a splatted vector of a constant float...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
void * PointerTy
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1530
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:173
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1777
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
@ AfterLegalizeTypes
Definition DAGCombine.h:17
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isZeroOrZeroSplat(SDValue N, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1636
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:129
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:308
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:479
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:251
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:367
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:438
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:486
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:420
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
bool isFixedLengthVector() const
Definition ValueTypes.h:189
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT widenIntegerElementType(LLVMContext &Context) const
Return a VT for an integer element type with doubled bit width.
Definition ValueTypes.h:452
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:182
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:121
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:316
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:469
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:315
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:190
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition KnownBits.h:269
static LLVM_ABI KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
bool isZero() const
Returns true if value is all zero.
Definition KnownBits.h:78
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:64
void setAllConflict()
Make all bits known to be both zero and one.
Definition KnownBits.h:97
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:165
KnownBits byteSwap() const
Definition KnownBits.h:545
static LLVM_ABI std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:303
KnownBits reverseBits() const
Definition KnownBits.h:549
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition KnownBits.h:247
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
static LLVM_ABI KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:176
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:335
bool isSignUnknown() const
Returns true if we don't know the sign bit.
Definition KnownBits.h:67
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:325
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:184
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
static LLVM_ABI KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static LLVM_ABI std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
static LLVM_ABI std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static LLVM_ABI KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition KnownBits.cpp:61
static LLVM_ABI std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
static LLVM_ABI std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
Definition KnownBits.h:171
static LLVM_ABI std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static LLVM_ABI std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:300
static LLVM_ABI std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
static LLVM_ABI KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static bool hasVectorMaskArgument(RTLIB::LibcallImpl Impl)
Returns true if the function has a vector mask argument, which is assumed to be the last argument.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
void setNoSignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This contains information for each constraint that we are lowering.
std::string ConstraintCode
This contains the actual string for the code, like "m".
LLVM_ABI unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
LLVM_ABI bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
MakeLibCallOptions & setIsSigned(bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true, bool AllowWidenOptimization=false)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...