// Documentation-site scrape header (not part of the original source file):
// LLVM 23.0.0git — TargetLowering.cpp — "Go to the documentation of this file."
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
27#include "llvm/IR/DataLayout.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/MC/MCAsmInfo.h"
32#include "llvm/MC/MCExpr.h"
38#include <cctype>
39#include <deque>
40using namespace llvm;
41using namespace llvm::SDPatternMatch;
42
43/// NOTE: The TargetMachine owns TLOF.
47
48// Define the virtual destructor out-of-line for build efficiency.
50
51 const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
// Default implementation: the base class knows no target-specific opcodes,
// so there is no name to return. Backends override this to label their
// custom SelectionDAG nodes for debug dumps.
52 return nullptr;
53 }
54
58
59/// Check whether a given call node is in tail position within its function. If
60/// so, it sets Chain to the input chain of the tail call.
// NOTE(review): extraction dropped doc-line 61 (the start of the
// isInTailCallPosition signature) and doc-line 63 (presumably the local
// binding of F, the caller Function referenced below) — restore from
// upstream before building.
62 SDValue &Chain) const {
64
65 // First, check if tail calls have been disabled in this function.
66 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
67 return false;
68
69 // Conservatively require the attributes of the call to match those of
70 // the return. Ignore following attributes because they don't affect the
71 // call sequence.
72 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
73 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
74 Attribute::DereferenceableOrNull, Attribute::NoAlias,
75 Attribute::NonNull, Attribute::NoUndef,
76 Attribute::Range, Attribute::NoFPClass})
77 CallerAttrs.removeAttribute(Attr);
78
// Any return attribute left after stripping the ignorable ones blocks the
// tail call.
79 if (CallerAttrs.hasAttributes())
80 return false;
81
82 // It's not safe to eliminate the sign / zero extension of the return value.
// NOTE(review): this ZExt/SExt check is unreachable — if either attribute
// had remained in CallerAttrs, hasAttributes() above already returned
// false. Harmless, but worth cleaning up upstream.
83 if (CallerAttrs.contains(Attribute::ZExt) ||
84 CallerAttrs.contains(Attribute::SExt))
85 return false;
86
87 // Check if the only use is a function return node.
88 return isUsedByReturnOnly(Node, Chain);
89}
90
// NOTE(review): the first line of this definition (doc-line 91) was dropped
// by extraction; from the parameter list and body this is presumably
// TargetLowering::parametersInCSRMatch — confirm against upstream. It
// checks that every outgoing argument assigned to a callee-saved register
// carries the exact value the caller received in that same register.
92 const uint32_t *CallerPreservedMask,
93 const SmallVectorImpl<CCValAssign> &ArgLocs,
94 const SmallVectorImpl<SDValue> &OutVals) const {
95 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
96 const CCValAssign &ArgLoc = ArgLocs[I];
// Stack-assigned arguments cannot clobber callee-saved registers.
97 if (!ArgLoc.isRegLoc())
98 continue;
99 MCRegister Reg = ArgLoc.getLocReg();
100 // Only look at callee saved registers.
101 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
102 continue;
103 // Check that we pass the value used for the caller.
104 // (We look for a CopyFromReg reading a virtual register that is used
105 // for the function live-in value of register Reg)
106 SDValue Value = OutVals[I];
// Look through a possible AssertZext wrapper before matching CopyFromReg.
107 if (Value->getOpcode() == ISD::AssertZext)
108 Value = Value.getOperand(0);
109 if (Value->getOpcode() != ISD::CopyFromReg)
110 return false;
111 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
112 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
113 return false;
114 }
115 return true;
116}
117
118/// Set CallLoweringInfo attribute flags based on a call instruction
119/// and called function attributes.
// NOTE(review): extraction dropped doc-line 120 (the setAttributes signature
// line naming the CallBase operand) and doc-line 137 (the first line of the
// assert whose string continues at doc-line 138 below) — restore from
// upstream before building.
121 unsigned ArgIdx) {
// Mirror each relevant parameter attribute of the call into the
// corresponding ArgListEntry flag.
122 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
123 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
124 IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
125 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
126 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
127 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
128 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
129 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
130 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
131 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
132 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
133 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
134 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
135 Alignment = Call->getParamStackAlign(ArgIdx);
136 IndirectType = nullptr;
138 "multiple ABI attributes?");
// Record the pointee type implied by whichever ABI attribute is present.
139 if (IsByVal) {
140 IndirectType = Call->getParamByValType(ArgIdx);
// byval may also take its alignment from the parameter alignment when no
// stack alignment was given.
141 if (!Alignment)
142 Alignment = Call->getParamAlign(ArgIdx);
143 }
144 if (IsPreallocated)
145 IndirectType = Call->getParamPreallocatedType(ArgIdx);
146 if (IsInAlloca)
147 IndirectType = Call->getParamInAllocaType(ArgIdx);
148 if (IsSRet)
149 IndirectType = Call->getParamStructRetType(ArgIdx);
150}
151
152/// Generate a libcall taking the given operands as arguments and returning a
153/// result of type RetVT.
// NOTE(review): extraction dropped several doc-lines in this function
// (156: the RetVT/Ops parameters; 165: presumably the ArgListTy Args
// declaration; 184, 195, 201: dropped conditions/statements; 204 region:
// the CallLoweringInfo CLI declaration; 210-211: additional CLI setters).
// Restore from upstream before building.
154std::pair<SDValue, SDValue>
155TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl,
157 MakeLibCallOptions CallOptions, const SDLoc &dl,
158 SDValue InChain) const {
159 if (LibcallImpl == RTLIB::Unsupported)
160 reportFatalInternalError("unsupported library call operation");
161
// A null chain means the call hangs off the DAG entry node.
162 if (!InChain)
163 InChain = DAG.getEntryNode();
164
166 Args.reserve(Ops.size());
167
// Build the argument list; each operand's IR type may be overridden, and
// softened operands remember their pre-softening type in OrigTy.
168 ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
169 for (unsigned i = 0; i < Ops.size(); ++i) {
170 SDValue NewOp = Ops[i];
171 Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
172 ? OpsTypeOverrides[i]
173 : NewOp.getValueType().getTypeForEVT(*DAG.getContext());
174 TargetLowering::ArgListEntry Entry(NewOp, Ty);
175 if (CallOptions.IsSoften)
176 Entry.OrigTy =
177 CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext());
178
// Sign- vs zero-extension of each argument is a per-target decision.
179 Entry.IsSExt =
180 shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
181 Entry.IsZExt = !Entry.IsSExt;
182
183 if (CallOptions.IsSoften &&
185 Entry.IsSExt = Entry.IsZExt = false;
186 }
187 Args.push_back(Entry);
188 }
189
190 SDValue Callee =
191 DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
192
193 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
194 Type *OrigRetTy = RetTy;
196 bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
197 bool zeroExtend = !signExtend;
198
// Softened calls track the pre-softening return type and suppress the
// result extension flags.
199 if (CallOptions.IsSoften) {
200 OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext());
202 signExtend = zeroExtend = false;
203 }
204
205 CLI.setDebugLoc(dl)
206 .setChain(InChain)
207 .setLibCallee(getLibcallImplCallingConv(LibcallImpl), RetTy, OrigRetTy,
208 Callee, std::move(Args))
209 .setNoReturn(CallOptions.DoesNotReturn)
212 .setSExtResult(signExtend)
213 .setZExtResult(zeroExtend);
214 return LowerCallTo(CLI);
215}
216
// NOTE(review): the first line of this definition (doc-line 217, presumably
// "bool TargetLowering::findOptimalMemOpLowering(") plus several interior
// lines (235: loop-body statement shrinking VT; 262-263, 266, 287, 289:
// parts of the isSafeMemOpType / allowsMisalignedMemoryAccesses
// conditions) were dropped by extraction — restore from upstream.
// Purpose (from the surviving body): choose the sequence of value types
// used to lower a memcpy/memset-like operation, appending them to MemOps,
// and return false if more than Limit operations would be required.
218 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
219 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
220 const AttributeList &FuncAttributes, EVT *LargestVT) const {
221 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
222 Op.getSrcAlign() < Op.getDstAlign())
223 return false;
224
// Let the target pick its preferred type first; MVT::Other means "no
// preference" and triggers the generic search below.
225 EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);
226
227 if (VT == MVT::Other) {
228 // Use the largest integer type whose alignment constraints are satisfied.
229 // We only need to check DstAlign here as SrcAlign is always greater or
230 // equal to DstAlign (or zero).
231 VT = MVT::LAST_INTEGER_VALUETYPE;
232 if (Op.isFixedDstAlign())
233 while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
234 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
236 assert(VT.isInteger());
237
238 // Find the largest legal integer type.
239 MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
240 while (!isTypeLegal(LVT))
241 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
242 assert(LVT.isInteger());
243
244 // If the type we've chosen is larger than the largest legal integer type
245 // then use that instead.
246 if (VT.bitsGT(LVT))
247 VT = LVT;
248 }
249
// Greedily cover the remaining size, shrinking VT when it overshoots.
250 unsigned NumMemOps = 0;
251 uint64_t Size = Op.size();
252 while (Size) {
253 unsigned VTSize = VT.getSizeInBits() / 8;
254 while (VTSize > Size) {
255 // For now, only use non-vector load / store's for the left-over pieces.
256 EVT NewVT = VT;
257 unsigned NewVTSize;
258
259 bool Found = false;
260 if (VT.isVector() || VT.isFloatingPoint()) {
261 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
264 Found = true;
265 else if (NewVT == MVT::i64 &&
267 isSafeMemOpType(MVT::f64)) {
268 // i64 is usually not legal on 32-bit targets, but f64 may be.
269 NewVT = MVT::f64;
270 Found = true;
271 }
272 }
273
// Otherwise step down through the integer types until one is safe (or we
// bottom out at i8, which is always usable).
274 if (!Found) {
275 do {
276 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
277 if (NewVT == MVT::i8)
278 break;
279 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
280 }
281 NewVTSize = NewVT.getSizeInBits() / 8;
282
283 // If the new VT cannot cover all of the remaining bits, then consider
284 // issuing a (or a pair of) unaligned and overlapping load / store.
285 unsigned Fast;
286 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
288 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
290 Fast)
291 VTSize = Size;
292 else {
293 VT = NewVT;
294 VTSize = NewVTSize;
295 }
296 }
297
298 if (++NumMemOps > Limit)
299 return false;
300
301 MemOps.push_back(VT);
302 Size -= VTSize;
303 }
304
305 return true;
306}
307
308/// Soften the operands of a comparison. This code is shared among BR_CC,
309/// SELECT_CC, and SETCC handlers.
// NOTE(review): doc-line 310 (the start of this softenSetCCOperands
// overload's signature) was dropped by extraction.
311 SDValue &NewLHS, SDValue &NewRHS,
312 ISD::CondCode &CCCode,
313 const SDLoc &dl, const SDValue OldLHS,
314 const SDValue OldRHS) const {
// Delegate to the chain-aware overload with a null chain (and default,
// non-signaling semantics).
315 SDValue Chain;
316 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
317 OldRHS, Chain);
318}
319
// Chain-aware overload: lowers a floating-point comparison into one or two
// soft-float libcalls plus integer SETCCs.
// NOTE(review): extraction dropped doc-line 320 (signature start) and
// doc-lines 423 (presumably the RetVT/CallOptions setup using IsSignaling),
// 425, 435, 451 and 460 (statement fragments); brace structure in the
// final else-branch is visibly garbled as a result — restore from upstream
// before building.
321 SDValue &NewLHS, SDValue &NewRHS,
322 ISD::CondCode &CCCode,
323 const SDLoc &dl, const SDValue OldLHS,
324 const SDValue OldRHS,
325 SDValue &Chain,
326 bool IsSignaling) const {
327 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
328 // not supporting it. We can update this code when libgcc provides such
329 // functions.
330
331 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
332 && "Unsupported setcc type!");
333
334 // Expand into one or more soft-fp libcall(s).
// LC1 is always issued; LC2 is the optional second call used for
// predicates (SETONE/SETUEQ) that need an unordered check plus an
// equality check.
335 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
336 bool ShouldInvertCC = false;
337 switch (CCCode) {
338 case ISD::SETEQ:
339 case ISD::SETOEQ:
340 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
341 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
342 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
343 break;
344 case ISD::SETNE:
345 case ISD::SETUNE:
346 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
347 (VT == MVT::f64) ? RTLIB::UNE_F64 :
348 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
349 break;
350 case ISD::SETGE:
351 case ISD::SETOGE:
352 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
353 (VT == MVT::f64) ? RTLIB::OGE_F64 :
354 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
355 break;
356 case ISD::SETLT:
357 case ISD::SETOLT:
358 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
359 (VT == MVT::f64) ? RTLIB::OLT_F64 :
360 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
361 break;
362 case ISD::SETLE:
363 case ISD::SETOLE:
364 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
365 (VT == MVT::f64) ? RTLIB::OLE_F64 :
366 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
367 break;
368 case ISD::SETGT:
369 case ISD::SETOGT:
370 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
371 (VT == MVT::f64) ? RTLIB::OGT_F64 :
372 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
373 break;
374 case ISD::SETO:
// SETO is lowered as the inverse of the "unordered" libcall.
375 ShouldInvertCC = true;
376 [[fallthrough]];
377 case ISD::SETUO:
378 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
379 (VT == MVT::f64) ? RTLIB::UO_F64 :
380 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
381 break;
382 case ISD::SETONE:
383 // SETONE = O && UNE
384 ShouldInvertCC = true;
385 [[fallthrough]];
386 case ISD::SETUEQ:
387 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
388 (VT == MVT::f64) ? RTLIB::UO_F64 :
389 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
390 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
391 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
392 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
393 break;
394 default:
395 // Invert CC for unordered comparisons
396 ShouldInvertCC = true;
397 switch (CCCode) {
398 case ISD::SETULT:
399 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
400 (VT == MVT::f64) ? RTLIB::OGE_F64 :
401 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
402 break;
403 case ISD::SETULE:
404 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
405 (VT == MVT::f64) ? RTLIB::OGT_F64 :
406 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
407 break;
408 case ISD::SETUGT:
409 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
410 (VT == MVT::f64) ? RTLIB::OLE_F64 :
411 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
412 break;
413 case ISD::SETUGE:
414 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
415 (VT == MVT::f64) ? RTLIB::OLT_F64 :
416 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
417 break;
418 default: llvm_unreachable("Do not know how to soften this setcc!");
419 }
420 }
421
422 // Use the target specific return value for comparison lib calls.
424 SDValue Ops[2] = {NewLHS, NewRHS};
426 EVT OpsVT[2] = { OldLHS.getValueType(),
427 OldRHS.getValueType() };
428 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
// Issue the first libcall; its integer result is then compared against 0.
429 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
430 NewLHS = Call.first;
431 NewRHS = DAG.getConstant(0, dl, RetVT);
432
433 RTLIB::LibcallImpl LC1Impl = getLibcallImpl(LC1);
434 if (LC1Impl == RTLIB::Unsupported) {
436 "no libcall available to soften floating-point compare");
437 }
438
// The libcall implementation dictates which integer predicate models the
// original FP predicate; invert it where the switch above asked for it.
439 CCCode = getSoftFloatCmpLibcallPredicate(LC1Impl);
440 if (ShouldInvertCC) {
441 assert(RetVT.isInteger());
442 CCCode = getSetCCInverse(CCCode, RetVT);
443 }
444
445 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
446 // Update Chain.
447 Chain = Call.second;
448 } else {
// Two-call path (SETONE/SETUEQ): combine the unordered check with the
// ordered-equality check via AND/OR.
449 RTLIB::LibcallImpl LC2Impl = getLibcallImpl(LC2);
450 if (LC2Impl == RTLIB::Unsupported) {
452 "no libcall available to soften floating-point compare");
453 }
454
455 assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
456 "unordered call should be simple boolean");
457
458 EVT SetCCVT =
459 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
461 NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
462 DAG.getValueType(MVT::i1));
463 }
464
465 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
466 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
467 CCCode = getSoftFloatCmpLibcallPredicate(LC2Impl);
468 if (ShouldInvertCC)
469 CCCode = getSetCCInverse(CCCode, RetVT);
470 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
// With a chain, join the two calls' output chains with a TokenFactor.
471 if (Chain)
472 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
473 Call2.second);
474 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
475 Tmp.getValueType(), Tmp, NewLHS);
// The combined result is a boolean in NewLHS; there is no RHS anymore.
476 NewRHS = SDValue();
477 }
478}
479
480/// Return the entry encoding for a jump table in the current function. The
481/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
// NOTE(review): extraction dropped doc-lines 482 (the getJumpTableEncoding
// signature), 484-485 (the non-PIC return path) and 488 (the
// label-difference return) — only the comments survive here; restore the
// body from upstream.
483 // In non-pic modes, just use the address of a block.
486
487 // Otherwise, use a label difference.
489}
490
// NOTE(review): doc-line 491 (the start of the getPICJumpTableRelocBase
// signature, which takes the jump-table address value Table) was dropped by
// extraction.
492 SelectionDAG &DAG) const {
// Default policy: the table address itself serves as the relocation base.
493 return Table;
494}
495
496/// This returns the relocation base for the given PIC jumptable, the same as
497/// getPICJumpTableRelocBase, but as an MCExpr.
498const MCExpr *
// NOTE(review): doc-line 499 (the first line of the
// getPICJumpTableRelocBaseExpr parameter list — presumably the
// MachineFunction operand MF used below) was dropped by extraction.
500 unsigned JTI,MCContext &Ctx) const{
501 // The normal PIC reloc base is the label at the start of the jump table.
502 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
503}
504
// NOTE(review): doc-line 505 (the start of the expandIndirectJTBranch
// signature) and doc-line 510 (the condition guarding the debug-info
// emission — per the surviving comment, presumably a CodeView check) were
// dropped by extraction.
506 SDValue Addr, int JTI,
507 SelectionDAG &DAG) const {
508 SDValue Chain = Value;
509 // Jump table debug info is only needed if CodeView is enabled.
511 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
512 }
// Emit the indirect branch through the (possibly annotated) chain.
513 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
514}
515
516bool
// NOTE(review): doc-line 517 (the isOffsetFoldingLegal signature, taking
// the GlobalAddressSDNode GA used below) and doc-line 527 (the
// position-independence condition whose 'return false' survives at
// doc-line 528) were dropped by extraction.
518 const TargetMachine &TM = getTargetMachine();
519 const GlobalValue *GV = GA->getGlobal();
520
521 // If the address is not even local to this DSO we will have to load it from
522 // a got and then add the offset.
523 if (!TM.shouldAssumeDSOLocal(GV))
524 return false;
525
526 // If the code is position independent we will have to add a base register.
528 return false;
529
530 // Otherwise we can do it.
531 return true;
532}
533
534//===----------------------------------------------------------------------===//
535// Optimization Methods
536//===----------------------------------------------------------------------===//
537
538/// If the specified instruction has a constant integer operand and there are
539/// bits set in that constant that are not demanded, then clear those bits and
540/// return true.
// NOTE(review): doc-line 541 (the ShrinkDemandedConstant signature start)
// was dropped by extraction.
542 const APInt &DemandedBits,
543 const APInt &DemandedElts,
544 TargetLoweringOpt &TLO) const {
545 SDLoc DL(Op);
546 unsigned Opcode = Op.getOpcode();
547
548 // Early-out if we've ended up calling an undemanded node, leave this to
549 // constant folding.
550 if (DemandedBits.isZero() || DemandedElts.isZero())
551 return false;
552
553 // Do target-specific constant optimization.
554 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
555 return TLO.New.getNode();
556
557 // FIXME: ISD::SELECT, ISD::SELECT_CC
558 switch (Opcode) {
559 default:
560 break;
561 case ISD::XOR:
562 case ISD::AND:
563 case ISD::OR: {
// Only a non-opaque constant on operand 1 can be shrunk.
564 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
565 if (!Op1C || Op1C->isOpaque())
566 return false;
567
568 // If this is a 'not' op, don't touch it because that's a canonical form.
569 const APInt &C = Op1C->getAPIntValue();
570 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
571 return false;
572
// Constant has bits outside the demanded mask: rebuild the node with the
// masked constant.
573 if (!C.isSubsetOf(DemandedBits)) {
574 EVT VT = Op.getValueType();
575 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
576 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
577 Op->getFlags());
578 return TLO.CombineTo(Op, NewOp);
579 }
580
581 break;
582 }
583 }
584
585 return false;
586}
587
// NOTE(review): doc-line 588 (the signature start of this convenience
// overload without DemandedElts) and doc-line 593 (the vector arm of the
// conditional — presumably APInt::getAllOnes over the element count) were
// dropped by extraction.
589 const APInt &DemandedBits,
590 TargetLoweringOpt &TLO) const {
591 EVT VT = Op.getValueType();
// Scalars use a single 1-bit "all elements" mask.
592 APInt DemandedElts = VT.isVector()
594 : APInt(1, 1);
595 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
596}
597
598/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
599/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
600/// but it could be generalized for targets with other types of implicit
601/// widening casts.
// NOTE(review): doc-line 602 (the ShrinkDemandedOp signature, which binds
// BitWidth used below) and doc-line 639 (the non-disjoint arm of the
// SDNodeFlags conditional) were dropped by extraction.
603 const APInt &DemandedBits,
604 TargetLoweringOpt &TLO) const {
605 assert(Op.getNumOperands() == 2 &&
606 "ShrinkDemandedOp only supports binary operators!");
607 assert(Op.getNode()->getNumValues() == 1 &&
608 "ShrinkDemandedOp only supports nodes with one result!");
609
610 EVT VT = Op.getValueType();
611 SelectionDAG &DAG = TLO.DAG;
612 SDLoc dl(Op);
613
614 // Early return, as this function cannot handle vector types.
615 if (VT.isVector())
616 return false;
617
618 assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
619 Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
620 "ShrinkDemandedOp only supports operands that have the same size!");
621
622 // Don't do this if the node has another user, which may require the
623 // full value.
624 if (!Op.getNode()->hasOneUse())
625 return false;
626
627 // Search for the smallest integer type with free casts to and from
628 // Op's type. For expedience, just check power-of-2 integer types.
629 unsigned DemandedSize = DemandedBits.getActiveBits();
630 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
631 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
632 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
633 if (isTruncateFree(Op, SmallVT) && isZExtFree(SmallVT, VT)) {
634 // We found a type with free casts.
635
636 // If the operation has the 'disjoint' flag, then the
637 // operands on the new node are also disjoint.
638 SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
640 unsigned Opcode = Op.getOpcode();
641 if (Opcode == ISD::PTRADD) {
642 // It isn't a ptradd anymore if it doesn't operate on the entire
643 // pointer.
644 Opcode = ISD::ADD;
645 }
// Build the narrow operation on truncated operands, then widen the
// result back with ANY_EXTEND (the high bits are not demanded).
646 SDValue X = DAG.getNode(
647 Opcode, dl, SmallVT,
648 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
649 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
650 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
651 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
652 return TLO.CombineTo(Op, Z);
653 }
654 }
655 return false;
656}
657
// NOTE(review): doc-line 658 (the start of this SimplifyDemandedBits
// overload's signature) and doc-line 668 (the statement inside the
// Simplified branch — presumably DCI.CommitTargetLoweringOpt(TLO), as in
// the sibling overload's pattern; confirm upstream) were dropped.
659 DAGCombinerInfo &DCI) const {
660 SelectionDAG &DAG = DCI.DAG;
661 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
662 !DCI.isBeforeLegalizeOps());
663 KnownBits Known;
664
665 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
666 if (Simplified) {
667 DCI.AddToWorklist(Op.getNode());
669 }
670 return Simplified;
671}
672
// NOTE(review): doc-line 673 (the start of this overload's signature) and
// doc-line 685 (the statement inside the Simplified branch) were dropped
// by extraction.
674 const APInt &DemandedElts,
675 DAGCombinerInfo &DCI) const {
676 SelectionDAG &DAG = DCI.DAG;
677 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
678 !DCI.isBeforeLegalizeOps());
679 KnownBits Known;
680
681 bool Simplified =
682 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
683 if (Simplified) {
684 DCI.AddToWorklist(Op.getNode());
686 }
687 return Simplified;
688}
689
// NOTE(review): doc-lines 690 (signature start), 692 (a parameter line) and
// 701 (the fixed-length-vector arm of the DemandedElts conditional) were
// dropped by extraction.
691 KnownBits &Known,
693 unsigned Depth,
694 bool AssumeSingleUse) const {
695 EVT VT = Op.getValueType();
696
697 // Since the number of lanes in a scalable vector is unknown at compile time,
698 // we track one bit which is implicitly broadcast to all lanes. This means
699 // that all lanes in a scalable vector are considered demanded.
700 APInt DemandedElts = VT.isFixedLengthVector()
702 : APInt(1, 1);
703 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
704 AssumeSingleUse);
705}
706
707// TODO: Under what circumstances can we create nodes? Constant folding?
// Attempts to return an existing, simpler value that produces the same
// demanded bits/elements as Op, without creating new nodes (except
// bitcasts/UNDEF), so a multi-use value can be bypassed for one user.
// NOTE(review): extraction dropped numerous doc-lines inside this function
// (708: signature start; 714: the depth-limit condition; 743, 761, 779,
// 978: the "if (SDValue V = SimplifyMultipleUseDemandedBits..." call
// lines; 875-877, 883, 894: condition fragments; 888, 902-904, 920, 933,
// 948: case labels / statements). Restore from upstream before building.
709 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
710 SelectionDAG &DAG, unsigned Depth) const {
711 EVT VT = Op.getValueType();
712
713 // Limit search depth.
715 return SDValue();
716
717 // Ignore UNDEFs.
718 if (Op.isUndef())
719 return SDValue();
720
721 // Not demanding any bits/elts from Op.
722 if (DemandedBits == 0 || DemandedElts == 0)
723 return DAG.getUNDEF(VT);
724
725 bool IsLE = DAG.getDataLayout().isLittleEndian();
726 unsigned NumElts = DemandedElts.getBitWidth();
727 unsigned BitWidth = DemandedBits.getBitWidth();
728 KnownBits LHSKnown, RHSKnown;
729 switch (Op.getOpcode()) {
730 case ISD::BITCAST: {
731 if (VT.isScalableVector())
732 return SDValue();
733
734 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
735 EVT SrcVT = Src.getValueType();
736 EVT DstVT = Op.getValueType();
737 if (SrcVT == DstVT)
738 return Src;
739
740 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
741 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
// Same element width: recurse through the bitcast directly.
742 if (NumSrcEltBits == NumDstEltBits)
744 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
745 return DAG.getBitcast(DstVT, V);
746
// Wide destination elements built from several narrow source elements:
// translate the demanded bit/element masks to source granularity.
747 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
748 unsigned Scale = NumDstEltBits / NumSrcEltBits;
749 unsigned NumSrcElts = SrcVT.getVectorNumElements();
750 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
751 for (unsigned i = 0; i != Scale; ++i) {
752 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
753 unsigned BitOffset = EltOffset * NumSrcEltBits;
754 DemandedSrcBits |= DemandedBits.extractBits(NumSrcEltBits, BitOffset);
755 }
756 // Recursive calls below may turn not demanded elements into poison, so we
757 // need to demand all smaller source elements that maps to a demanded
758 // destination element.
759 APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
760
762 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
763 return DAG.getBitcast(DstVT, V);
764 }
765
766 // TODO - bigendian once we have test coverage.
767 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
768 unsigned Scale = NumSrcEltBits / NumDstEltBits;
769 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
770 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
771 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
772 for (unsigned i = 0; i != NumElts; ++i)
773 if (DemandedElts[i]) {
774 unsigned Offset = (i % Scale) * NumDstEltBits;
775 DemandedSrcBits.insertBits(DemandedBits, Offset);
776 DemandedSrcElts.setBit(i / Scale);
777 }
778
780 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
781 return DAG.getBitcast(DstVT, V);
782 }
783
784 break;
785 }
786 case ISD::AND: {
787 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
788 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
789
790 // If all of the demanded bits are known 1 on one side, return the other.
791 // These bits cannot contribute to the result of the 'and' in this
792 // context.
793 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
794 return Op.getOperand(0);
795 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
796 return Op.getOperand(1);
797 break;
798 }
799 case ISD::OR: {
800 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
801 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
802
803 // If all of the demanded bits are known zero on one side, return the
804 // other. These bits cannot contribute to the result of the 'or' in this
805 // context.
806 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
807 return Op.getOperand(0);
808 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
809 return Op.getOperand(1);
810 break;
811 }
812 case ISD::XOR: {
813 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
814 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
815
816 // If all of the demanded bits are known zero on one side, return the
817 // other.
818 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
819 return Op.getOperand(0);
820 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
821 return Op.getOperand(1);
822 break;
823 }
824 case ISD::ADD: {
// x + 0 == x (per operand, over the demanded elements).
825 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
826 if (RHSKnown.isZero())
827 return Op.getOperand(0);
828
829 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
830 if (LHSKnown.isZero())
831 return Op.getOperand(1);
832 break;
833 }
834 case ISD::SHL: {
835 // If we are only demanding sign bits then we can use the shift source
836 // directly.
837 if (std::optional<unsigned> MaxSA =
838 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
839 SDValue Op0 = Op.getOperand(0);
840 unsigned ShAmt = *MaxSA;
841 unsigned NumSignBits =
842 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
843 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
844 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
845 return Op0;
846 }
847 break;
848 }
849 case ISD::SRL: {
850 // If we are only demanding sign bits then we can use the shift source
851 // directly.
852 if (std::optional<unsigned> MaxSA =
853 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
854 SDValue Op0 = Op.getOperand(0);
855 unsigned ShAmt = *MaxSA;
856 // Must already be signbits in DemandedBits bounds, and can't demand any
857 // shifted in zeroes.
858 if (DemandedBits.countl_zero() >= ShAmt) {
859 unsigned NumSignBits =
860 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
861 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
862 return Op0;
863 }
864 }
865 break;
866 }
867 case ISD::SETCC: {
868 SDValue Op0 = Op.getOperand(0);
869 SDValue Op1 = Op.getOperand(1);
870 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
871 // If (1) we only need the sign-bit, (2) the setcc operands are the same
872 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
873 // -1, we may be able to bypass the setcc.
874 if (DemandedBits.isSignMask() &&
878 // If we're testing X < 0, then this compare isn't needed - just use X!
879 // FIXME: We're limiting to integer types here, but this should also work
880 // if we don't care about FP signed-zero. The use of SETLT with FP means
881 // that we don't care about NaNs.
882 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
884 return Op0;
885 }
886 break;
887 }
889 // If none of the extended bits are demanded, eliminate the sextinreg.
890 SDValue Op0 = Op.getOperand(0);
891 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
892 unsigned ExBits = ExVT.getScalarSizeInBits();
893 if (DemandedBits.getActiveBits() <= ExBits &&
895 return Op0;
896 // If the input is already sign extended, just drop the extension.
897 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
898 if (NumSignBits >= (BitWidth - ExBits + 1))
899 return Op0;
900 break;
901 }
905 if (VT.isScalableVector())
906 return SDValue();
907
908 // If we only want the lowest element and none of extended bits, then we can
909 // return the bitcasted source vector.
910 SDValue Src = Op.getOperand(0);
911 EVT SrcVT = Src.getValueType();
912 EVT DstVT = Op.getValueType();
913 if (IsLE && DemandedElts == 1 &&
914 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
915 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
916 return DAG.getBitcast(DstVT, Src);
917 }
918 break;
919 }
921 if (VT.isScalableVector())
922 return SDValue();
923
924 // If we don't demand the inserted element, return the base vector.
925 SDValue Vec = Op.getOperand(0);
926 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
927 EVT VecVT = Vec.getValueType();
928 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
929 !DemandedElts[CIdx->getZExtValue()])
930 return Vec;
931 break;
932 }
934 if (VT.isScalableVector())
935 return SDValue();
936
937 SDValue Vec = Op.getOperand(0);
938 SDValue Sub = Op.getOperand(1);
939 uint64_t Idx = Op.getConstantOperandVal(2);
940 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
941 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
942 // If we don't demand the inserted subvector, return the base vector.
943 if (DemandedSubElts == 0)
944 return Vec;
945 break;
946 }
947 case ISD::VECTOR_SHUFFLE: {
949 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
950
951 // If all the demanded elts are from one operand and are inline,
952 // then we can use the operand directly.
953 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
954 for (unsigned i = 0; i != NumElts; ++i) {
955 int M = ShuffleMask[i];
956 if (M < 0 || !DemandedElts[i])
957 continue;
958 AllUndef = false;
959 IdentityLHS &= (M == (int)i);
960 IdentityRHS &= ((M - NumElts) == i);
961 }
962
963 if (AllUndef)
964 return DAG.getUNDEF(Op.getValueType());
965 if (IdentityLHS)
966 return Op.getOperand(0);
967 if (IdentityRHS)
968 return Op.getOperand(1);
969 break;
970 }
971 default:
972 // TODO: Probably okay to remove after audit; here to reduce change size
973 // in initial enablement patch for scalable vectors
974 if (VT.isScalableVector())
975 return SDValue();
976
// Give targets a chance to simplify their own (BUILTIN_OP_END+) nodes.
977 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
979 Op, DemandedBits, DemandedElts, DAG, Depth))
980 return V;
981 break;
982 }
983 return SDValue();
984}
985
// NOTE(review): doc-lines 986-987 (the signature start of this wrapper) and
// doc-line 994 (the fixed-length-vector arm of the conditional) were
// dropped by extraction. The wrapper derives DemandedElts and forwards to
// the main overload above.
988 unsigned Depth) const {
989 EVT VT = Op.getValueType();
990 // Since the number of lanes in a scalable vector is unknown at compile time,
991 // we track one bit which is implicitly broadcast to all lanes. This means
992 // that all lanes in a scalable vector are considered demanded.
993 APInt DemandedElts = VT.isFixedLengthVector()
995 : APInt(1, 1);
996 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
997 Depth);
998}
999
// NOTE(review): doc-line 1000 (the signature start of this wrapper) was
// dropped by extraction. The wrapper demands all bits of the scalar value
// and forwards to the main overload above.
1001 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
1002 unsigned Depth) const {
1003 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
1004 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1005 Depth);
1006}
1007
1008 // Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
1009 // or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
// Static helper invoked from SimplifyDemandedBits' SRL/SRA handling. The
// TLO/TLI/DemandedBits/DemandedElts/Depth parameters mirror the caller's
// state; Depth bounds the known-bits / sign-bits queries made below.
// NOTE(review): the opening line(s) of this signature (orig. 1010-1011) were
// dropped by the doc extractor - verify against the upstream file.
1012 const TargetLowering &TLI,
1013 const APInt &DemandedBits,
1014 const APInt &DemandedElts, unsigned Depth) {
1015 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
1016 "SRL or SRA node is required here!");
1017 // Is the right shift using an immediate value of 1?
1018 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
1019 if (!N1C || !N1C->isOne())
1020 return SDValue();
1021 
1022 // We are looking for an avgfloor
1023 // add(ext, ext)
1024 // or one of these as a avgceil
1025 // add(add(ext, ext), 1)
1026 // add(add(ext, 1), ext)
1027 // add(ext, add(ext, 1))
1028 SDValue Add = Op.getOperand(0);
1029 if (Add.getOpcode() != ISD::ADD)
1030 return SDValue();
1031 
1032 SDValue ExtOpA = Add.getOperand(0);
1033 SDValue ExtOpB = Add.getOperand(1);
1034 SDValue Add2;
// MatchOperands: returns true if Op2 or Op3 is a (splat) constant 1; on
// success it rebinds ExtOpA/ExtOpB to the remaining two values and records
// the inner add node in Add2 (used for the overflow check further down).
1035 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
1036 ConstantSDNode *ConstOp;
1037 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
1038 ConstOp->isOne()) {
1039 ExtOpA = Op1;
1040 ExtOpB = Op3;
1041 Add2 = A;
1042 return true;
1043 }
1044 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
1045 ConstOp->isOne()) {
1046 ExtOpA = Op1;
1047 ExtOpB = Op2;
1048 Add2 = A;
1049 return true;
1050 }
1051 return false;
1052 };
// Ceil variants carry an extra +1 somewhere inside the add tree; try both
// commutations (the lambda intentionally mutates ExtOpA/ExtOpB/Add2).
1053 bool IsCeil =
1054 (ExtOpA.getOpcode() == ISD::ADD &&
1055 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
1056 (ExtOpB.getOpcode() == ISD::ADD &&
1057 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
1058 
1059 // If the shift is signed (sra):
1060 // - Needs >= 2 sign bit for both operands.
1061 // - Needs >= 2 zero bits.
1062 // If the shift is unsigned (srl):
1063 // - Needs >= 1 zero bit for both operands.
1064 // - Needs 1 demanded bit zero and >= 2 sign bits.
1065 SelectionDAG &DAG = TLO.DAG;
1066 unsigned ShiftOpc = Op.getOpcode();
1067 bool IsSigned = false;
// Width budget of known top bits; note this local shadows the llvm::KnownBits
// type name within this function.
1068 unsigned KnownBits;
1069 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1070 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
// ComputeNumSignBits counts the sign bit itself, so subtract one to get the
// number of *redundant* sign bits shared by both operands.
1071 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1072 unsigned NumZeroA =
1073 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1074 unsigned NumZeroB =
1075 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1076 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1077 
// Choose signed vs unsigned average; prefer whichever form has the larger
// budget of known top bits, since KnownBits determines how narrow a type the
// transform can use below.
1078 switch (ShiftOpc) {
1079 default:
1080 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1081 case ISD::SRA: {
1082 if (NumZero >= 2 && NumSigned < NumZero) {
1083 IsSigned = false;
1084 KnownBits = NumZero;
1085 break;
1086 }
1087 if (NumSigned >= 1) {
1088 IsSigned = true;
1089 KnownBits = NumSigned;
1090 break;
1091 }
1092 return SDValue();
1093 }
1094 case ISD::SRL: {
1095 if (NumZero >= 1 && NumSigned < NumZero) {
1096 IsSigned = false;
1097 KnownBits = NumZero;
1098 break;
1099 }
1100 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1101 IsSigned = true;
1102 KnownBits = NumSigned;
1103 break;
1104 }
1105 return SDValue();
1106 }
1107 }
1108 
1109 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1110 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1111 
1112 // Find the smallest power-2 type that is legal for this vector size and
1113 // operation, given the original type size and the number of known sign/zero
1114 // bits.
1115 EVT VT = Op.getValueType();
// Keep enough width for the bits not covered by the known top bits; never go
// below i8.
1116 unsigned MinWidth =
1117 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1118 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
// NOTE(review): the guard governing this early-out (orig. line 1119) was
// dropped by the doc extractor - confirm against the upstream file.
1120 return SDValue();
1121 if (VT.isVector())
1122 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1123 if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
1124 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1125 // larger type size to do the transform.
1126 if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
1127 return SDValue();
// Fall back to the original width only when neither add (outer, and inner
// Add2 if present) can overflow for the chosen signedness.
1128 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1129 Add.getOperand(1)) &&
1130 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1131 Add2.getOperand(1))))
1132 NVT = VT;
1133 else
1134 return SDValue();
1135 }
1136 
1137 // Don't create a AVGFLOOR node with a scalar constant unless its legal as
1138 // this is likely to stop other folds (reassociation, value tracking etc.)
1139 if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
1140 (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
1141 return SDValue();
1142 
// Build the AVG node at the (possibly narrower) type NVT, extending or
// truncating the operands as needed, then convert back to the original VT.
1143 SDLoc DL(Op);
1144 SDValue ResultAVG =
1145 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1146 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1147 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1148 }
1149
1150/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1151/// result of Op are ever used downstream. If we can use this information to
1152/// simplify Op, create a new simplified DAG node and return true, returning the
1153/// original and new nodes in Old and New. Otherwise, analyze the expression and
1154/// return a mask of Known bits for the expression (used to simplify the
1155/// caller). The Known bits may only be accurate for those bits in the
1156/// OriginalDemandedBits and OriginalDemandedElts.
1158 SDValue Op, const APInt &OriginalDemandedBits,
1159 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1160 unsigned Depth, bool AssumeSingleUse) const {
1161 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1162 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1163 "Mask size mismatches value type size!");
1164
1165 // Don't know anything.
1166 Known = KnownBits(BitWidth);
1167
1168 EVT VT = Op.getValueType();
1169 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1170 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1171 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1172 "Unexpected vector size");
1173
1174 APInt DemandedBits = OriginalDemandedBits;
1175 APInt DemandedElts = OriginalDemandedElts;
1176 SDLoc dl(Op);
1177
1178 // Undef operand.
1179 if (Op.isUndef())
1180 return false;
1181
1182 // We can't simplify target constants.
1183 if (Op.getOpcode() == ISD::TargetConstant)
1184 return false;
1185
1186 if (Op.getOpcode() == ISD::Constant) {
1187 // We know all of the bits for a constant!
1188 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1189 return false;
1190 }
1191
1192 if (Op.getOpcode() == ISD::ConstantFP) {
1193 // We know all of the bits for a floating point constant!
1195 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1196 return false;
1197 }
1198
1199 // Other users may use these bits.
1200 bool HasMultiUse = false;
1201 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1203 // Limit search depth.
1204 return false;
1205 }
1206 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1208 DemandedElts = APInt::getAllOnes(NumElts);
1209 HasMultiUse = true;
1210 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1211 // Not demanding any bits/elts from Op.
1212 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1213 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1214 // Limit search depth.
1215 return false;
1216 }
1217
1218 KnownBits Known2;
1219 switch (Op.getOpcode()) {
1220 case ISD::SCALAR_TO_VECTOR: {
1221 if (VT.isScalableVector())
1222 return false;
1223 if (!DemandedElts[0])
1224 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1225
1226 KnownBits SrcKnown;
1227 SDValue Src = Op.getOperand(0);
1228 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1229 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1230 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1231 return true;
1232
1233 // Upper elements are undef, so only get the knownbits if we just demand
1234 // the bottom element.
1235 if (DemandedElts == 1)
1236 Known = SrcKnown.anyextOrTrunc(BitWidth);
1237 break;
1238 }
1239 case ISD::BUILD_VECTOR:
1240 // Collect the known bits that are shared by every demanded element.
1241 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1242 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1243 return false; // Don't fall through, will infinitely loop.
1244 case ISD::SPLAT_VECTOR: {
1245 SDValue Scl = Op.getOperand(0);
1246 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1247 KnownBits KnownScl;
1248 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1249 return true;
1250
1251 // Implicitly truncate the bits to match the official semantics of
1252 // SPLAT_VECTOR.
1253 Known = KnownScl.trunc(BitWidth);
1254 break;
1255 }
1256 case ISD::LOAD: {
1257 auto *LD = cast<LoadSDNode>(Op);
1258 if (getTargetConstantFromLoad(LD)) {
1259 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1260 return false; // Don't fall through, will infinitely loop.
1261 }
1262 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1263 // If this is a ZEXTLoad and we are looking at the loaded value.
1264 EVT MemVT = LD->getMemoryVT();
1265 unsigned MemBits = MemVT.getScalarSizeInBits();
1266 Known.Zero.setBitsFrom(MemBits);
1267 return false; // Don't fall through, will infinitely loop.
1268 }
1269 break;
1270 }
1272 if (VT.isScalableVector())
1273 return false;
1274 SDValue Vec = Op.getOperand(0);
1275 SDValue Scl = Op.getOperand(1);
1276 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1277 EVT VecVT = Vec.getValueType();
1278
1279 // If index isn't constant, assume we need all vector elements AND the
1280 // inserted element.
1281 APInt DemandedVecElts(DemandedElts);
1282 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1283 unsigned Idx = CIdx->getZExtValue();
1284 DemandedVecElts.clearBit(Idx);
1285
1286 // Inserted element is not required.
1287 if (!DemandedElts[Idx])
1288 return TLO.CombineTo(Op, Vec);
1289 }
1290
1291 KnownBits KnownScl;
1292 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1293 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1294 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1295 return true;
1296
1297 Known = KnownScl.anyextOrTrunc(BitWidth);
1298
1299 KnownBits KnownVec;
1300 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1301 Depth + 1))
1302 return true;
1303
1304 if (!!DemandedVecElts)
1305 Known = Known.intersectWith(KnownVec);
1306
1307 return false;
1308 }
1309 case ISD::INSERT_SUBVECTOR: {
1310 if (VT.isScalableVector())
1311 return false;
1312 // Demand any elements from the subvector and the remainder from the src its
1313 // inserted into.
1314 SDValue Src = Op.getOperand(0);
1315 SDValue Sub = Op.getOperand(1);
1316 uint64_t Idx = Op.getConstantOperandVal(2);
1317 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1318 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1319 APInt DemandedSrcElts = DemandedElts;
1320 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
1321
1322 KnownBits KnownSub, KnownSrc;
1323 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1324 Depth + 1))
1325 return true;
1326 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1327 Depth + 1))
1328 return true;
1329
1330 Known.setAllConflict();
1331 if (!!DemandedSubElts)
1332 Known = Known.intersectWith(KnownSub);
1333 if (!!DemandedSrcElts)
1334 Known = Known.intersectWith(KnownSrc);
1335
1336 // Attempt to avoid multi-use src if we don't need anything from it.
1337 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1338 !DemandedSrcElts.isAllOnes()) {
1340 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1342 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1343 if (NewSub || NewSrc) {
1344 NewSub = NewSub ? NewSub : Sub;
1345 NewSrc = NewSrc ? NewSrc : Src;
1346 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1347 Op.getOperand(2));
1348 return TLO.CombineTo(Op, NewOp);
1349 }
1350 }
1351 break;
1352 }
1354 if (VT.isScalableVector())
1355 return false;
1356 // Offset the demanded elts by the subvector index.
1357 SDValue Src = Op.getOperand(0);
1358 if (Src.getValueType().isScalableVector())
1359 break;
1360 uint64_t Idx = Op.getConstantOperandVal(1);
1361 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1362 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1363
1364 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1365 Depth + 1))
1366 return true;
1367
1368 // Attempt to avoid multi-use src if we don't need anything from it.
1369 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1371 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1372 if (DemandedSrc) {
1373 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1374 Op.getOperand(1));
1375 return TLO.CombineTo(Op, NewOp);
1376 }
1377 }
1378 break;
1379 }
1380 case ISD::CONCAT_VECTORS: {
1381 if (VT.isScalableVector())
1382 return false;
1383 Known.setAllConflict();
1384 EVT SubVT = Op.getOperand(0).getValueType();
1385 unsigned NumSubVecs = Op.getNumOperands();
1386 unsigned NumSubElts = SubVT.getVectorNumElements();
1387 for (unsigned i = 0; i != NumSubVecs; ++i) {
1388 APInt DemandedSubElts =
1389 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1390 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1391 Known2, TLO, Depth + 1))
1392 return true;
1393 // Known bits are shared by every demanded subvector element.
1394 if (!!DemandedSubElts)
1395 Known = Known.intersectWith(Known2);
1396 }
1397 break;
1398 }
1399 case ISD::VECTOR_SHUFFLE: {
1400 assert(!VT.isScalableVector());
1401 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1402
1403 // Collect demanded elements from shuffle operands..
1404 APInt DemandedLHS, DemandedRHS;
1405 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1406 DemandedRHS))
1407 break;
1408
1409 if (!!DemandedLHS || !!DemandedRHS) {
1410 SDValue Op0 = Op.getOperand(0);
1411 SDValue Op1 = Op.getOperand(1);
1412
1413 Known.setAllConflict();
1414 if (!!DemandedLHS) {
1415 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1416 Depth + 1))
1417 return true;
1418 Known = Known.intersectWith(Known2);
1419 }
1420 if (!!DemandedRHS) {
1421 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1422 Depth + 1))
1423 return true;
1424 Known = Known.intersectWith(Known2);
1425 }
1426
1427 // Attempt to avoid multi-use ops if we don't need anything from them.
1429 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1431 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1432 if (DemandedOp0 || DemandedOp1) {
1433 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1434 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1435 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1436 return TLO.CombineTo(Op, NewOp);
1437 }
1438 }
1439 break;
1440 }
1441 case ISD::AND: {
1442 SDValue Op0 = Op.getOperand(0);
1443 SDValue Op1 = Op.getOperand(1);
1444
1445 // If the RHS is a constant, check to see if the LHS would be zero without
1446 // using the bits from the RHS. Below, we use knowledge about the RHS to
1447 // simplify the LHS, here we're using information from the LHS to simplify
1448 // the RHS.
1449 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1450 // Do not increment Depth here; that can cause an infinite loop.
1451 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1452 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1453 if ((LHSKnown.Zero & DemandedBits) ==
1454 (~RHSC->getAPIntValue() & DemandedBits))
1455 return TLO.CombineTo(Op, Op0);
1456
1457 // If any of the set bits in the RHS are known zero on the LHS, shrink
1458 // the constant.
1459 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1460 DemandedElts, TLO))
1461 return true;
1462
1463 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1464 // constant, but if this 'and' is only clearing bits that were just set by
1465 // the xor, then this 'and' can be eliminated by shrinking the mask of
1466 // the xor. For example, for a 32-bit X:
1467 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1468 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1469 LHSKnown.One == ~RHSC->getAPIntValue()) {
1470 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1471 return TLO.CombineTo(Op, Xor);
1472 }
1473 }
1474
1475 // (X +/- Y) & Y --> ~X & Y when Y is a power of 2 (or zero).
1476 SDValue X, Y;
1477 if (sd_match(Op,
1478 m_And(m_Value(Y),
1480 m_Sub(m_Value(X), m_Deferred(Y)))))) &&
1481 TLO.DAG.isKnownToBeAPowerOfTwo(Y, DemandedElts, /*OrZero=*/true)) {
1482 return TLO.CombineTo(
1483 Op, TLO.DAG.getNode(ISD::AND, dl, VT, TLO.DAG.getNOT(dl, X, VT), Y));
1484 }
1485
1486 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1487 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1488 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1489 (Op0.getOperand(0).isUndef() ||
1491 Op0->hasOneUse()) {
1492 unsigned NumSubElts =
1494 unsigned SubIdx = Op0.getConstantOperandVal(2);
1495 APInt DemandedSub =
1496 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1497 KnownBits KnownSubMask =
1498 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1499 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1500 SDValue NewAnd =
1501 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1502 SDValue NewInsert =
1503 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1504 Op0.getOperand(1), Op0.getOperand(2));
1505 return TLO.CombineTo(Op, NewInsert);
1506 }
1507 }
1508
1509 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1510 Depth + 1))
1511 return true;
1512 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1513 Known2, TLO, Depth + 1))
1514 return true;
1515
1516 // If all of the demanded bits are known one on one side, return the other.
1517 // These bits cannot contribute to the result of the 'and'.
1518 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1519 return TLO.CombineTo(Op, Op0);
1520 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1521 return TLO.CombineTo(Op, Op1);
1522 // If all of the demanded bits in the inputs are known zeros, return zero.
1523 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1524 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1525 // If the RHS is a constant, see if we can simplify it.
1526 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1527 TLO))
1528 return true;
1529 // If the operation can be done in a smaller type, do so.
1531 return true;
1532
1533 // Attempt to avoid multi-use ops if we don't need anything from them.
1534 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1536 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1538 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1539 if (DemandedOp0 || DemandedOp1) {
1540 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1541 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1542 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1543 return TLO.CombineTo(Op, NewOp);
1544 }
1545 }
1546
1547 Known &= Known2;
1548 break;
1549 }
1550 case ISD::OR: {
1551 SDValue Op0 = Op.getOperand(0);
1552 SDValue Op1 = Op.getOperand(1);
1553 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1554 Depth + 1)) {
1555 Op->dropFlags(SDNodeFlags::Disjoint);
1556 return true;
1557 }
1558
1559 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1560 Known2, TLO, Depth + 1)) {
1561 Op->dropFlags(SDNodeFlags::Disjoint);
1562 return true;
1563 }
1564
1565 // If all of the demanded bits are known zero on one side, return the other.
1566 // These bits cannot contribute to the result of the 'or'.
1567 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1568 return TLO.CombineTo(Op, Op0);
1569 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1570 return TLO.CombineTo(Op, Op1);
1571 // If the RHS is a constant, see if we can simplify it.
1572 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1573 return true;
1574 // If the operation can be done in a smaller type, do so.
1576 return true;
1577
1578 // Attempt to avoid multi-use ops if we don't need anything from them.
1579 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1581 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1583 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1584 if (DemandedOp0 || DemandedOp1) {
1585 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1586 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1587 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1588 return TLO.CombineTo(Op, NewOp);
1589 }
1590 }
1591
1592 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1593 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1594 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1595 Op0->hasOneUse() && Op1->hasOneUse()) {
1596 // Attempt to match all commutations - m_c_Or would've been useful!
1597 for (int I = 0; I != 2; ++I) {
1598 SDValue X = Op.getOperand(I).getOperand(0);
1599 SDValue C1 = Op.getOperand(I).getOperand(1);
1600 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1601 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1602 if (Alt.getOpcode() == ISD::OR) {
1603 for (int J = 0; J != 2; ++J) {
1604 if (X == Alt.getOperand(J)) {
1605 SDValue Y = Alt.getOperand(1 - J);
1606 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1607 {C1, C2})) {
1608 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1609 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1610 return TLO.CombineTo(
1611 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1612 }
1613 }
1614 }
1615 }
1616 }
1617 }
1618
1619 Known |= Known2;
1620 break;
1621 }
1622 case ISD::XOR: {
1623 SDValue Op0 = Op.getOperand(0);
1624 SDValue Op1 = Op.getOperand(1);
1625
1626 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1627 Depth + 1))
1628 return true;
1629 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1630 Depth + 1))
1631 return true;
1632
1633 // If all of the demanded bits are known zero on one side, return the other.
1634 // These bits cannot contribute to the result of the 'xor'.
1635 if (DemandedBits.isSubsetOf(Known.Zero))
1636 return TLO.CombineTo(Op, Op0);
1637 if (DemandedBits.isSubsetOf(Known2.Zero))
1638 return TLO.CombineTo(Op, Op1);
1639 // If the operation can be done in a smaller type, do so.
1641 return true;
1642
1643 // If all of the unknown bits are known to be zero on one side or the other
1644 // turn this into an *inclusive* or.
1645 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1646 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1647 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1648
1649 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1650 if (C) {
1651 // If one side is a constant, and all of the set bits in the constant are
1652 // also known set on the other side, turn this into an AND, as we know
1653 // the bits will be cleared.
1654 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1655 // NB: it is okay if more bits are known than are requested
1656 if (C->getAPIntValue() == Known2.One) {
1657 SDValue ANDC =
1658 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1659 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1660 }
1661
1662 // If the RHS is a constant, see if we can change it. Don't alter a -1
1663 // constant because that's a 'not' op, and that is better for combining
1664 // and codegen.
1665 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1666 // We're flipping all demanded bits. Flip the undemanded bits too.
1667 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1668 return TLO.CombineTo(Op, New);
1669 }
1670
1671 unsigned Op0Opcode = Op0.getOpcode();
1672 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1673 if (ConstantSDNode *ShiftC =
1674 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1675 // Don't crash on an oversized shift. We can not guarantee that a
1676 // bogus shift has been simplified to undef.
1677 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1678 uint64_t ShiftAmt = ShiftC->getZExtValue();
1680 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1681 : Ones.lshr(ShiftAmt);
1682 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1684 // If the xor constant is a demanded mask, do a 'not' before the
1685 // shift:
1686 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1687 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1688 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1689 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1690 Op0.getOperand(1)));
1691 }
1692 }
1693 }
1694 }
1695 }
1696
1697 // If we can't turn this into a 'not', try to shrink the constant.
1698 if (!C || !C->isAllOnes())
1699 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1700 return true;
1701
1702 // Attempt to avoid multi-use ops if we don't need anything from them.
1703 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1705 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1707 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1708 if (DemandedOp0 || DemandedOp1) {
1709 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1710 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1711 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1712 return TLO.CombineTo(Op, NewOp);
1713 }
1714 }
1715
1716 Known ^= Known2;
1717 break;
1718 }
1719 case ISD::SELECT:
1720 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1721 Known, TLO, Depth + 1))
1722 return true;
1723 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1724 Known2, TLO, Depth + 1))
1725 return true;
1726
1727 // If the operands are constants, see if we can simplify them.
1728 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1729 return true;
1730
1731 // Only known if known in both the LHS and RHS.
1732 Known = Known.intersectWith(Known2);
1733 break;
1734 case ISD::VSELECT:
1735 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1736 Known, TLO, Depth + 1))
1737 return true;
1738 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1739 Known2, TLO, Depth + 1))
1740 return true;
1741
1742 // Only known if known in both the LHS and RHS.
1743 Known = Known.intersectWith(Known2);
1744 break;
1745 case ISD::SELECT_CC:
1746 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1747 Known, TLO, Depth + 1))
1748 return true;
1749 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1750 Known2, TLO, Depth + 1))
1751 return true;
1752
1753 // If the operands are constants, see if we can simplify them.
1754 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1755 return true;
1756
1757 // Only known if known in both the LHS and RHS.
1758 Known = Known.intersectWith(Known2);
1759 break;
1760 case ISD::SETCC: {
1761 SDValue Op0 = Op.getOperand(0);
1762 SDValue Op1 = Op.getOperand(1);
1763 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1764 // If we're testing X < 0, X >= 0, X <= -1 or X > -1
1765 // (X is of integer type) then we only need the sign mask of the previous
1766 // result
1767 if (Op1.getValueType().isInteger() &&
1768 (((CC == ISD::SETLT || CC == ISD::SETGE) && isNullOrNullSplat(Op1)) ||
1769 ((CC == ISD::SETLE || CC == ISD::SETGT) &&
1770 isAllOnesOrAllOnesSplat(Op1)))) {
1771 KnownBits KnownOp0;
1774 DemandedElts, KnownOp0, TLO, Depth + 1))
1775 return true;
1776 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1777 // width as the setcc result, and (3) the result of a setcc conforms to 0
1778 // or -1, we may be able to bypass the setcc.
1779 if (DemandedBits.isSignMask() &&
1783 // If we remove a >= 0 or > -1 (for integers), we need to introduce a
1784 // NOT Operation
1785 if (CC == ISD::SETGE || CC == ISD::SETGT) {
1786 SDLoc DL(Op);
1787 EVT VT = Op0.getValueType();
1788 SDValue NotOp0 = TLO.DAG.getNOT(DL, Op0, VT);
1789 return TLO.CombineTo(Op, NotOp0);
1790 }
1791 return TLO.CombineTo(Op, Op0);
1792 }
1793 }
1794 if (getBooleanContents(Op0.getValueType()) ==
1796 BitWidth > 1)
1797 Known.Zero.setBitsFrom(1);
1798 break;
1799 }
1800 case ISD::SHL: {
1801 SDValue Op0 = Op.getOperand(0);
1802 SDValue Op1 = Op.getOperand(1);
1803 EVT ShiftVT = Op1.getValueType();
1804
1805 if (std::optional<unsigned> KnownSA =
1806 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1807 unsigned ShAmt = *KnownSA;
1808 if (ShAmt == 0)
1809 return TLO.CombineTo(Op, Op0);
1810
1811 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1812 // single shift. We can do this if the bottom bits (which are shifted
1813 // out) are never demanded.
1814 // TODO - support non-uniform vector amounts.
1815 if (Op0.getOpcode() == ISD::SRL) {
1816 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1817 if (std::optional<unsigned> InnerSA =
1818 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1819 unsigned C1 = *InnerSA;
1820 unsigned Opc = ISD::SHL;
1821 int Diff = ShAmt - C1;
1822 if (Diff < 0) {
1823 Diff = -Diff;
1824 Opc = ISD::SRL;
1825 }
1826 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1827 return TLO.CombineTo(
1828 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1829 }
1830 }
1831 }
1832
1833 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1834 // are not demanded. This will likely allow the anyext to be folded away.
1835 // TODO - support non-uniform vector amounts.
1836 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1837 SDValue InnerOp = Op0.getOperand(0);
1838 EVT InnerVT = InnerOp.getValueType();
1839 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1840 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1841 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1842 SDValue NarrowShl = TLO.DAG.getNode(
1843 ISD::SHL, dl, InnerVT, InnerOp,
1844 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1845 return TLO.CombineTo(
1846 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1847 }
1848
1849 // Repeat the SHL optimization above in cases where an extension
1850 // intervenes: (shl (anyext (shr x, c1)), c2) to
1851 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1852 // aren't demanded (as above) and that the shifted upper c1 bits of
1853 // x aren't demanded.
1854 // TODO - support non-uniform vector amounts.
1855 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1856 InnerOp.hasOneUse()) {
1857 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1858 InnerOp, DemandedElts, Depth + 2)) {
1859 unsigned InnerShAmt = *SA2;
1860 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1861 DemandedBits.getActiveBits() <=
1862 (InnerBits - InnerShAmt + ShAmt) &&
1863 DemandedBits.countr_zero() >= ShAmt) {
1864 SDValue NewSA =
1865 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1866 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1867 InnerOp.getOperand(0));
1868 return TLO.CombineTo(
1869 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1870 }
1871 }
1872 }
1873 }
1874
1875 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1876 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1877 Depth + 1)) {
1878 // Disable the nsw and nuw flags. We can no longer guarantee that we
1879 // won't wrap after simplification.
1880 Op->dropFlags(SDNodeFlags::NoWrap);
1881 return true;
1882 }
1883 Known <<= ShAmt;
1884 // low bits known zero.
1885 Known.Zero.setLowBits(ShAmt);
1886
1887 // Attempt to avoid multi-use ops if we don't need anything from them.
1888 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1890 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1891 if (DemandedOp0) {
1892 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1893 return TLO.CombineTo(Op, NewOp);
1894 }
1895 }
1896
1897 // TODO: Can we merge this fold with the one below?
1898 // Try shrinking the operation as long as the shift amount will still be
1899 // in range.
1900 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1901 Op.getNode()->hasOneUse()) {
1902 // Search for the smallest integer type with free casts to and from
1903 // Op's type. For expedience, just check power-of-2 integer types.
1904 unsigned DemandedSize = DemandedBits.getActiveBits();
1905 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1906 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1907 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1908 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1909 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1910 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1911 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1912 assert(DemandedSize <= SmallVTBits &&
1913 "Narrowed below demanded bits?");
1914 // We found a type with free casts.
1915 SDValue NarrowShl = TLO.DAG.getNode(
1916 ISD::SHL, dl, SmallVT,
1917 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1918 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1919 return TLO.CombineTo(
1920 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1921 }
1922 }
1923 }
1924
1925 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1926 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1927 // Only do this if we demand the upper half so the knownbits are correct.
1928 unsigned HalfWidth = BitWidth / 2;
1929 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1930 DemandedBits.countLeadingOnes() >= HalfWidth) {
1931 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1932 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1933 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1934 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1935 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1936 // If we're demanding the upper bits at all, we must ensure
1937 // that the upper bits of the shift result are known to be zero,
1938 // which is equivalent to the narrow shift being NUW.
1939 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1940 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1941 SDNodeFlags Flags;
1942 Flags.setNoSignedWrap(IsNSW);
1943 Flags.setNoUnsignedWrap(IsNUW);
1944 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1945 SDValue NewShiftAmt =
1946 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1947 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1948 NewShiftAmt, Flags);
1949 SDValue NewExt =
1950 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1951 return TLO.CombineTo(Op, NewExt);
1952 }
1953 }
1954 }
1955 } else {
1956 // This is a variable shift, so we can't shift the demand mask by a known
1957 // amount. But if we are not demanding high bits, then we are not
1958 // demanding those bits from the pre-shifted operand either.
1959 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1960 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1961 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1962 Depth + 1)) {
1963 // Disable the nsw and nuw flags. We can no longer guarantee that we
1964 // won't wrap after simplification.
1965 Op->dropFlags(SDNodeFlags::NoWrap);
1966 return true;
1967 }
1968 Known.resetAll();
1969 }
1970 }
1971
1972 // If we are only demanding sign bits then we can use the shift source
1973 // directly.
1974 if (std::optional<unsigned> MaxSA =
1975 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1976 unsigned ShAmt = *MaxSA;
1977 unsigned NumSignBits =
1978 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1979 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1980 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1981 return TLO.CombineTo(Op, Op0);
1982 }
1983 break;
1984 }
1985 case ISD::SRL: {
1986 SDValue Op0 = Op.getOperand(0);
1987 SDValue Op1 = Op.getOperand(1);
1988 EVT ShiftVT = Op1.getValueType();
1989
1990 if (std::optional<unsigned> KnownSA =
1991 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1992 unsigned ShAmt = *KnownSA;
1993 if (ShAmt == 0)
1994 return TLO.CombineTo(Op, Op0);
1995
1996 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1997 // single shift. We can do this if the top bits (which are shifted out)
1998 // are never demanded.
1999 // TODO - support non-uniform vector amounts.
2000 if (Op0.getOpcode() == ISD::SHL) {
2001 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2002 if (std::optional<unsigned> InnerSA =
2003 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2004 unsigned C1 = *InnerSA;
2005 unsigned Opc = ISD::SRL;
2006 int Diff = ShAmt - C1;
2007 if (Diff < 0) {
2008 Diff = -Diff;
2009 Opc = ISD::SHL;
2010 }
2011 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
2012 return TLO.CombineTo(
2013 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
2014 }
2015 }
2016 }
2017
2018 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
2019 // single sra. We can do this if the top bits are never demanded.
2020 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
2021 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2022 if (std::optional<unsigned> InnerSA =
2023 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2024 unsigned C1 = *InnerSA;
2025 // Clamp the combined shift amount if it exceeds the bit width.
2026 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
2027 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
2028 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
2029 Op0.getOperand(0), NewSA));
2030 }
2031 }
2032 }
2033
2034 APInt InDemandedMask = (DemandedBits << ShAmt);
2035
2036 // If the shift is exact, then it does demand the low bits (and knows that
2037 // they are zero).
2038 if (Op->getFlags().hasExact())
2039 InDemandedMask.setLowBits(ShAmt);
2040
2041 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2042 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2043 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2045 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2046 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2047 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2048 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2049 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2050 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2051 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2052 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2053 SDValue NewShiftAmt =
2054 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2055 SDValue NewShift =
2056 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2057 return TLO.CombineTo(
2058 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2059 }
2060 }
2061
2062 // Compute the new bits that are at the top now.
2063 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2064 Depth + 1))
2065 return true;
2066 Known >>= ShAmt;
2067 // High bits known zero.
2068 Known.Zero.setHighBits(ShAmt);
2069
2070 // Attempt to avoid multi-use ops if we don't need anything from them.
2071 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2073 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2074 if (DemandedOp0) {
2075 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2076 return TLO.CombineTo(Op, NewOp);
2077 }
2078 }
2079 } else {
2080 // Use generic knownbits computation as it has support for non-uniform
2081 // shift amounts.
2082 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2083 }
2084
2085 // If we are only demanding sign bits then we can use the shift source
2086 // directly.
2087 if (std::optional<unsigned> MaxSA =
2088 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2089 unsigned ShAmt = *MaxSA;
2090 // Must already be signbits in DemandedBits bounds, and can't demand any
2091 // shifted in zeroes.
2092 if (DemandedBits.countl_zero() >= ShAmt) {
2093 unsigned NumSignBits =
2094 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2095 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2096 return TLO.CombineTo(Op, Op0);
2097 }
2098 }
2099
2100 // Try to match AVG patterns (after shift simplification).
2101 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2102 DemandedElts, Depth + 1))
2103 return TLO.CombineTo(Op, AVG);
2104
2105 break;
2106 }
2107 case ISD::SRA: {
2108 SDValue Op0 = Op.getOperand(0);
2109 SDValue Op1 = Op.getOperand(1);
2110 EVT ShiftVT = Op1.getValueType();
2111
2112 // If we only want bits that already match the signbit then we don't need
2113 // to shift.
2114 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2115 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2116 NumHiDemandedBits)
2117 return TLO.CombineTo(Op, Op0);
2118
2119 // If this is an arithmetic shift right and only the low-bit is set, we can
2120 // always convert this into a logical shr, even if the shift amount is
2121 // variable. The low bit of the shift cannot be an input sign bit unless
2122 // the shift amount is >= the size of the datatype, which is undefined.
2123 if (DemandedBits.isOne())
2124 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2125
2126 if (std::optional<unsigned> KnownSA =
2127 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2128 unsigned ShAmt = *KnownSA;
2129 if (ShAmt == 0)
2130 return TLO.CombineTo(Op, Op0);
2131
2132 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2133 // supports sext_inreg.
2134 if (Op0.getOpcode() == ISD::SHL) {
2135 if (std::optional<unsigned> InnerSA =
2136 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2137 unsigned LowBits = BitWidth - ShAmt;
2138 EVT ExtVT = VT.changeElementType(
2139 *TLO.DAG.getContext(),
2140 EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits));
2141
2142 if (*InnerSA == ShAmt) {
2143 if (!TLO.LegalOperations() ||
2145 return TLO.CombineTo(
2146 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2147 Op0.getOperand(0),
2148 TLO.DAG.getValueType(ExtVT)));
2149
2150 // Even if we can't convert to sext_inreg, we might be able to
2151 // remove this shift pair if the input is already sign extended.
2152 unsigned NumSignBits =
2153 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2154 if (NumSignBits > ShAmt)
2155 return TLO.CombineTo(Op, Op0.getOperand(0));
2156 }
2157 }
2158 }
2159
2160 APInt InDemandedMask = (DemandedBits << ShAmt);
2161
2162 // If the shift is exact, then it does demand the low bits (and knows that
2163 // they are zero).
2164 if (Op->getFlags().hasExact())
2165 InDemandedMask.setLowBits(ShAmt);
2166
2167 // If any of the demanded bits are produced by the sign extension, we also
2168 // demand the input sign bit.
2169 if (DemandedBits.countl_zero() < ShAmt)
2170 InDemandedMask.setSignBit();
2171
2172 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2173 Depth + 1))
2174 return true;
2175 Known >>= ShAmt;
2176
2177 // If the input sign bit is known to be zero, or if none of the top bits
2178 // are demanded, turn this into an unsigned shift right.
2179 if (Known.Zero[BitWidth - ShAmt - 1] ||
2180 DemandedBits.countl_zero() >= ShAmt) {
2181 SDNodeFlags Flags;
2182 Flags.setExact(Op->getFlags().hasExact());
2183 return TLO.CombineTo(
2184 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2185 }
2186
2187 int Log2 = DemandedBits.exactLogBase2();
2188 if (Log2 >= 0) {
2189 // The bit must come from the sign.
2190 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2191 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2192 }
2193
2194 if (Known.One[BitWidth - ShAmt - 1])
2195 // New bits are known one.
2196 Known.One.setHighBits(ShAmt);
2197
2198 // Attempt to avoid multi-use ops if we don't need anything from them.
2199 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2201 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2202 if (DemandedOp0) {
2203 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2204 return TLO.CombineTo(Op, NewOp);
2205 }
2206 }
2207 }
2208
2209 // Try to match AVG patterns (after shift simplification).
2210 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2211 DemandedElts, Depth + 1))
2212 return TLO.CombineTo(Op, AVG);
2213
2214 break;
2215 }
2216 case ISD::FSHL:
2217 case ISD::FSHR: {
2218 SDValue Op0 = Op.getOperand(0);
2219 SDValue Op1 = Op.getOperand(1);
2220 SDValue Op2 = Op.getOperand(2);
2221 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2222
2223 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2224 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2225
2226 // For fshl, 0-shift returns the 1st arg.
2227 // For fshr, 0-shift returns the 2nd arg.
2228 if (Amt == 0) {
2229 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2230 Known, TLO, Depth + 1))
2231 return true;
2232 break;
2233 }
2234
2235 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2236 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2237 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2238 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2239 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2240 Depth + 1))
2241 return true;
2242 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2243 Depth + 1))
2244 return true;
2245
2246 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2247 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2248 Known = Known.unionWith(Known2);
2249
2250 // Attempt to avoid multi-use ops if we don't need anything from them.
2251 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2252 !DemandedElts.isAllOnes()) {
2254 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2256 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2257 if (DemandedOp0 || DemandedOp1) {
2258 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2259 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2260 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2261 DemandedOp1, Op2);
2262 return TLO.CombineTo(Op, NewOp);
2263 }
2264 }
2265 }
2266
2267 if (isPowerOf2_32(BitWidth)) {
2268 // Fold FSHR(Op0,Op1,Op2) -> SRL(Op1,Op2)
2269 // iff we're guaranteed not to use Op0.
2270 // TODO: Add FSHL equivalent?
2271 if (!IsFSHL && !DemandedBits.isAllOnes() &&
2272 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT))) {
2273 KnownBits KnownAmt =
2274 TLO.DAG.computeKnownBits(Op2, DemandedElts, Depth + 1);
2275 unsigned MaxShiftAmt =
2276 KnownAmt.getMaxValue().getLimitedValue(BitWidth - 1);
2277 // Check we don't demand any shifted bits outside Op1.
2278 if (DemandedBits.countl_zero() >= MaxShiftAmt) {
2279 EVT AmtVT = Op2.getValueType();
2280 SDValue NewAmt =
2281 TLO.DAG.getNode(ISD::AND, dl, AmtVT, Op2,
2282 TLO.DAG.getConstant(BitWidth - 1, dl, AmtVT));
2283 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, Op1, NewAmt);
2284 return TLO.CombineTo(Op, NewOp);
2285 }
2286 }
2287
2288 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2289 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2290 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts, Known2, TLO,
2291 Depth + 1))
2292 return true;
2293 }
2294 break;
2295 }
2296 case ISD::ROTL:
2297 case ISD::ROTR: {
2298 SDValue Op0 = Op.getOperand(0);
2299 SDValue Op1 = Op.getOperand(1);
2300 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2301
2302 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2303 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2304 return TLO.CombineTo(Op, Op0);
2305
2306 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2307 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2308 unsigned RevAmt = BitWidth - Amt;
2309
2310 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2311 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2312 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2313 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2314 Depth + 1))
2315 return true;
2316
2317 // rot*(x, 0) --> x
2318 if (Amt == 0)
2319 return TLO.CombineTo(Op, Op0);
2320
2321 // See if we don't demand either half of the rotated bits.
2322 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2323 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2324 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2325 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2326 }
2327 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2328 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2329 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2330 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2331 }
2332 }
2333
2334 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2335 if (isPowerOf2_32(BitWidth)) {
2336 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2337 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2338 Depth + 1))
2339 return true;
2340 }
2341 break;
2342 }
2343 case ISD::SMIN:
2344 case ISD::SMAX:
2345 case ISD::UMIN:
2346 case ISD::UMAX: {
2347 unsigned Opc = Op.getOpcode();
2348 SDValue Op0 = Op.getOperand(0);
2349 SDValue Op1 = Op.getOperand(1);
2350
2351 // If we're only demanding signbits, then we can simplify to OR/AND node.
2352 unsigned BitOp =
2353 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2354 unsigned NumSignBits =
2355 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2356 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2357 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2358 if (NumSignBits >= NumDemandedUpperBits)
2359 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2360
2361 // Check if one arg is always less/greater than (or equal) to the other arg.
2362 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2363 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2364 switch (Opc) {
2365 case ISD::SMIN:
2366 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2367 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2368 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2369 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2370 Known = KnownBits::smin(Known0, Known1);
2371 break;
2372 case ISD::SMAX:
2373 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2374 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2375 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2376 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2377 Known = KnownBits::smax(Known0, Known1);
2378 break;
2379 case ISD::UMIN:
2380 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2381 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2382 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2383 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2384 Known = KnownBits::umin(Known0, Known1);
2385 break;
2386 case ISD::UMAX:
2387 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2388 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2389 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2390 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2391 Known = KnownBits::umax(Known0, Known1);
2392 break;
2393 }
2394 break;
2395 }
2396 case ISD::BITREVERSE: {
2397 SDValue Src = Op.getOperand(0);
2398 APInt DemandedSrcBits = DemandedBits.reverseBits();
2399 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2400 Depth + 1))
2401 return true;
2402 Known = Known2.reverseBits();
2403 break;
2404 }
2405 case ISD::BSWAP: {
2406 SDValue Src = Op.getOperand(0);
2407
2408 // If the only bits demanded come from one byte of the bswap result,
2409 // just shift the input byte into position to eliminate the bswap.
2410 unsigned NLZ = DemandedBits.countl_zero();
2411 unsigned NTZ = DemandedBits.countr_zero();
2412
2413 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2414 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2415 // have 14 leading zeros, round to 8.
2416 NLZ = alignDown(NLZ, 8);
2417 NTZ = alignDown(NTZ, 8);
2418 // If we need exactly one byte, we can do this transformation.
2419 if (BitWidth - NLZ - NTZ == 8) {
2420 // Replace this with either a left or right shift to get the byte into
2421 // the right place.
2422 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2423 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2424 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2425 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2426 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2427 return TLO.CombineTo(Op, NewOp);
2428 }
2429 }
2430
2431 APInt DemandedSrcBits = DemandedBits.byteSwap();
2432 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2433 Depth + 1))
2434 return true;
2435 Known = Known2.byteSwap();
2436 break;
2437 }
2438 case ISD::CTPOP: {
2439 // If only 1 bit is demanded, replace with PARITY as long as we're before
2440 // op legalization.
2441 // FIXME: Limit to scalars for now.
2442 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2443 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2444 Op.getOperand(0)));
2445
2446 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2447 break;
2448 }
2450 SDValue Op0 = Op.getOperand(0);
2451 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2452 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2453
2454 // If we only care about the highest bit, don't bother shifting right.
2455 if (DemandedBits.isSignMask()) {
2456 unsigned MinSignedBits =
2457 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2458 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2459 // However if the input is already sign extended we expect the sign
2460 // extension to be dropped altogether later and do not simplify.
2461 if (!AlreadySignExtended) {
2462 // Compute the correct shift amount type, which must be getShiftAmountTy
2463 // for scalar types after legalization.
2464 SDValue ShiftAmt =
2465 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2466 return TLO.CombineTo(Op,
2467 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2468 }
2469 }
2470
2471 // If none of the extended bits are demanded, eliminate the sextinreg.
2472 if (DemandedBits.getActiveBits() <= ExVTBits)
2473 return TLO.CombineTo(Op, Op0);
2474
2475 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2476
2477 // Since the sign extended bits are demanded, we know that the sign
2478 // bit is demanded.
2479 InputDemandedBits.setBit(ExVTBits - 1);
2480
2481 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2482 Depth + 1))
2483 return true;
2484
2485 // If the sign bit of the input is known set or clear, then we know the
2486 // top bits of the result.
2487
2488 // If the input sign bit is known zero, convert this into a zero extension.
2489 if (Known.Zero[ExVTBits - 1])
2490 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2491
2492 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2493 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2494 Known.One.setBitsFrom(ExVTBits);
2495 Known.Zero &= Mask;
2496 } else { // Input sign bit unknown
2497 Known.Zero &= Mask;
2498 Known.One &= Mask;
2499 }
2500 break;
2501 }
2502 case ISD::BUILD_PAIR: {
2503 EVT HalfVT = Op.getOperand(0).getValueType();
2504 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2505
2506 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2507 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2508
2509 KnownBits KnownLo, KnownHi;
2510
2511 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2512 return true;
2513
2514 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2515 return true;
2516
2517 Known = KnownHi.concat(KnownLo);
2518 break;
2519 }
2521 if (VT.isScalableVector())
2522 return false;
2523 [[fallthrough]];
2524 case ISD::ZERO_EXTEND: {
2525 SDValue Src = Op.getOperand(0);
2526 EVT SrcVT = Src.getValueType();
2527 unsigned InBits = SrcVT.getScalarSizeInBits();
2528 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2529 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2530
2531 // If none of the top bits are demanded, convert this into an any_extend.
2532 if (DemandedBits.getActiveBits() <= InBits) {
2533 // If we only need the non-extended bits of the bottom element
2534 // then we can just bitcast to the result.
2535 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2536 VT.getSizeInBits() == SrcVT.getSizeInBits())
2537 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2538
2539 unsigned Opc =
2541 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2542 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2543 }
2544
2545 APInt InDemandedBits = DemandedBits.trunc(InBits);
2546 APInt InDemandedElts = DemandedElts.zext(InElts);
2547 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2548 Depth + 1)) {
2549 Op->dropFlags(SDNodeFlags::NonNeg);
2550 return true;
2551 }
2552 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2553 Known = Known.zext(BitWidth);
2554
2555 // Attempt to avoid multi-use ops if we don't need anything from them.
2557 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2558 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2559 break;
2560 }
2562 if (VT.isScalableVector())
2563 return false;
2564 [[fallthrough]];
2565 case ISD::SIGN_EXTEND: {
2566 SDValue Src = Op.getOperand(0);
2567 EVT SrcVT = Src.getValueType();
2568 unsigned InBits = SrcVT.getScalarSizeInBits();
2569 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2570 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2571
2572 APInt InDemandedElts = DemandedElts.zext(InElts);
2573 APInt InDemandedBits = DemandedBits.trunc(InBits);
2574
2575 // Since some of the sign extended bits are demanded, we know that the sign
2576 // bit is demanded.
2577 InDemandedBits.setBit(InBits - 1);
2578
2579 // If none of the top bits are demanded, convert this into an any_extend.
2580 if (DemandedBits.getActiveBits() <= InBits) {
2581 // If we only need the non-extended bits of the bottom element
2582 // then we can just bitcast to the result.
2583 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2584 VT.getSizeInBits() == SrcVT.getSizeInBits())
2585 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2586
2587 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2589 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2590 InBits) {
2591 unsigned Opc =
2593 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2594 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2595 }
2596 }
2597
2598 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2599 Depth + 1))
2600 return true;
2601 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2602
2603 // If the sign bit is known one, the top bits match.
2604 Known = Known.sext(BitWidth);
2605
2606 // If the sign bit is known zero, convert this to a zero extend.
2607 if (Known.isNonNegative()) {
2608 unsigned Opc =
2610 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2611 SDNodeFlags Flags;
2612 if (!IsVecInReg)
2613 Flags |= SDNodeFlags::NonNeg;
2614 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2615 }
2616 }
2617
2618 // Attempt to avoid multi-use ops if we don't need anything from them.
2620 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2621 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2622 break;
2623 }
2625 if (VT.isScalableVector())
2626 return false;
2627 [[fallthrough]];
2628 case ISD::ANY_EXTEND: {
2629 SDValue Src = Op.getOperand(0);
2630 EVT SrcVT = Src.getValueType();
2631 unsigned InBits = SrcVT.getScalarSizeInBits();
2632 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2633 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2634
2635 // If we only need the bottom element then we can just bitcast.
2636 // TODO: Handle ANY_EXTEND?
2637 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2638 VT.getSizeInBits() == SrcVT.getSizeInBits())
2639 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2640
2641 APInt InDemandedBits = DemandedBits.trunc(InBits);
2642 APInt InDemandedElts = DemandedElts.zext(InElts);
2643 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2644 Depth + 1))
2645 return true;
2646 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2647 Known = Known.anyext(BitWidth);
2648
2649 // Attempt to avoid multi-use ops if we don't need anything from them.
2651 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2652 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2653 break;
2654 }
2655 case ISD::TRUNCATE: {
2656 SDValue Src = Op.getOperand(0);
2657
2658 // Simplify the input, using demanded bit information, and compute the known
2659 // zero/one bits live out.
2660 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2661 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2662 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2663 Depth + 1)) {
2664 // Disable the nsw and nuw flags. We can no longer guarantee that we
2665 // won't wrap after simplification.
2666 Op->dropFlags(SDNodeFlags::NoWrap);
2667 return true;
2668 }
2669 Known = Known.trunc(BitWidth);
2670
2671 // Attempt to avoid multi-use ops if we don't need anything from them.
2673 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2674 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2675
2676 // If the input is only used by this truncate, see if we can shrink it based
2677 // on the known demanded bits.
2678 switch (Src.getOpcode()) {
2679 default:
2680 break;
2681 case ISD::SRL:
2682 // Shrink SRL by a constant if none of the high bits shifted in are
2683 // demanded.
2684 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2685 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2686 // undesirable.
2687 break;
2688
2689 if (Src.getNode()->hasOneUse()) {
2690 if (isTruncateFree(Src, VT) &&
2691 !isTruncateFree(Src.getValueType(), VT)) {
2692 // If truncate is only free at trunc(srl), do not turn it into
2693 // srl(trunc). The check is done by first check the truncate is free
2694 // at Src's opcode(srl), then check the truncate is not done by
2695 // referencing sub-register. In test, if both trunc(srl) and
2696 // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2697 // trunc(srl)'s trunc is free, trunc(srl) is better.
2698 break;
2699 }
2700
2701 std::optional<unsigned> ShAmtC =
2702 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2703 if (!ShAmtC || *ShAmtC >= BitWidth)
2704 break;
2705 unsigned ShVal = *ShAmtC;
2706
2707 APInt HighBits =
2708 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2709 HighBits.lshrInPlace(ShVal);
2710 HighBits = HighBits.trunc(BitWidth);
2711 if (!(HighBits & DemandedBits)) {
2712 // None of the shifted in bits are needed. Add a truncate of the
2713 // shift input, then shift it.
2714 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2715 SDValue NewTrunc =
2716 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2717 return TLO.CombineTo(
2718 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2719 }
2720 }
2721 break;
2722 }
2723
2724 break;
2725 }
2726 case ISD::AssertZext: {
2727 // AssertZext demands all of the high bits, plus any of the low bits
2728 // demanded by its users.
2729 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2731 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2732 TLO, Depth + 1))
2733 return true;
2734
2735 Known.Zero |= ~InMask;
2736 Known.One &= (~Known.Zero);
2737 break;
2738 }
2740 SDValue Src = Op.getOperand(0);
2741 SDValue Idx = Op.getOperand(1);
2742 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2743 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2744
2745 if (SrcEltCnt.isScalable())
2746 return false;
2747
2748 // Demand the bits from every vector element without a constant index.
2749 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2750 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2751 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2752 if (CIdx->getAPIntValue().ult(NumSrcElts))
2753 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2754
2755 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2756 // anything about the extended bits.
2757 APInt DemandedSrcBits = DemandedBits;
2758 if (BitWidth > EltBitWidth)
2759 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2760
2761 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2762 Depth + 1))
2763 return true;
2764
2765 // Attempt to avoid multi-use ops if we don't need anything from them.
2766 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2767 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2768 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2769 SDValue NewOp =
2770 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2771 return TLO.CombineTo(Op, NewOp);
2772 }
2773 }
2774
2775 Known = Known2;
2776 if (BitWidth > EltBitWidth)
2777 Known = Known.anyext(BitWidth);
2778 break;
2779 }
2780 case ISD::BITCAST: {
2781 if (VT.isScalableVector())
2782 return false;
2783 SDValue Src = Op.getOperand(0);
2784 EVT SrcVT = Src.getValueType();
2785 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2786
2787 // If this is an FP->Int bitcast and if the sign bit is the only
2788 // thing demanded, turn this into a FGETSIGN.
2789 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2790 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2791 SrcVT.isFloatingPoint()) {
2793 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2794 // place. We expect the SHL to be eliminated by other optimizations.
2795 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, VT, Src);
2796 unsigned ShVal = Op.getValueSizeInBits() - 1;
2797 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2798 return TLO.CombineTo(Op,
2799 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2800 }
2801 }
2802
2803 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2804 // Demand the elt/bit if any of the original elts/bits are demanded.
2805 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2806 unsigned Scale = BitWidth / NumSrcEltBits;
2807 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2808 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2809 for (unsigned i = 0; i != Scale; ++i) {
2810 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2811 unsigned BitOffset = EltOffset * NumSrcEltBits;
2812 DemandedSrcBits |= DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2813 }
2814 // Recursive calls below may turn not demanded elements into poison, so we
2815 // need to demand all smaller source elements that maps to a demanded
2816 // destination element.
2817 APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2818
2819 APInt KnownSrcUndef, KnownSrcZero;
2820 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2821 KnownSrcZero, TLO, Depth + 1))
2822 return true;
2823
2824 KnownBits KnownSrcBits;
2825 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2826 KnownSrcBits, TLO, Depth + 1))
2827 return true;
2828 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2829 // TODO - bigendian once we have test coverage.
2830 unsigned Scale = NumSrcEltBits / BitWidth;
2831 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2832 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2833 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2834 for (unsigned i = 0; i != NumElts; ++i)
2835 if (DemandedElts[i]) {
2836 unsigned Offset = (i % Scale) * BitWidth;
2837 DemandedSrcBits.insertBits(DemandedBits, Offset);
2838 DemandedSrcElts.setBit(i / Scale);
2839 }
2840
2841 if (SrcVT.isVector()) {
2842 APInt KnownSrcUndef, KnownSrcZero;
2843 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2844 KnownSrcZero, TLO, Depth + 1))
2845 return true;
2846 }
2847
2848 KnownBits KnownSrcBits;
2849 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2850 KnownSrcBits, TLO, Depth + 1))
2851 return true;
2852
2853 // Attempt to avoid multi-use ops if we don't need anything from them.
2854 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2855 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2856 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2857 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2858 return TLO.CombineTo(Op, NewOp);
2859 }
2860 }
2861 }
2862
2863 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2864 // recursive call where Known may be useful to the caller.
2865 if (Depth > 0) {
2866 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2867 return false;
2868 }
2869 break;
2870 }
2871 case ISD::MUL:
2872 if (DemandedBits.isPowerOf2()) {
2873 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2874 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2875 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2876 unsigned CTZ = DemandedBits.countr_zero();
2877 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2878 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2879 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2880 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2881 return TLO.CombineTo(Op, Shl);
2882 }
2883 }
2884 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2885 // X * X is odd iff X is odd.
2886 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2887 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2888 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2889 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2890 return TLO.CombineTo(Op, And1);
2891 }
2892 [[fallthrough]];
2893 case ISD::PTRADD:
2894 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
2895 break;
2896 // PTRADD behaves like ADD if pointers are represented as integers.
2897 [[fallthrough]];
2898 case ISD::ADD:
2899 case ISD::SUB: {
2900 // Add, Sub, and Mul don't demand any bits in positions beyond that
2901 // of the highest bit demanded of them.
2902 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2903 SDNodeFlags Flags = Op.getNode()->getFlags();
2904 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2905 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2906 KnownBits KnownOp0, KnownOp1;
2907 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2908 const KnownBits &KnownRHS) {
2909 if (Op.getOpcode() == ISD::MUL)
2910 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2911 return Demanded;
2912 };
2913 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2914 Depth + 1) ||
2915 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2916 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2917 // See if the operation should be performed at a smaller bit width.
2919 // Disable the nsw and nuw flags. We can no longer guarantee that we
2920 // won't wrap after simplification.
2921 Op->dropFlags(SDNodeFlags::NoWrap);
2922 return true;
2923 }
2924
2925 // neg x with only low bit demanded is simply x.
2926 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2927 isNullConstant(Op0))
2928 return TLO.CombineTo(Op, Op1);
2929
2930 // Attempt to avoid multi-use ops if we don't need anything from them.
2931 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2933 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2935 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2936 if (DemandedOp0 || DemandedOp1) {
2937 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2938 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2939 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2940 Flags & ~SDNodeFlags::NoWrap);
2941 return TLO.CombineTo(Op, NewOp);
2942 }
2943 }
2944
2945 // If we have a constant operand, we may be able to turn it into -1 if we
2946 // do not demand the high bits. This can make the constant smaller to
2947 // encode, allow more general folding, or match specialized instruction
2948 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2949 // is probably not useful (and could be detrimental).
2951 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2952 if (C && !C->isAllOnes() && !C->isOne() &&
2953 (C->getAPIntValue() | HighMask).isAllOnes()) {
2954 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2955 // Disable the nsw and nuw flags. We can no longer guarantee that we
2956 // won't wrap after simplification.
2957 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2958 Flags & ~SDNodeFlags::NoWrap);
2959 return TLO.CombineTo(Op, NewOp);
2960 }
2961
2962 // Match a multiply with a disguised negated-power-of-2 and convert to a
2963 // an equivalent shift-left amount.
2964 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2965 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2966 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2967 return 0;
2968
2969 // Don't touch opaque constants. Also, ignore zero and power-of-2
2970 // multiplies. Those will get folded later.
2971 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2972 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2973 !MulC->getAPIntValue().isPowerOf2()) {
2974 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2975 if (UnmaskedC.isNegatedPowerOf2())
2976 return (-UnmaskedC).logBase2();
2977 }
2978 return 0;
2979 };
2980
2981 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2982 unsigned ShlAmt) {
2983 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2984 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2985 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2986 return TLO.CombineTo(Op, Res);
2987 };
2988
2990 if (Op.getOpcode() == ISD::ADD) {
2991 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2992 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2993 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2994 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2995 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2996 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2997 }
2998 if (Op.getOpcode() == ISD::SUB) {
2999 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
3000 if (unsigned ShAmt = getShiftLeftAmt(Op1))
3001 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
3002 }
3003 }
3004
3005 if (Op.getOpcode() == ISD::MUL) {
3006 Known = KnownBits::mul(KnownOp0, KnownOp1);
3007 } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
3009 Op.getOpcode() != ISD::SUB, Flags.hasNoSignedWrap(),
3010 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
3011 }
3012 break;
3013 }
3014 case ISD::FABS: {
3015 SDValue Op0 = Op.getOperand(0);
3016 APInt SignMask = APInt::getSignMask(BitWidth);
3017
3018 if (!DemandedBits.intersects(SignMask))
3019 return TLO.CombineTo(Op, Op0);
3020
3021 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3022 Depth + 1))
3023 return true;
3024
3025 if (Known.isNonNegative())
3026 return TLO.CombineTo(Op, Op0);
3027 if (Known.isNegative())
3028 return TLO.CombineTo(
3029 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0, Op->getFlags()));
3030
3031 Known.Zero |= SignMask;
3032 Known.One &= ~SignMask;
3033
3034 break;
3035 }
3036 case ISD::FCOPYSIGN: {
3037 SDValue Op0 = Op.getOperand(0);
3038 SDValue Op1 = Op.getOperand(1);
3039
3040 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3041 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3042 APInt SignMask0 = APInt::getSignMask(BitWidth0);
3043 APInt SignMask1 = APInt::getSignMask(BitWidth1);
3044
3045 if (!DemandedBits.intersects(SignMask0))
3046 return TLO.CombineTo(Op, Op0);
3047
3048 if (SimplifyDemandedBits(Op0, ~SignMask0 & DemandedBits, DemandedElts,
3049 Known, TLO, Depth + 1) ||
3050 SimplifyDemandedBits(Op1, SignMask1, DemandedElts, Known2, TLO,
3051 Depth + 1))
3052 return true;
3053
3054 if (Known2.isNonNegative())
3055 return TLO.CombineTo(
3056 Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
3057
3058 if (Known2.isNegative())
3059 return TLO.CombineTo(
3060 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT,
3061 TLO.DAG.getNode(ISD::FABS, SDLoc(Op0), VT, Op0)));
3062
3063 Known.Zero &= ~SignMask0;
3064 Known.One &= ~SignMask0;
3065 break;
3066 }
3067 case ISD::FNEG: {
3068 SDValue Op0 = Op.getOperand(0);
3069 APInt SignMask = APInt::getSignMask(BitWidth);
3070
3071 if (!DemandedBits.intersects(SignMask))
3072 return TLO.CombineTo(Op, Op0);
3073
3074 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3075 Depth + 1))
3076 return true;
3077
3078 if (!Known.isSignUnknown()) {
3079 Known.Zero ^= SignMask;
3080 Known.One ^= SignMask;
3081 }
3082
3083 break;
3084 }
3085 default:
3086 // We also ask the target about intrinsics (which could be specific to it).
3087 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3088 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3089 // TODO: Probably okay to remove after audit; here to reduce change size
3090 // in initial enablement patch for scalable vectors
3091 if (Op.getValueType().isScalableVector())
3092 break;
3094 Known, TLO, Depth))
3095 return true;
3096 break;
3097 }
3098
3099 // Just use computeKnownBits to compute output bits.
3100 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3101 break;
3102 }
3103
3104 // If we know the value of all of the demanded bits, return this as a
3105 // constant.
3107 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
3108 // Avoid folding to a constant if any OpaqueConstant is involved.
3109 if (llvm::any_of(Op->ops(), [](SDValue V) {
3110 auto *C = dyn_cast<ConstantSDNode>(V);
3111 return C && C->isOpaque();
3112 }))
3113 return false;
3114 if (VT.isInteger())
3115 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
3116 if (VT.isFloatingPoint())
3117 return TLO.CombineTo(
3118 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
3119 dl, VT));
3120 }
3121
3122 // A multi use 'all demanded elts' simplify failed to find any knownbits.
3123 // Try again just for the original demanded elts.
3124 // Ensure we do this AFTER constant folding above.
3125 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3126 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3127
3128 return false;
3129}
3130
3132 const APInt &DemandedElts,
3133 DAGCombinerInfo &DCI) const {
3134 SelectionDAG &DAG = DCI.DAG;
3135 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3136 !DCI.isBeforeLegalizeOps());
3137
3138 APInt KnownUndef, KnownZero;
3139 bool Simplified =
3140 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3141 if (Simplified) {
3142 DCI.AddToWorklist(Op.getNode());
3143 DCI.CommitTargetLoweringOpt(TLO);
3144 }
3145
3146 return Simplified;
3147}
3148
3149/// Given a vector binary operation and known undefined elements for each input
3150/// operand, compute whether each element of the output is undefined.
3152 const APInt &UndefOp0,
3153 const APInt &UndefOp1) {
3154 EVT VT = BO.getValueType();
3156 "Vector binop only");
3157
3158 EVT EltVT = VT.getVectorElementType();
3159 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3160 assert(UndefOp0.getBitWidth() == NumElts &&
3161 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3162
3163 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3164 const APInt &UndefVals) {
3165 if (UndefVals[Index])
3166 return DAG.getUNDEF(EltVT);
3167
3168 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3169 // Try hard to make sure that the getNode() call is not creating temporary
3170 // nodes. Ignore opaque integers because they do not constant fold.
3171 SDValue Elt = BV->getOperand(Index);
3172 auto *C = dyn_cast<ConstantSDNode>(Elt);
3173 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3174 return Elt;
3175 }
3176
3177 return SDValue();
3178 };
3179
3180 APInt KnownUndef = APInt::getZero(NumElts);
3181 for (unsigned i = 0; i != NumElts; ++i) {
3182 // If both inputs for this element are either constant or undef and match
3183 // the element type, compute the constant/undef result for this element of
3184 // the vector.
3185 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3186 // not handle FP constants. The code within getNode() should be refactored
3187 // to avoid the danger of creating a bogus temporary node here.
3188 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3189 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3190 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3191 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3192 KnownUndef.setBit(i);
3193 }
3194 return KnownUndef;
3195}
3196
3198 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3199 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3200 bool AssumeSingleUse) const {
3201 EVT VT = Op.getValueType();
3202 unsigned Opcode = Op.getOpcode();
3203 APInt DemandedElts = OriginalDemandedElts;
3204 unsigned NumElts = DemandedElts.getBitWidth();
3205 assert(VT.isVector() && "Expected vector op");
3206
3207 KnownUndef = KnownZero = APInt::getZero(NumElts);
3208
3210 return false;
3211
3212 // TODO: For now we assume we know nothing about scalable vectors.
3213 if (VT.isScalableVector())
3214 return false;
3215
3216 assert(VT.getVectorNumElements() == NumElts &&
3217 "Mask size mismatches value type element count!");
3218
3219 // Undef operand.
3220 if (Op.isUndef()) {
3221 KnownUndef.setAllBits();
3222 return false;
3223 }
3224
3225 // If Op has other users, assume that all elements are needed.
3226 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3227 DemandedElts.setAllBits();
3228
3229 // Not demanding any elements from Op.
3230 if (DemandedElts == 0) {
3231 KnownUndef.setAllBits();
3232 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3233 }
3234
3235 // Limit search depth.
3237 return false;
3238
3239 SDLoc DL(Op);
3240 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3241 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3242
3243 // Helper for demanding the specified elements and all the bits of both binary
3244 // operands.
3245 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3246 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3247 TLO.DAG, Depth + 1);
3248 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3249 TLO.DAG, Depth + 1);
3250 if (NewOp0 || NewOp1) {
3251 SDValue NewOp =
3252 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3253 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3254 return TLO.CombineTo(Op, NewOp);
3255 }
3256 return false;
3257 };
3258
3259 switch (Opcode) {
3260 case ISD::SCALAR_TO_VECTOR: {
3261 if (!DemandedElts[0]) {
3262 KnownUndef.setAllBits();
3263 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3264 }
3265 KnownUndef.setHighBits(NumElts - 1);
3266 break;
3267 }
3268 case ISD::BITCAST: {
3269 SDValue Src = Op.getOperand(0);
3270 EVT SrcVT = Src.getValueType();
3271
3272 if (!SrcVT.isVector()) {
3273 // TODO - bigendian once we have test coverage.
3274 if (IsLE) {
3275 APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
3276 unsigned EltSize = VT.getScalarSizeInBits();
3277 for (unsigned I = 0; I != NumElts; ++I) {
3278 if (DemandedElts[I]) {
3279 unsigned Offset = I * EltSize;
3280 DemandedSrcBits.setBits(Offset, Offset + EltSize);
3281 }
3282 }
3283 KnownBits Known;
3284 if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
3285 return true;
3286 }
3287 break;
3288 }
3289
3290 // Fast handling of 'identity' bitcasts.
3291 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3292 if (NumSrcElts == NumElts)
3293 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3294 KnownZero, TLO, Depth + 1);
3295
3296 APInt SrcDemandedElts, SrcZero, SrcUndef;
3297
3298 // Bitcast from 'large element' src vector to 'small element' vector, we
3299 // must demand a source element if any DemandedElt maps to it.
3300 if ((NumElts % NumSrcElts) == 0) {
3301 unsigned Scale = NumElts / NumSrcElts;
3302 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3303 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3304 TLO, Depth + 1))
3305 return true;
3306
3307 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3308 // of the large element.
3309 // TODO - bigendian once we have test coverage.
3310 if (IsLE) {
3311 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3312 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3313 for (unsigned i = 0; i != NumElts; ++i)
3314 if (DemandedElts[i]) {
3315 unsigned Ofs = (i % Scale) * EltSizeInBits;
3316 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3317 }
3318
3319 KnownBits Known;
3320 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3321 TLO, Depth + 1))
3322 return true;
3323
3324 // The bitcast has split each wide element into a number of
3325 // narrow subelements. We have just computed the Known bits
3326 // for wide elements. See if element splitting results in
3327 // some subelements being zero. Only for demanded elements!
3328 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3329 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3330 .isAllOnes())
3331 continue;
3332 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3333 unsigned Elt = Scale * SrcElt + SubElt;
3334 if (DemandedElts[Elt])
3335 KnownZero.setBit(Elt);
3336 }
3337 }
3338 }
3339
3340 // If the src element is zero/undef then all the output elements will be -
3341 // only demanded elements are guaranteed to be correct.
3342 for (unsigned i = 0; i != NumSrcElts; ++i) {
3343 if (SrcDemandedElts[i]) {
3344 if (SrcZero[i])
3345 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3346 if (SrcUndef[i])
3347 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3348 }
3349 }
3350 }
3351
3352 // Bitcast from 'small element' src vector to 'large element' vector, we
3353 // demand all smaller source elements covered by the larger demanded element
3354 // of this vector.
3355 if ((NumSrcElts % NumElts) == 0) {
3356 unsigned Scale = NumSrcElts / NumElts;
3357 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3358 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3359 TLO, Depth + 1))
3360 return true;
3361
3362 // If all the src elements covering an output element are zero/undef, then
3363 // the output element will be as well, assuming it was demanded.
3364 for (unsigned i = 0; i != NumElts; ++i) {
3365 if (DemandedElts[i]) {
3366 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3367 KnownZero.setBit(i);
3368 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3369 KnownUndef.setBit(i);
3370 }
3371 }
3372 }
3373 break;
3374 }
3375 case ISD::FREEZE: {
3376 SDValue N0 = Op.getOperand(0);
3378 N0, DemandedElts, UndefPoisonKind::UndefOrPoison, Depth + 1))
3379 return TLO.CombineTo(Op, N0);
3380
3381 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3382 // freeze(op(x, ...)) -> op(freeze(x), ...).
3383 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3384 return TLO.CombineTo(
3386 TLO.DAG.getFreeze(N0.getOperand(0))));
3387 break;
3388 }
3389 case ISD::BUILD_VECTOR: {
3390 // Check all elements and simplify any unused elements with UNDEF.
3391 if (!DemandedElts.isAllOnes()) {
3392 // Don't simplify BROADCASTS.
3393 if (llvm::any_of(Op->op_values(),
3394 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3396 bool Updated = false;
3397 for (unsigned i = 0; i != NumElts; ++i) {
3398 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3399 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3400 KnownUndef.setBit(i);
3401 Updated = true;
3402 }
3403 }
3404 if (Updated)
3405 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3406 }
3407 }
3408 for (unsigned i = 0; i != NumElts; ++i) {
3409 SDValue SrcOp = Op.getOperand(i);
3410 if (SrcOp.isUndef()) {
3411 KnownUndef.setBit(i);
3412 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3414 KnownZero.setBit(i);
3415 }
3416 }
3417 break;
3418 }
3419 case ISD::CONCAT_VECTORS: {
3420 EVT SubVT = Op.getOperand(0).getValueType();
3421 unsigned NumSubVecs = Op.getNumOperands();
3422 unsigned NumSubElts = SubVT.getVectorNumElements();
3423 for (unsigned i = 0; i != NumSubVecs; ++i) {
3424 SDValue SubOp = Op.getOperand(i);
3425 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3426 APInt SubUndef, SubZero;
3427 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3428 Depth + 1))
3429 return true;
3430 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3431 KnownZero.insertBits(SubZero, i * NumSubElts);
3432 }
3433
3434 // Attempt to avoid multi-use ops if we don't need anything from them.
3435 if (!DemandedElts.isAllOnes()) {
3436 bool FoundNewSub = false;
3437 SmallVector<SDValue, 2> DemandedSubOps;
3438 for (unsigned i = 0; i != NumSubVecs; ++i) {
3439 SDValue SubOp = Op.getOperand(i);
3440 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3442 SubOp, SubElts, TLO.DAG, Depth + 1);
3443 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3444 FoundNewSub = NewSubOp ? true : FoundNewSub;
3445 }
3446 if (FoundNewSub) {
3447 SDValue NewOp =
3448 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3449 return TLO.CombineTo(Op, NewOp);
3450 }
3451 }
3452 break;
3453 }
3454 case ISD::INSERT_SUBVECTOR: {
3455 // Demand any elements from the subvector and the remainder from the src it
3456 // is inserted into.
3457 SDValue Src = Op.getOperand(0);
3458 SDValue Sub = Op.getOperand(1);
3459 uint64_t Idx = Op.getConstantOperandVal(2);
3460 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3461 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3462 APInt DemandedSrcElts = DemandedElts;
3463 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
3464
3465 // If none of the sub operand elements are demanded, bypass the insert.
3466 if (!DemandedSubElts)
3467 return TLO.CombineTo(Op, Src);
3468
3469 APInt SubUndef, SubZero;
3470 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3471 Depth + 1))
3472 return true;
3473
3474 // If none of the src operand elements are demanded, replace it with undef.
3475 if (!DemandedSrcElts && !Src.isUndef())
3476 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3477 TLO.DAG.getUNDEF(VT), Sub,
3478 Op.getOperand(2)));
3479
3480 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3481 TLO, Depth + 1))
3482 return true;
3483 KnownUndef.insertBits(SubUndef, Idx);
3484 KnownZero.insertBits(SubZero, Idx);
3485
3486 // Attempt to avoid multi-use ops if we don't need anything from them.
3487 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3489 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3491 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3492 if (NewSrc || NewSub) {
3493 NewSrc = NewSrc ? NewSrc : Src;
3494 NewSub = NewSub ? NewSub : Sub;
3495 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3496 NewSub, Op.getOperand(2));
3497 return TLO.CombineTo(Op, NewOp);
3498 }
3499 }
3500 break;
3501 }
3503 // Offset the demanded elts by the subvector index.
3504 SDValue Src = Op.getOperand(0);
3505 if (Src.getValueType().isScalableVector())
3506 break;
3507 uint64_t Idx = Op.getConstantOperandVal(1);
3508 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3509 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3510
3511 APInt SrcUndef, SrcZero;
3512 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3513 Depth + 1))
3514 return true;
3515 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3516 KnownZero = SrcZero.extractBits(NumElts, Idx);
3517
3518 // Attempt to avoid multi-use ops if we don't need anything from them.
3519 if (!DemandedElts.isAllOnes()) {
3521 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3522 if (NewSrc) {
3523 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3524 Op.getOperand(1));
3525 return TLO.CombineTo(Op, NewOp);
3526 }
3527 }
3528 break;
3529 }
3531 SDValue Vec = Op.getOperand(0);
3532 SDValue Scl = Op.getOperand(1);
3533 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3534
3535 // For a legal, constant insertion index, if we don't need this insertion
3536 // then strip it, else remove it from the demanded elts.
3537 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3538 unsigned Idx = CIdx->getZExtValue();
3539 if (!DemandedElts[Idx])
3540 return TLO.CombineTo(Op, Vec);
3541
3542 APInt DemandedVecElts(DemandedElts);
3543 DemandedVecElts.clearBit(Idx);
3544 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3545 KnownZero, TLO, Depth + 1))
3546 return true;
3547
3548 KnownUndef.setBitVal(Idx, Scl.isUndef());
3549
3550 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3551 break;
3552 }
3553
3554 APInt VecUndef, VecZero;
3555 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3556 Depth + 1))
3557 return true;
3558 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3559 break;
3560 }
3561 case ISD::VSELECT: {
3562 SDValue Sel = Op.getOperand(0);
3563 SDValue LHS = Op.getOperand(1);
3564 SDValue RHS = Op.getOperand(2);
3565
3566 // Try to transform the select condition based on the current demanded
3567 // elements.
3568 APInt UndefSel, ZeroSel;
3569 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3570 Depth + 1))
3571 return true;
3572
3573 // See if we can simplify either vselect operand.
3574 APInt DemandedLHS(DemandedElts);
3575 APInt DemandedRHS(DemandedElts);
3576 APInt UndefLHS, ZeroLHS;
3577 APInt UndefRHS, ZeroRHS;
3578 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3579 Depth + 1))
3580 return true;
3581 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3582 Depth + 1))
3583 return true;
3584
3585 KnownUndef = UndefLHS & UndefRHS;
3586 KnownZero = ZeroLHS & ZeroRHS;
3587
3588 // If we know that the selected element is always zero, we don't need the
3589 // select value element.
3590 APInt DemandedSel = DemandedElts & ~KnownZero;
3591 if (DemandedSel != DemandedElts)
3592 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3593 Depth + 1))
3594 return true;
3595
3596 break;
3597 }
3598 case ISD::VECTOR_SHUFFLE: {
3599 SDValue LHS = Op.getOperand(0);
3600 SDValue RHS = Op.getOperand(1);
3601 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3602
3603 // Collect demanded elements from shuffle operands..
3604 APInt DemandedLHS(NumElts, 0);
3605 APInt DemandedRHS(NumElts, 0);
3606 for (unsigned i = 0; i != NumElts; ++i) {
3607 int M = ShuffleMask[i];
3608 if (M < 0 || !DemandedElts[i])
3609 continue;
3610 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3611 if (M < (int)NumElts)
3612 DemandedLHS.setBit(M);
3613 else
3614 DemandedRHS.setBit(M - NumElts);
3615 }
3616
3617 // If either side isn't demanded, replace it by UNDEF. We handle this
3618 // explicitly here to also simplify in case of multiple uses (on the
3619 // contrary to the SimplifyDemandedVectorElts calls below).
3620 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3621 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3622 if (FoldLHS || FoldRHS) {
3623 LHS = FoldLHS ? TLO.DAG.getUNDEF(LHS.getValueType()) : LHS;
3624 RHS = FoldRHS ? TLO.DAG.getUNDEF(RHS.getValueType()) : RHS;
3625 SDValue NewOp =
3626 TLO.DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, ShuffleMask);
3627 return TLO.CombineTo(Op, NewOp);
3628 }
3629
3630 // See if we can simplify either shuffle operand.
3631 APInt UndefLHS, ZeroLHS;
3632 APInt UndefRHS, ZeroRHS;
3633 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3634 Depth + 1))
3635 return true;
3636 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3637 Depth + 1))
3638 return true;
3639
3640 // Simplify mask using undef elements from LHS/RHS.
3641 bool Updated = false;
3642 bool IdentityLHS = true, IdentityRHS = true;
3643 SmallVector<int, 32> NewMask(ShuffleMask);
3644 for (unsigned i = 0; i != NumElts; ++i) {
3645 int &M = NewMask[i];
3646 if (M < 0)
3647 continue;
3648 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3649 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3650 Updated = true;
3651 M = -1;
3652 }
3653 IdentityLHS &= (M < 0) || (M == (int)i);
3654 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3655 }
3656
3657 // Update legal shuffle masks based on demanded elements if it won't reduce
3658 // to Identity which can cause premature removal of the shuffle mask.
3659 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3660 SDValue LegalShuffle =
3661 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3662 if (LegalShuffle)
3663 return TLO.CombineTo(Op, LegalShuffle);
3664 }
3665
3666 // Propagate undef/zero elements from LHS/RHS.
3667 for (unsigned i = 0; i != NumElts; ++i) {
3668 int M = ShuffleMask[i];
3669 if (M < 0) {
3670 KnownUndef.setBit(i);
3671 } else if (M < (int)NumElts) {
3672 if (UndefLHS[M])
3673 KnownUndef.setBit(i);
3674 if (ZeroLHS[M])
3675 KnownZero.setBit(i);
3676 } else {
3677 if (UndefRHS[M - NumElts])
3678 KnownUndef.setBit(i);
3679 if (ZeroRHS[M - NumElts])
3680 KnownZero.setBit(i);
3681 }
3682 }
3683 break;
3684 }
3688 APInt SrcUndef, SrcZero;
3689 SDValue Src = Op.getOperand(0);
3690 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3691 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3692 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3693 Depth + 1))
3694 return true;
3695 KnownZero = SrcZero.zextOrTrunc(NumElts);
3696 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3697
3698 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3699 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3700 DemandedSrcElts == 1) {
3701 // aext - if we just need the bottom element then we can bitcast.
3702 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3703 }
3704
3705 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3706 // zext(undef) upper bits are guaranteed to be zero.
3707 if (DemandedElts.isSubsetOf(KnownUndef))
3708 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3709 KnownUndef.clearAllBits();
3710
3711 // zext - if we just need the bottom element then we can mask:
3712 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3713 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3714 Op->isOnlyUserOf(Src.getNode()) &&
3715 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3716 SDLoc DL(Op);
3717 EVT SrcVT = Src.getValueType();
3718 EVT SrcSVT = SrcVT.getScalarType();
3719
3720 // If we're after type legalization and SrcSVT is not legal, use the
3721 // promoted type for creating constants to avoid creating nodes with
3722 // illegal types.
3724 SrcSVT = getLegalTypeToTransformTo(*TLO.DAG.getContext(), SrcSVT);
3725
3726 SmallVector<SDValue> MaskElts;
3727 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3728 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3729 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3730 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3731 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3732 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3733 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3734 }
3735 }
3736 }
3737 break;
3738 }
3739
3740 // TODO: There are more binop opcodes that could be handled here - MIN,
3741 // MAX, saturated math, etc.
3742 case ISD::ADD: {
3743 SDValue Op0 = Op.getOperand(0);
3744 SDValue Op1 = Op.getOperand(1);
3745 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3746 APInt UndefLHS, ZeroLHS;
3747 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3748 Depth + 1, /*AssumeSingleUse*/ true))
3749 return true;
3750 }
3751 [[fallthrough]];
3752 }
3753 case ISD::AVGCEILS:
3754 case ISD::AVGCEILU:
3755 case ISD::AVGFLOORS:
3756 case ISD::AVGFLOORU:
3757 case ISD::OR:
3758 case ISD::XOR:
3759 case ISD::SUB:
3760 case ISD::FADD:
3761 case ISD::FSUB:
3762 case ISD::FMUL:
3763 case ISD::FDIV:
3764 case ISD::FREM: {
3765 SDValue Op0 = Op.getOperand(0);
3766 SDValue Op1 = Op.getOperand(1);
3767
3768 APInt UndefRHS, ZeroRHS;
3769 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3770 Depth + 1))
3771 return true;
3772 APInt UndefLHS, ZeroLHS;
3773 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3774 Depth + 1))
3775 return true;
3776
3777 KnownZero = ZeroLHS & ZeroRHS;
3778 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3779
3780 // Attempt to avoid multi-use ops if we don't need anything from them.
3781 // TODO - use KnownUndef to relax the demandedelts?
3782 if (!DemandedElts.isAllOnes())
3783 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3784 return true;
3785 break;
3786 }
3787 case ISD::SHL:
3788 case ISD::SRL:
3789 case ISD::SRA:
3790 case ISD::ROTL:
3791 case ISD::ROTR: {
3792 SDValue Op0 = Op.getOperand(0);
3793 SDValue Op1 = Op.getOperand(1);
3794
3795 APInt UndefRHS, ZeroRHS;
3796 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3797 Depth + 1))
3798 return true;
3799 APInt UndefLHS, ZeroLHS;
3800 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3801 Depth + 1))
3802 return true;
3803
3804 KnownZero = ZeroLHS;
3805 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3806
3807 // Attempt to avoid multi-use ops if we don't need anything from them.
3808 // TODO - use KnownUndef to relax the demandedelts?
3809 if (!DemandedElts.isAllOnes())
3810 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3811 return true;
3812 break;
3813 }
3814 case ISD::MUL:
3815 case ISD::MULHU:
3816 case ISD::MULHS:
3817 case ISD::AND: {
3818 SDValue Op0 = Op.getOperand(0);
3819 SDValue Op1 = Op.getOperand(1);
3820
3821 APInt SrcUndef, SrcZero;
3822 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3823 Depth + 1))
3824 return true;
3825 // FIXME: If we know that a demanded element was zero in Op1 we don't need
3826 // to demand it in Op0 - its guaranteed to be zero. There is however a
3827 // restriction, as we must not make any of the originally demanded elements
3828 // more poisonous. We could reduce amount of elements demanded, but then we
3829 // also need a to inform SimplifyDemandedVectorElts that some elements must
3830 // not be made more poisonous.
3831 if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
3832 TLO, Depth + 1))
3833 return true;
3834
3835 KnownUndef &= DemandedElts;
3836 KnownZero &= DemandedElts;
3837
3838 // If every element pair has a zero/undef/poison then just fold to zero.
3839 // fold (and x, undef/poison) -> 0 / (and x, 0) -> 0
3840 // fold (mul x, undef/poison) -> 0 / (mul x, 0) -> 0
3841 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3842 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3843
3844 // If either side has a zero element, then the result element is zero, even
3845 // if the other is an UNDEF.
3846 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3847 // and then handle 'and' nodes with the rest of the binop opcodes.
3848 KnownZero |= SrcZero;
3849 KnownUndef &= SrcUndef;
3850 KnownUndef &= ~KnownZero;
3851
3852 // Attempt to avoid multi-use ops if we don't need anything from them.
3853 if (!DemandedElts.isAllOnes())
3854 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3855 return true;
3856 break;
3857 }
3858 case ISD::TRUNCATE:
3859 case ISD::SIGN_EXTEND:
3860 case ISD::ZERO_EXTEND:
3861 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3862 KnownZero, TLO, Depth + 1))
3863 return true;
3864
3865 if (!DemandedElts.isAllOnes())
3867 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3868 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3869
3870 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3871 // zext(undef) upper bits are guaranteed to be zero.
3872 if (DemandedElts.isSubsetOf(KnownUndef))
3873 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3874 KnownUndef.clearAllBits();
3875 }
3876 break;
3877 case ISD::SINT_TO_FP:
3878 case ISD::UINT_TO_FP:
3879 case ISD::FP_TO_SINT:
3880 case ISD::FP_TO_UINT:
3881 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3882 KnownZero, TLO, Depth + 1))
3883 return true;
3884 // Don't fall through to generic undef -> undef handling.
3885 return false;
3886 default: {
3887 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3888 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3889 KnownZero, TLO, Depth))
3890 return true;
3891 } else {
3892 KnownBits Known;
3893 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3894 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3895 TLO, Depth, AssumeSingleUse))
3896 return true;
3897 }
3898 break;
3899 }
3900 }
3901 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3902
3903 // Constant fold all undef cases.
3904 // TODO: Handle zero cases as well.
3905 if (DemandedElts.isSubsetOf(KnownUndef))
3906 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3907
3908 return false;
3909}
3910
3911/// Determine which of the bits specified in Mask are known to be either zero or
3912/// one and return them in the Known.
3914 KnownBits &Known,
3915 const APInt &DemandedElts,
3916 const SelectionDAG &DAG,
3917 unsigned Depth) const {
3918 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3919 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3920 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3921 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3922 "Should use MaskedValueIsZero if you don't know whether Op"
3923 " is a target node!");
3924 Known.resetAll();
3925}
3926
3929 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3930 unsigned Depth) const {
3931 Known.resetAll();
3932}
3933
3936 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3937 unsigned Depth) const {
3938 Known.resetAll();
3939}
3940
3942 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3943 // The low bits are known zero if the pointer is aligned.
3944 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3945}
3946
3952
3953/// This method can be implemented by targets that want to expose additional
3954/// information about sign bits to the DAG Combiner.
3956 const APInt &,
3957 const SelectionDAG &,
3958 unsigned Depth) const {
3959 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3960 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3961 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3962 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3963 "Should use ComputeNumSignBits if you don't know whether Op"
3964 " is a target node!");
3965 return 1;
3966}
3967
3969 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3970 const MachineRegisterInfo &MRI, unsigned Depth) const {
3971 return 1;
3972}
3973
3975 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3976 TargetLoweringOpt &TLO, unsigned Depth) const {
3977 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3978 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3979 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3980 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3981 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3982 " is a target node!");
3983 return false;
3984}
3985
3987 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3988 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3989 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3990 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3991 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3992 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3993 "Should use SimplifyDemandedBits if you don't know whether Op"
3994 " is a target node!");
3995 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3996 return false;
3997}
3998
4000 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4001 SelectionDAG &DAG, unsigned Depth) const {
4002 assert(
4003 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4004 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4005 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4006 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4007 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
4008 " is a target node!");
4009 return SDValue();
4010}
4011
4012SDValue
4015 SelectionDAG &DAG) const {
4016 bool LegalMask = isShuffleMaskLegal(Mask, VT);
4017 if (!LegalMask) {
4018 std::swap(N0, N1);
4020 LegalMask = isShuffleMaskLegal(Mask, VT);
4021 }
4022
4023 if (!LegalMask)
4024 return SDValue();
4025
4026 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
4027}
4028
4030 return nullptr;
4031}
4032
4034 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4035 UndefPoisonKind Kind, unsigned Depth) const {
4036 assert(
4037 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4038 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4039 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4040 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4041 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4042 " is a target node!");
4043
4044 // If Op can't create undef/poison and none of its operands are undef/poison
4045 // then Op is never undef/poison.
4046 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, Kind,
4047 /*ConsiderFlags*/ true, Depth) &&
4048 all_of(Op->ops(), [&](SDValue V) {
4049 return DAG.isGuaranteedNotToBeUndefOrPoison(V, Kind, Depth + 1);
4050 });
4051}
4052
4054 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4055 UndefPoisonKind Kind, bool ConsiderFlags, unsigned Depth) const {
4056 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4057 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4058 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4059 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4060 "Should use canCreateUndefOrPoison if you don't know whether Op"
4061 " is a target node!");
4062 // Be conservative and return true.
4063 return true;
4064}
4065
4067 KnownFPClass &Known,
4068 const APInt &DemandedElts,
4069 const SelectionDAG &DAG,
4070 unsigned Depth) const {
4071 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4072 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4073 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4074 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4075 "Should use computeKnownFPClass if you don't know whether Op"
4076 " is a target node!");
4077}
4078
4080 const APInt &DemandedElts,
4081 const SelectionDAG &DAG,
4082 bool SNaN,
4083 unsigned Depth) const {
4084 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4085 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4086 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4087 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4088 "Should use isKnownNeverNaN if you don't know whether Op"
4089 " is a target node!");
4090 return false;
4091}
4092
4094 const APInt &DemandedElts,
4095 APInt &UndefElts,
4096 const SelectionDAG &DAG,
4097 unsigned Depth) const {
4098 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4099 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4100 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4101 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4102 "Should use isSplatValue if you don't know whether Op"
4103 " is a target node!");
4104 return false;
4105}
4106
4107// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4108// work with truncating build vectors and vectors with elements of less than
4109// 8 bits.
4111 if (!N)
4112 return false;
4113
4114 unsigned EltWidth;
4115 APInt CVal;
4116 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4117 /*AllowTruncation=*/true)) {
4118 CVal = CN->getAPIntValue();
4119 EltWidth = N.getValueType().getScalarSizeInBits();
4120 } else
4121 return false;
4122
4123 // If this is a truncating splat, truncate the splat value.
4124 // Otherwise, we may fail to match the expected values below.
4125 if (EltWidth < CVal.getBitWidth())
4126 CVal = CVal.trunc(EltWidth);
4127
4128 switch (getBooleanContents(N.getValueType())) {
4130 return CVal[0];
4132 return CVal.isOne();
4134 return CVal.isAllOnes();
4135 }
4136
4137 llvm_unreachable("Invalid boolean contents");
4138}
4139
4141 if (!N)
4142 return false;
4143
4145 if (!CN) {
4147 if (!BV)
4148 return false;
4149
4150 // Only interested in constant splats, we don't care about undef
4151 // elements in identifying boolean constants and getConstantSplatNode
4152 // returns NULL if all ops are undef;
4153 CN = BV->getConstantSplatNode();
4154 if (!CN)
4155 return false;
4156 }
4157
4158 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4159 return !CN->getAPIntValue()[0];
4160
4161 return CN->isZero();
4162}
4163
4165 bool SExt) const {
4166 if (VT == MVT::i1)
4167 return N->isOne();
4168
4170 switch (Cnt) {
4172 // An extended value of 1 is always true, unless its original type is i1,
4173 // in which case it will be sign extended to -1.
4174 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4177 return N->isAllOnes() && SExt;
4178 }
4179 llvm_unreachable("Unexpected enumeration.");
4180}
4181
4182/// This helper function of SimplifySetCC tries to optimize the comparison when
4183/// either operand of the SetCC node is a bitwise-and instruction.
4184SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4185 ISD::CondCode Cond, const SDLoc &DL,
4186 DAGCombinerInfo &DCI) const {
4187 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4188 std::swap(N0, N1);
4189
4190 SelectionDAG &DAG = DCI.DAG;
4191 EVT OpVT = N0.getValueType();
4192 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4193 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4194 return SDValue();
4195
4196 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4197 // iff everything but LSB is known zero:
4198 if (Cond == ISD::SETNE && isNullConstant(N1) &&
4201 unsigned NumEltBits = OpVT.getScalarSizeInBits();
4202 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4203 if (DAG.MaskedValueIsZero(N0, UpperBits))
4204 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4205 }
4206
4207 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4208 // test in a narrow type that we can truncate to with no cost. Examples:
4209 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4210 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4211 // TODO: This conservatively checks for type legality on the source and
4212 // destination types. That may inhibit optimizations, but it also
4213 // allows setcc->shift transforms that may be more beneficial.
4214 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4215 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4216 isTypeLegal(OpVT) && N0.hasOneUse()) {
4217 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4218 AndC->getAPIntValue().getActiveBits());
4219 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4220 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4221 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4222 return DAG.getSetCC(DL, VT, Trunc, Zero,
4224 }
4225 }
4226
4227 // Match these patterns in any of their permutations:
4228 // (X & Y) == Y
4229 // (X & Y) != Y
4230 SDValue X, Y;
4231 if (N0.getOperand(0) == N1) {
4232 X = N0.getOperand(1);
4233 Y = N0.getOperand(0);
4234 } else if (N0.getOperand(1) == N1) {
4235 X = N0.getOperand(0);
4236 Y = N0.getOperand(1);
4237 } else {
4238 return SDValue();
4239 }
4240
4241 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4242 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4243 // its liable to create and infinite loop.
4244 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4245 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4247 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4248 // Note that where Y is variable and is known to have at most one bit set
4249 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4250 // equivalent when Y == 0.
4251 assert(OpVT.isInteger());
4253 if (DCI.isBeforeLegalizeOps() ||
4255 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4256 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4257 // If the target supports an 'and-not' or 'and-complement' logic operation,
4258 // try to use that to make a comparison operation more efficient.
4259 // But don't do this transform if the mask is a single bit because there are
4260 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4261 // 'rlwinm' on PPC).
4262
4263 // Bail out if the compare operand that we want to turn into a zero is
4264 // already a zero (otherwise, infinite loop).
4265 if (isNullConstant(Y))
4266 return SDValue();
4267
4268 // Transform this into: ~X & Y == 0.
4269 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4270 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4271 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4272 }
4273
4274 return SDValue();
4275}
4276
4277/// This helper function of SimplifySetCC tries to optimize the comparison when
4278/// either operand of the SetCC node is a bitwise-or instruction.
4279/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4280SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4281 ISD::CondCode Cond, const SDLoc &DL,
4282 DAGCombinerInfo &DCI) const {
4283 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4284 std::swap(N0, N1);
4285
4286 SelectionDAG &DAG = DCI.DAG;
4287 EVT OpVT = N0.getValueType();
4288 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4289 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4290 return SDValue();
4291
4292 // (X | Y) == Y
4293 // (X | Y) != Y
4294 SDValue X;
4295 if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(X)) {
4296 // If the target supports an 'and-not' or 'and-complement' logic operation,
4297 // try to use that to make a comparison operation more efficient.
4298
4299 // Bail out if the compare operand that we want to turn into a zero is
4300 // already a zero (otherwise, infinite loop).
4301 if (isNullConstant(N1))
4302 return SDValue();
4303
4304 // Transform this into: X & ~Y ==/!= 0.
4305 SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
4306 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
4307 return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
4308 }
4309
4310 return SDValue();
4311}
4312
4313/// There are multiple IR patterns that could be checking whether certain
4314/// truncation of a signed number would be lossy or not. The pattern which is
4315/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4316/// We are looking for the following pattern: (KeptBits is a constant)
4317/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4318/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4319/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4320/// We will unfold it into the natural trunc+sext pattern:
4321/// ((%x << C) a>> C) dstcond %x
4322/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4323SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4324 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4325 const SDLoc &DL) const {
4326 // We must be comparing with a constant.
4327 ConstantSDNode *C1;
4328 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4329 return SDValue();
4330
4331 // N0 should be: add %x, (1 << (KeptBits-1))
4332 if (N0->getOpcode() != ISD::ADD)
4333 return SDValue();
4334
4335 // And we must be 'add'ing a constant.
4336 ConstantSDNode *C01;
4337 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4338 return SDValue();
4339
4340 SDValue X = N0->getOperand(0);
4341 EVT XVT = X.getValueType();
4342
4343 // Validate constants ...
4344
4345 APInt I1 = C1->getAPIntValue();
4346
4347 ISD::CondCode NewCond;
4348 if (Cond == ISD::CondCode::SETULT) {
4349 NewCond = ISD::CondCode::SETEQ;
4350 } else if (Cond == ISD::CondCode::SETULE) {
4351 NewCond = ISD::CondCode::SETEQ;
4352 // But need to 'canonicalize' the constant.
4353 I1 += 1;
4354 } else if (Cond == ISD::CondCode::SETUGT) {
4355 NewCond = ISD::CondCode::SETNE;
4356 // But need to 'canonicalize' the constant.
4357 I1 += 1;
4358 } else if (Cond == ISD::CondCode::SETUGE) {
4359 NewCond = ISD::CondCode::SETNE;
4360 } else
4361 return SDValue();
4362
4363 APInt I01 = C01->getAPIntValue();
4364
4365 auto checkConstants = [&I1, &I01]() -> bool {
4366 // Both of them must be power-of-two, and the constant from setcc is bigger.
4367 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4368 };
4369
4370 if (checkConstants()) {
4371 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4372 } else {
4373 // What if we invert constants? (and the target predicate)
4374 I1.negate();
4375 I01.negate();
4376 assert(XVT.isInteger());
4377 NewCond = getSetCCInverse(NewCond, XVT);
4378 if (!checkConstants())
4379 return SDValue();
4380 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4381 }
4382
4383 // They are power-of-two, so which bit is set?
4384 const unsigned KeptBits = I1.logBase2();
4385 const unsigned KeptBitsMinusOne = I01.logBase2();
4386
4387 // Magic!
4388 if (KeptBits != (KeptBitsMinusOne + 1))
4389 return SDValue();
4390 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4391
4392 // We don't want to do this in every single case.
4393 SelectionDAG &DAG = DCI.DAG;
4394 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4395 return SDValue();
4396
4397 // Unfold into: sext_inreg(%x) cond %x
4398 // Where 'cond' will be either 'eq' or 'ne'.
4399 SDValue SExtInReg = DAG.getNode(
4401 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4402 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4403}
4404
4405// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4406SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4407 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4408 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4410 "Should be a comparison with 0.");
4411 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4412 "Valid only for [in]equality comparisons.");
4413
4414 unsigned NewShiftOpcode;
4415 SDValue X, C, Y;
4416
4417 SelectionDAG &DAG = DCI.DAG;
4418
4419 // Look for '(C l>>/<< Y)'.
4420 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4421 // The shift should be one-use.
4422 if (!V.hasOneUse())
4423 return false;
4424 unsigned OldShiftOpcode = V.getOpcode();
4425 switch (OldShiftOpcode) {
4426 case ISD::SHL:
4427 NewShiftOpcode = ISD::SRL;
4428 break;
4429 case ISD::SRL:
4430 NewShiftOpcode = ISD::SHL;
4431 break;
4432 default:
4433 return false; // must be a logical shift.
4434 }
4435 // We should be shifting a constant.
4436 // FIXME: best to use isConstantOrConstantVector().
4437 C = V.getOperand(0);
4438 ConstantSDNode *CC =
4439 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4440 if (!CC)
4441 return false;
4442 Y = V.getOperand(1);
4443
4444 ConstantSDNode *XC =
4445 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4447 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4448 };
4449
4450 // LHS of comparison should be an one-use 'and'.
4451 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4452 return SDValue();
4453
4454 X = N0.getOperand(0);
4455 SDValue Mask = N0.getOperand(1);
4456
4457 // 'and' is commutative!
4458 if (!Match(Mask)) {
4459 std::swap(X, Mask);
4460 if (!Match(Mask))
4461 return SDValue();
4462 }
4463
4464 EVT VT = X.getValueType();
4465
4466 // Produce:
4467 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4468 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4469 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4470 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4471 return T2;
4472}
4473
4474/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4475/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4476/// handle the commuted versions of these patterns.
4477SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4478 ISD::CondCode Cond, const SDLoc &DL,
4479 DAGCombinerInfo &DCI) const {
4480 unsigned BOpcode = N0.getOpcode();
4481 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4482 "Unexpected binop");
4483 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4484
4485 // (X + Y) == X --> Y == 0
4486 // (X - Y) == X --> Y == 0
4487 // (X ^ Y) == X --> Y == 0
4488 SelectionDAG &DAG = DCI.DAG;
4489 EVT OpVT = N0.getValueType();
4490 SDValue X = N0.getOperand(0);
4491 SDValue Y = N0.getOperand(1);
4492 if (X == N1)
4493 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4494
4495 if (Y != N1)
4496 return SDValue();
4497
4498 // (X + Y) == Y --> X == 0
4499 // (X ^ Y) == Y --> X == 0
4500 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4501 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4502
4503 // The shift would not be valid if the operands are boolean (i1).
4504 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4505 return SDValue();
4506
4507 // (X - Y) == Y --> X == Y << 1
4508 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4509 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4510 if (!DCI.isCalledByLegalizer())
4511 DCI.AddToWorklist(YShl1.getNode());
4512 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4513}
4514
// Simplify a setcc of (ctpop X) compared against the constant C1, expanding
// power-of-2 / power-of-2-or-zero style tests into cheaper bit tricks.
// NOTE(review): this listing is a doxygen scrape; several hyperlinked lines
// were dropped by the extraction, including the opening signature line (the
// call site below invokes this as simplifySetCCWithCTPOP(*this, VT, N0, C1,
// Cond, dl, DAG)) — confirm all elided lines against the checked-in
// TargetLowering.cpp before editing.
4516 SDValue N0, const APInt &C1,
4517 ISD::CondCode Cond, const SDLoc &dl,
4518 SelectionDAG &DAG) {
4519 // Look through truncs that don't change the value of a ctpop.
4520 // FIXME: Add vector support? Need to be careful with setcc result type below.
4521 SDValue CTPOP = N0;
// NOTE(review): the second half of this condition (upstream line 4523) was
// elided by the extraction.
4522 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4524 CTPOP = N0.getOperand(0);
4525
// Only a single-use ctpop can be rewritten away.
4526 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4527 return SDValue();
4528
4529 EVT CTVT = CTPOP.getValueType();
4530 SDValue CTOp = CTPOP.getOperand(0);
4531
4532 // Expand a power-of-2-or-zero comparison based on ctpop:
4533 // (ctpop x) u< 2 -> (x & x-1) == 0
4534 // (ctpop x) u> 1 -> (x & x-1) != 0
4535 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4536 // Keep the CTPOP if it is a cheap vector op.
4537 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4538 return SDValue();
4539
// Bail out if the number of clear-lowest-bit passes needed would exceed the
// target's cost limit for this custom expansion.
4540 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4541 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4542 return SDValue();
4543 if (C1 == 0 && (Cond == ISD::SETULT))
4544 return SDValue(); // This is handled elsewhere.
4545
4546 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4547
// Each pass computes Result &= Result - 1, clearing the lowest set bit; the
// final value is zero iff ctpop(x) did not exceed the number of passes.
4548 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4549 SDValue Result = CTOp;
4550 for (unsigned i = 0; i < Passes; i++) {
4551 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4552 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4553 }
// NOTE(review): 'CC' is defined on elided upstream line 4554 — presumably
// SETEQ for the u< form and SETNE for the u> form; confirm upstream.
4555 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4556 }
4557
4558 // Expand a power-of-2 comparison based on ctpop
4559 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4560 // Keep the CTPOP if it is cheap.
4561 if (TLI.isCtpopFast(CTVT))
4562 return SDValue();
4563
4564 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4565 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4566 assert(CTVT.isInteger());
4567 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4568
4569 // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4570 // check before emitting a potentially unnecessary op.
4571 if (DAG.isKnownNeverZero(CTOp)) {
4572 // (ctpop x) == 1 --> (x & x-1) == 0
4573 // (ctpop x) != 1 --> (x & x-1) != 0
4574 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4575 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4576 return RHS;
4577 }
4578
4579 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4580 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4581 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
// NOTE(review): 'CmpCond' comes from elided upstream line 4582 — presumably
// the unsigned compare matching the comments above; confirm upstream.
4583 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4584 }
4585
4586 return SDValue();
4587}
4588
// Fold an eq/ne comparison of a rotated value (or an 'or' that includes one)
// against 0 or all-ones. A rotate only permutes bits, so "all bits clear" and
// "all bits set" are invariant under the rotation.
// NOTE(review): this listing is a doxygen scrape; the opening signature line
// was dropped by the extraction (the call site below invokes this helper as
// foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG)) — confirm against the
// checked-in TargetLowering.cpp.
4590 ISD::CondCode Cond, const SDLoc &dl,
4591 SelectionDAG &DAG) {
// Only equality-style comparisons are bit-order independent.
4592 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4593 return SDValue();
4594
// The RHS must be constant 0 or -1 (splat allowed, undef lanes tolerated).
4595 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4596 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4597 return SDValue();
4598
// Returns the pre-rotate value if X is a rotate, else a null SDValue.
4599 auto getRotateSource = [](SDValue X) {
4600 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4601 return X.getOperand(0);
4602 return SDValue();
4603 };
4604
4605 // Peek through a rotated value compared against 0 or -1:
4606 // (rot X, Y) == 0/-1 --> X == 0/-1
4607 // (rot X, Y) != 0/-1 --> X != 0/-1
4608 if (SDValue R = getRotateSource(N0))
4609 return DAG.getSetCC(dl, VT, R, N1, Cond);
4610
4611 // Peek through an 'or' of a rotated value compared against 0:
4612 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4613 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4614 //
4615 // TODO: Add the 'and' with -1 sibling.
4616 // TODO: Recurse through a series of 'or' ops to find the rotate.
4617 EVT OpVT = N0.getValueType();
// The 'or' form only holds for comparison with zero (the -1 variant would
// need 'and', per the TODO above), and the 'or' must be single-use.
4618 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4619 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4620 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4621 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4622 }
4623 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4624 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4625 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4626 }
4627 }
4628
4629 return SDValue();
4630}
4631
// Fold an eq/ne-with-zero comparison of a funnel shift whose two shifted
// inputs share an 'or' operand, trading the funnel shift for a plain
// shift + or (fewer shifted bits, since bit order does not matter vs zero).
// NOTE(review): this listing is a doxygen scrape; the opening signature line
// was dropped by the extraction (the call site below invokes this helper as
// foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG)) — confirm against the
// checked-in TargetLowering.cpp.
4633 ISD::CondCode Cond, const SDLoc &dl,
4634 SelectionDAG &DAG) {
4635 // If we are testing for all-bits-clear, we might be able to do that with
4636 // less shifting since bit-order does not matter.
4637 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4638 return SDValue();
4639
// Only the compare-with-zero form is handled (splat allowed, undef lanes
// tolerated).
4640 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4641 if (!C1 || !C1->isZero())
4642 return SDValue();
4643
// LHS must be a single-use funnel shift.
4644 if (!N0.hasOneUse() ||
4645 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4646 return SDValue();
4647
4648 unsigned BitWidth = N0.getScalarValueSizeInBits();
4649 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4650 if (!ShAmtC)
4651 return SDValue();
4652
// Reduce the constant shift amount modulo the bit width (funnel-shift
// amounts wrap); a zero effective shift is a no-op, handled elsewhere.
4653 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(BitWidth);
4654 if (ShAmt == 0)
4655 return SDValue();
4656
4657 // Canonicalize fshr as fshl to reduce pattern-matching.
4658 if (N0.getOpcode() == ISD::FSHR)
4659 ShAmt = BitWidth - ShAmt;
4660
4661 // Match an 'or' with a specific operand 'Other' in either commuted variant.
// On success, X is bound to the 'or' operand equal to 'Other' and Y to the
// remaining 'or' operand.
4662 SDValue X, Y;
4663 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4664 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4665 return false;
4666 if (Or.getOperand(0) == Other) {
4667 X = Or.getOperand(0);
4668 Y = Or.getOperand(1);
4669 return true;
4670 }
4671 if (Or.getOperand(1) == Other) {
4672 X = Or.getOperand(1);
4673 Y = Or.getOperand(0);
4674 return true;
4675 }
4676 return false;
4677 };
4678
4679 EVT OpVT = N0.getValueType();
4680 EVT ShAmtVT = N0.getOperand(2).getValueType();
4681 SDValue F0 = N0.getOperand(0);
4682 SDValue F1 = N0.getOperand(1);
4683 if (matchOr(F0, F1)) {
4684 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4685 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4686 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4687 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4688 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4689 }
4690 if (matchOr(F1, F0)) {
4691 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4692 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4693 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4694 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4695 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4696 }
4697
4698 return SDValue();
4699}
4700
4701/// Try to simplify a setcc built with the specified operands and cc. If it is
4702/// unable to simplify it, return a null SDValue.
4704 ISD::CondCode Cond, bool foldBooleans,
4705 DAGCombinerInfo &DCI,
4706 const SDLoc &dl) const {
4707 SelectionDAG &DAG = DCI.DAG;
4708 const DataLayout &Layout = DAG.getDataLayout();
4709 EVT OpVT = N0.getValueType();
4710 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4711
4712 // Constant fold or commute setcc.
4713 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4714 return Fold;
4715
4716 bool N0ConstOrSplat =
4717 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4718 bool N1ConstOrSplat =
4719 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4720
4721 // Canonicalize toward having the constant on the RHS.
4722 // TODO: Handle non-splat vector constants. All undef causes trouble.
4723 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4724 // infinite loop here when we encounter one.
4726 if (N0ConstOrSplat && !N1ConstOrSplat &&
4727 (DCI.isBeforeLegalizeOps() ||
4728 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4729 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4730
4731 // If we have a subtract with the same 2 non-constant operands as this setcc
4732 // -- but in reverse order -- then try to commute the operands of this setcc
4733 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4734 // instruction on some targets.
4735 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4736 (DCI.isBeforeLegalizeOps() ||
4737 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4738 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4739 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4740 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4741
4742 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4743 return V;
4744
4745 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4746 return V;
4747
4748 if (auto *N1C = isConstOrConstSplat(N1)) {
4749 const APInt &C1 = N1C->getAPIntValue();
4750
4751 // Optimize some CTPOP cases.
4752 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4753 return V;
4754
4755 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4756 // X * Y == 0 --> (X == 0) || (Y == 0)
4757 // X * Y != 0 --> (X != 0) && (Y != 0)
4758 // TODO: This bails out if minsize is set, but if the target doesn't have a
4759 // single instruction multiply for this type, it would likely be
4760 // smaller to decompose.
4761 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4762 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4763 (N0->getFlags().hasNoUnsignedWrap() ||
4764 N0->getFlags().hasNoSignedWrap()) &&
4765 !Attr.hasFnAttr(Attribute::MinSize)) {
4766 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4767 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4768 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4769 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4770 }
4771
4772 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4773 // equality comparison, then we're just comparing whether X itself is
4774 // zero.
4775 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4776 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4778 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4779 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4780 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4781 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4782 // (srl (ctlz x), 5) == 0 -> X != 0
4783 // (srl (ctlz x), 5) != 1 -> X != 0
4784 Cond = ISD::SETNE;
4785 } else {
4786 // (srl (ctlz x), 5) != 0 -> X == 0
4787 // (srl (ctlz x), 5) == 1 -> X == 0
4788 Cond = ISD::SETEQ;
4789 }
4790 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4791 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4792 Cond);
4793 }
4794 }
4795 }
4796 }
4797
4798 // setcc X, 0, setlt --> X (when X is all sign bits)
4799 // setcc X, 0, setne --> X (when X is all sign bits)
4800 //
4801 // When we know that X has 0 or -1 in each element (or scalar), this
4802 // comparison will produce X. This is only true when boolean contents are
4803 // represented via 0s and -1s.
4804 if (VT == OpVT &&
4805 // Check that the result of setcc is 0 and -1.
4807 // Match only for checks X < 0 and X != 0
4808 (Cond == ISD::SETLT || Cond == ISD::SETNE) && isNullOrNullSplat(N1) &&
4809 // The identity holds iff we know all sign bits for all lanes.
4811 return N0;
4812
4813 // FIXME: Support vectors.
4814 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4815 const APInt &C1 = N1C->getAPIntValue();
4816
4817 // (zext x) == C --> x == (trunc C)
4818 // (sext x) == C --> x == (trunc C)
4819 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4820 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4821 unsigned MinBits = N0.getValueSizeInBits();
4822 SDValue PreExt;
4823 bool Signed = false;
4824 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4825 // ZExt
4826 MinBits = N0->getOperand(0).getValueSizeInBits();
4827 PreExt = N0->getOperand(0);
4828 } else if (N0->getOpcode() == ISD::AND) {
4829 // DAGCombine turns costly ZExts into ANDs
4830 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4831 if ((C->getAPIntValue()+1).isPowerOf2()) {
4832 MinBits = C->getAPIntValue().countr_one();
4833 PreExt = N0->getOperand(0);
4834 }
4835 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4836 // SExt
4837 MinBits = N0->getOperand(0).getValueSizeInBits();
4838 PreExt = N0->getOperand(0);
4839 Signed = true;
4840 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4841 // ZEXTLOAD / SEXTLOAD
4842 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4843 MinBits = LN0->getMemoryVT().getSizeInBits();
4844 PreExt = N0;
4845 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4846 Signed = true;
4847 MinBits = LN0->getMemoryVT().getSizeInBits();
4848 PreExt = N0;
4849 }
4850 }
4851
4852 // Figure out how many bits we need to preserve this constant.
4853 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4854
4855 // Make sure we're not losing bits from the constant.
4856 if (MinBits > 0 &&
4857 MinBits < C1.getBitWidth() &&
4858 MinBits >= ReqdBits) {
4859 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4860 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4861 // Will get folded away.
4862 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4863 if (MinBits == 1 && C1 == 1)
4864 // Invert the condition.
4865 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4867 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4868 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4869 }
4870
4871 // If truncating the setcc operands is not desirable, we can still
4872 // simplify the expression in some cases:
4873 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4874 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4875 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4876 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4877 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4878 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4879 SDValue TopSetCC = N0->getOperand(0);
4880 unsigned N0Opc = N0->getOpcode();
4881 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4882 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4883 TopSetCC.getOpcode() == ISD::SETCC &&
4884 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4885 (isConstFalseVal(N1) ||
4886 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4887
4888 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4889 (!N1C->isZero() && Cond == ISD::SETNE);
4890
4891 if (!Inverse)
4892 return TopSetCC;
4893
4895 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4896 TopSetCC.getOperand(0).getValueType());
4897 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4898 TopSetCC.getOperand(1),
4899 InvCond);
4900 }
4901 }
4902 }
4903
4904 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4905 // equality or unsigned, and all 1 bits of the const are in the same
4906 // partial word, see if we can shorten the load.
4907 if (DCI.isBeforeLegalize() &&
4909 N0.getOpcode() == ISD::AND && C1 == 0 &&
4910 N0.getNode()->hasOneUse() &&
4911 isa<LoadSDNode>(N0.getOperand(0)) &&
4912 N0.getOperand(0).getNode()->hasOneUse() &&
4914 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4915 APInt bestMask;
4916 unsigned bestWidth = 0, bestOffset = 0;
4917 if (Lod->isSimple() && Lod->isUnindexed() &&
4918 (Lod->getMemoryVT().isByteSized() ||
4919 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4920 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4921 unsigned origWidth = N0.getValueSizeInBits();
4922 unsigned maskWidth = origWidth;
4923 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4924 // 8 bits, but have to be careful...
4925 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4926 origWidth = Lod->getMemoryVT().getSizeInBits();
4927 const APInt &Mask = N0.getConstantOperandAPInt(1);
4928 // Only consider power-of-2 widths (and at least one byte) as candiates
4929 // for the narrowed load.
4930 for (unsigned width = 8; width < origWidth; width *= 2) {
4931 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4932 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4933 // Avoid accessing any padding here for now (we could use memWidth
4934 // instead of origWidth here otherwise).
4935 unsigned maxOffset = origWidth - width;
4936 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4937 if (Mask.isSubsetOf(newMask)) {
4938 unsigned ptrOffset =
4939 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4940 unsigned IsFast = 0;
4941 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4942 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4944 ptrOffset / 8) &&
4946 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4947 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4948 IsFast) {
4949 bestOffset = ptrOffset / 8;
4950 bestMask = Mask.lshr(offset);
4951 bestWidth = width;
4952 break;
4953 }
4954 }
4955 newMask <<= 8;
4956 }
4957 if (bestWidth)
4958 break;
4959 }
4960 }
4961 if (bestWidth) {
4962 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4963 SDValue Ptr = Lod->getBasePtr();
4964 if (bestOffset != 0)
4965 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4966 SDValue NewLoad =
4967 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4968 Lod->getPointerInfo().getWithOffset(bestOffset),
4969 Lod->getBaseAlign());
4970 SDValue And =
4971 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4972 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4973 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4974 }
4975 }
4976
4977 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4978 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4979 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4980
4981 // If the comparison constant has bits in the upper part, the
4982 // zero-extended value could never match.
4984 C1.getBitWidth() - InSize))) {
4985 switch (Cond) {
4986 case ISD::SETUGT:
4987 case ISD::SETUGE:
4988 case ISD::SETEQ:
4989 return DAG.getConstant(0, dl, VT);
4990 case ISD::SETULT:
4991 case ISD::SETULE:
4992 case ISD::SETNE:
4993 return DAG.getConstant(1, dl, VT);
4994 case ISD::SETGT:
4995 case ISD::SETGE:
4996 // True if the sign bit of C1 is set.
4997 return DAG.getConstant(C1.isNegative(), dl, VT);
4998 case ISD::SETLT:
4999 case ISD::SETLE:
5000 // True if the sign bit of C1 isn't set.
5001 return DAG.getConstant(C1.isNonNegative(), dl, VT);
5002 default:
5003 break;
5004 }
5005 }
5006
5007 // Otherwise, we can perform the comparison with the low bits.
5008 switch (Cond) {
5009 case ISD::SETEQ:
5010 case ISD::SETNE:
5011 case ISD::SETUGT:
5012 case ISD::SETUGE:
5013 case ISD::SETULT:
5014 case ISD::SETULE: {
5015 EVT newVT = N0.getOperand(0).getValueType();
5016 // FIXME: Should use isNarrowingProfitable.
5017 if (DCI.isBeforeLegalizeOps() ||
5018 (isOperationLegal(ISD::SETCC, newVT) &&
5019 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
5021 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
5022 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
5023
5024 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
5025 NewConst, Cond);
5026 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
5027 }
5028 break;
5029 }
5030 default:
5031 break; // todo, be more careful with signed comparisons
5032 }
5033 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
5034 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5036 OpVT)) {
5037 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
5038 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
5039 EVT ExtDstTy = N0.getValueType();
5040 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
5041
5042 // If the constant doesn't fit into the number of bits for the source of
5043 // the sign extension, it is impossible for both sides to be equal.
5044 if (C1.getSignificantBits() > ExtSrcTyBits)
5045 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
5046
5047 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
5048 ExtDstTy != ExtSrcTy && "Unexpected types!");
5049 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
5050 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
5051 DAG.getConstant(Imm, dl, ExtDstTy));
5052 if (!DCI.isCalledByLegalizer())
5053 DCI.AddToWorklist(ZextOp.getNode());
5054 // Otherwise, make this a use of a zext.
5055 return DAG.getSetCC(dl, VT, ZextOp,
5056 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
5057 } else if ((N1C->isZero() || N1C->isOne()) &&
5058 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5059 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
5060 // excluded as they are handled below whilst checking for foldBooleans.
5061 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
5062 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
5063 (N0.getValueType() == MVT::i1 ||
5067 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5068 if (TrueWhenTrue)
5069 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
5070 // Invert the condition.
5071 if (N0.getOpcode() == ISD::SETCC) {
5074 if (DCI.isBeforeLegalizeOps() ||
5076 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
5077 }
5078 }
5079
5080 if ((N0.getOpcode() == ISD::XOR ||
5081 (N0.getOpcode() == ISD::AND &&
5082 N0.getOperand(0).getOpcode() == ISD::XOR &&
5083 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
5084 isOneConstant(N0.getOperand(1))) {
5085 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5086 // can only do this if the top bits are known zero.
5087 unsigned BitWidth = N0.getValueSizeInBits();
5088 if (DAG.MaskedValueIsZero(N0,
5090 BitWidth-1))) {
5091 // Okay, get the un-inverted input value.
5092 SDValue Val;
5093 if (N0.getOpcode() == ISD::XOR) {
5094 Val = N0.getOperand(0);
5095 } else {
5096 assert(N0.getOpcode() == ISD::AND &&
5097 N0.getOperand(0).getOpcode() == ISD::XOR);
5098 // ((X^1)&1)^1 -> X & 1
5099 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
5100 N0.getOperand(0).getOperand(0),
5101 N0.getOperand(1));
5102 }
5103
5104 return DAG.getSetCC(dl, VT, Val, N1,
5106 }
5107 } else if (N1C->isOne()) {
5108 SDValue Op0 = N0;
5109 if (Op0.getOpcode() == ISD::TRUNCATE)
5110 Op0 = Op0.getOperand(0);
5111
5112 if ((Op0.getOpcode() == ISD::XOR) &&
5113 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
5114 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
5115 SDValue XorLHS = Op0.getOperand(0);
5116 SDValue XorRHS = Op0.getOperand(1);
5117 // Ensure that the input setccs return an i1 type or 0/1 value.
5118 if (Op0.getValueType() == MVT::i1 ||
5123 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5125 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
5126 }
5127 }
5128 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
5129 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5130 if (Op0.getValueType().bitsGT(VT))
5131 Op0 = DAG.getNode(ISD::AND, dl, VT,
5132 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
5133 DAG.getConstant(1, dl, VT));
5134 else if (Op0.getValueType().bitsLT(VT))
5135 Op0 = DAG.getNode(ISD::AND, dl, VT,
5136 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
5137 DAG.getConstant(1, dl, VT));
5138
5139 return DAG.getSetCC(dl, VT, Op0,
5140 DAG.getConstant(0, dl, Op0.getValueType()),
5142 }
5143 if (Op0.getOpcode() == ISD::AssertZext &&
5144 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
5145 return DAG.getSetCC(dl, VT, Op0,
5146 DAG.getConstant(0, dl, Op0.getValueType()),
5148 }
5149 }
5150
5151 // Given:
5152 // icmp eq/ne (urem %x, %y), 0
5153 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5154 // icmp eq/ne %x, 0
5155 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5156 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5157 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
5158 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
5159 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5160 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
5161 }
5162
5163 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5164 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5165 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5167 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
5168 N1C->isAllOnes()) {
5169 return DAG.getSetCC(dl, VT, N0.getOperand(0),
5170 DAG.getConstant(0, dl, OpVT),
5172 }
5173
5174 // fold (setcc (trunc x) c) -> (setcc x c)
5175 if (N0.getOpcode() == ISD::TRUNCATE &&
5177 (N0->getFlags().hasNoSignedWrap() &&
5180 EVT NewVT = N0.getOperand(0).getValueType();
5181 SDValue NewConst = DAG.getConstant(
5183 ? C1.sext(NewVT.getSizeInBits())
5184 : C1.zext(NewVT.getSizeInBits()),
5185 dl, NewVT);
5186 return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5187 }
5188
5189 if (SDValue V =
5190 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
5191 return V;
5192 }
5193
5194 // These simplifications apply to splat vectors as well.
5195 // TODO: Handle more splat vector cases.
5196 if (auto *N1C = isConstOrConstSplat(N1)) {
5197 const APInt &C1 = N1C->getAPIntValue();
5198
5199 APInt MinVal, MaxVal;
5200 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
5202 MinVal = APInt::getSignedMinValue(OperandBitSize);
5203 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
5204 } else {
5205 MinVal = APInt::getMinValue(OperandBitSize);
5206 MaxVal = APInt::getMaxValue(OperandBitSize);
5207 }
5208
5209 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5210 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5211 // X >= MIN --> true
5212 if (C1 == MinVal)
5213 return DAG.getBoolConstant(true, dl, VT, OpVT);
5214
5215 if (!VT.isVector()) { // TODO: Support this for vectors.
5216 // X >= C0 --> X > (C0 - 1)
5217 APInt C = C1 - 1;
5219 if ((DCI.isBeforeLegalizeOps() ||
5220 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5221 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5222 isLegalICmpImmediate(C.getSExtValue())))) {
5223 return DAG.getSetCC(dl, VT, N0,
5224 DAG.getConstant(C, dl, N1.getValueType()),
5225 NewCC);
5226 }
5227 }
5228 }
5229
5230 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5231 // X <= MAX --> true
5232 if (C1 == MaxVal)
5233 return DAG.getBoolConstant(true, dl, VT, OpVT);
5234
5235 // X <= C0 --> X < (C0 + 1)
5236 if (!VT.isVector()) { // TODO: Support this for vectors.
5237 APInt C = C1 + 1;
5239 if ((DCI.isBeforeLegalizeOps() ||
5240 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5241 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5242 isLegalICmpImmediate(C.getSExtValue())))) {
5243 return DAG.getSetCC(dl, VT, N0,
5244 DAG.getConstant(C, dl, N1.getValueType()),
5245 NewCC);
5246 }
5247 }
5248 }
5249
5250 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5251 if (C1 == MinVal)
5252 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5253
5254 // TODO: Support this for vectors after legalize ops.
5255 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5256 // Canonicalize setlt X, Max --> setne X, Max
5257 if (C1 == MaxVal)
5258 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5259
5260 // If we have setult X, 1, turn it into seteq X, 0
5261 if (C1 == MinVal+1)
5262 return DAG.getSetCC(dl, VT, N0,
5263 DAG.getConstant(MinVal, dl, N0.getValueType()),
5264 ISD::SETEQ);
5265 }
5266 }
5267
5268 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5269 if (C1 == MaxVal)
5270 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5271
5272 // TODO: Support this for vectors after legalize ops.
5273 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5274 // Canonicalize setgt X, Min --> setne X, Min
5275 if (C1 == MinVal)
5276 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5277
5278 // If we have setugt X, Max-1, turn it into seteq X, Max
5279 if (C1 == MaxVal-1)
5280 return DAG.getSetCC(dl, VT, N0,
5281 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5282 ISD::SETEQ);
5283 }
5284 }
5285
5286 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5287 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5288 if (C1.isZero())
5289 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5290 VT, N0, N1, Cond, DCI, dl))
5291 return CC;
5292
5293 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5294 // For example, when high 32-bits of i64 X are known clear:
5295 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5296 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5297 bool CmpZero = N1C->isZero();
5298 bool CmpNegOne = N1C->isAllOnes();
5299 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5300 // Match or(lo,shl(hi,bw/2)) pattern.
5301 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5302 unsigned EltBits = V.getScalarValueSizeInBits();
5303 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5304 return false;
5305 SDValue LHS = V.getOperand(0);
5306 SDValue RHS = V.getOperand(1);
5307 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5308 // Unshifted element must have zero upperbits.
5309 if (RHS.getOpcode() == ISD::SHL &&
5310 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5311 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5312 DAG.MaskedValueIsZero(LHS, HiBits)) {
5313 Lo = LHS;
5314 Hi = RHS.getOperand(0);
5315 return true;
5316 }
5317 if (LHS.getOpcode() == ISD::SHL &&
5318 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5319 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5320 DAG.MaskedValueIsZero(RHS, HiBits)) {
5321 Lo = RHS;
5322 Hi = LHS.getOperand(0);
5323 return true;
5324 }
5325 return false;
5326 };
5327
5328 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5329 unsigned EltBits = N0.getScalarValueSizeInBits();
5330 unsigned HalfBits = EltBits / 2;
5331 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5332 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5333 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5334 SDValue NewN0 =
5335 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5336 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5337 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5338 };
5339
5340 SDValue Lo, Hi;
5341 if (IsConcat(N0, Lo, Hi))
5342 return MergeConcat(Lo, Hi);
5343
5344 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5345 SDValue Lo0, Lo1, Hi0, Hi1;
5346 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5347 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5348 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5349 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5350 }
5351 }
5352 }
5353 }
5354
5355 // If we have "setcc X, C0", check to see if we can shrink the immediate
5356 // by changing cc.
5357 // TODO: Support this for vectors after legalize ops.
5358 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5359 // SETUGT X, SINTMAX -> SETLT X, 0
5360 // SETUGE X, SINTMIN -> SETLT X, 0
5361 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5362 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5363 return DAG.getSetCC(dl, VT, N0,
5364 DAG.getConstant(0, dl, N1.getValueType()),
5365 ISD::SETLT);
5366
5367 // SETULT X, SINTMIN -> SETGT X, -1
5368 // SETULE X, SINTMAX -> SETGT X, -1
5369 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5370 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5371 return DAG.getSetCC(dl, VT, N0,
5372 DAG.getAllOnesConstant(dl, N1.getValueType()),
5373 ISD::SETGT);
5374 }
5375 }
5376
5377 // Back to non-vector simplifications.
5378 // TODO: Can we do these for vector splats?
5379 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5380 const APInt &C1 = N1C->getAPIntValue();
5381 EVT ShValTy = N0.getValueType();
5382
5383 // Fold bit comparisons when we can. This will result in an
5384 // incorrect value when boolean false is negative one, unless
5385 // the bitsize is 1 in which case the false value is the same
5386 // in practice regardless of the representation.
5387 if ((VT.getSizeInBits() == 1 ||
5389 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5390 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5391 N0.getOpcode() == ISD::AND) {
5392 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5393 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5394 // Perform the xform if the AND RHS is a single bit.
5395 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5396 if (AndRHS->getAPIntValue().isPowerOf2() &&
5397 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5398 return DAG.getNode(
5399 ISD::TRUNCATE, dl, VT,
5400 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5401 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5402 }
5403 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5404 // (X & 8) == 8 --> (X & 8) >> 3
5405 // Perform the xform if C1 is a single bit.
5406 unsigned ShCt = C1.logBase2();
5407 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5408 return DAG.getNode(
5409 ISD::TRUNCATE, dl, VT,
5410 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5411 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5412 }
5413 }
5414 }
5415 }
5416
5417 if (C1.getSignificantBits() <= 64 &&
5419 // (X & -256) == 256 -> (X >> 8) == 1
5420 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5421 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5422 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5423 const APInt &AndRHSC = AndRHS->getAPIntValue();
5424 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5425 unsigned ShiftBits = AndRHSC.countr_zero();
5426 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5427 // If using an unsigned shift doesn't yield a legal compare
5428 // immediate, try using sra instead.
5429 APInt NewC = C1.lshr(ShiftBits);
5430 if (NewC.getSignificantBits() <= 64 &&
5432 APInt SignedC = C1.ashr(ShiftBits);
5433 if (SignedC.getSignificantBits() <= 64 &&
5435 SDValue Shift = DAG.getNode(
5436 ISD::SRA, dl, ShValTy, N0.getOperand(0),
5437 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5438 SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
5439 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5440 }
5441 }
5442 SDValue Shift = DAG.getNode(
5443 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5444 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5445 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5446 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5447 }
5448 }
5449 }
5450 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5451 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5452 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5453 // X < 0x100000000 -> (X >> 32) < 1
5454 // X >= 0x100000000 -> (X >> 32) >= 1
5455 // X <= 0x0ffffffff -> (X >> 32) < 1
5456 // X > 0x0ffffffff -> (X >> 32) >= 1
5457 unsigned ShiftBits;
5458 APInt NewC = C1;
5459 ISD::CondCode NewCond = Cond;
5460 if (AdjOne) {
5461 ShiftBits = C1.countr_one();
5462 NewC = NewC + 1;
5463 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5464 } else {
5465 ShiftBits = C1.countr_zero();
5466 }
5467 NewC.lshrInPlace(ShiftBits);
5468 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5470 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5471 SDValue Shift =
5472 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5473 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5474 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5475 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5476 }
5477 }
5478 }
5479 }
5480
5482 auto *CFP = cast<ConstantFPSDNode>(N1);
5483 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5484
5485 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5486 // constant if knowing that the operand is non-nan is enough. We prefer to
5487 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5488 // materialize 0.0.
5489 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5490 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5491
5492 // setcc (fneg x), C -> setcc swap(pred) x, -C
5493 if (N0.getOpcode() == ISD::FNEG) {
5495 if (DCI.isBeforeLegalizeOps() ||
5496 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5497 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5498 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5499 }
5500 }
5501
5502 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5504 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5505 bool IsFabs = N0.getOpcode() == ISD::FABS;
5506 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5507 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5508 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5509 : (IsFabs ? fcInf : fcPosInf);
5510 if (Cond == ISD::SETUEQ)
5511 Flag |= fcNan;
5512 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5513 DAG.getTargetConstant(Flag, dl, MVT::i32));
5514 }
5515 }
5516
5517 // If the condition is not legal, see if we can find an equivalent one
5518 // which is legal.
5520 // If the comparison was an awkward floating-point == or != and one of
5521 // the comparison operands is infinity or negative infinity, convert the
5522 // condition to a less-awkward <= or >=.
5523 if (CFP->getValueAPF().isInfinity()) {
5524 bool IsNegInf = CFP->getValueAPF().isNegative();
5526 switch (Cond) {
5527 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5528 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5529 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5530 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5531 default: break;
5532 }
5533 if (NewCond != ISD::SETCC_INVALID &&
5534 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5535 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5536 }
5537 }
5538 }
5539
5540 if (N0 == N1) {
5541 // The sext(setcc()) => setcc() optimization relies on the appropriate
5542 // constant being emitted.
5543 assert(!N0.getValueType().isInteger() &&
5544 "Integer types should be handled by FoldSetCC");
5545
5546 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5547 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5548 if (UOF == 2) // FP operators that are undefined on NaNs.
5549 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5550 if (UOF == unsigned(EqTrue))
5551 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5552 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5553 // if it is not already.
5554 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5555 if (NewCond != Cond &&
5556 (DCI.isBeforeLegalizeOps() ||
5557 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5558 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5559 }
5560
5561 // ~X > ~Y --> Y > X
5562 // ~X < ~Y --> Y < X
5563 // ~X < C --> X > ~C
5564 // ~X > C --> X < ~C
5565 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5566 N0.getValueType().isInteger()) {
5567 if (isBitwiseNot(N0)) {
5568 if (isBitwiseNot(N1))
5569 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5570
5573 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5574 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5575 }
5576 }
5577 }
5578
5579 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5580 N0.getValueType().isInteger()) {
5581 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5582 N0.getOpcode() == ISD::XOR) {
5583 // Simplify (X+Y) == (X+Z) --> Y == Z
5584 if (N0.getOpcode() == N1.getOpcode()) {
5585 if (N0.getOperand(0) == N1.getOperand(0))
5586 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5587 if (N0.getOperand(1) == N1.getOperand(1))
5588 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5589 if (isCommutativeBinOp(N0.getOpcode())) {
5590 // If X op Y == Y op X, try other combinations.
5591 if (N0.getOperand(0) == N1.getOperand(1))
5592 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5593 Cond);
5594 if (N0.getOperand(1) == N1.getOperand(0))
5595 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5596 Cond);
5597 }
5598 }
5599
5600 // If RHS is a legal immediate value for a compare instruction, we need
5601 // to be careful about increasing register pressure needlessly.
5602 bool LegalRHSImm = false;
5603
5604 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5605 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5606 // Turn (X+C1) == C2 --> X == C2-C1
5607 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5608 return DAG.getSetCC(
5609 dl, VT, N0.getOperand(0),
5610 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5611 dl, N0.getValueType()),
5612 Cond);
5613
5614 // Turn (X^C1) == C2 --> X == C1^C2
5615 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5616 return DAG.getSetCC(
5617 dl, VT, N0.getOperand(0),
5618 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5619 dl, N0.getValueType()),
5620 Cond);
5621 }
5622
5623 // Turn (C1-X) == C2 --> X == C1-C2
5624 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5625 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5626 return DAG.getSetCC(
5627 dl, VT, N0.getOperand(1),
5628 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5629 dl, N0.getValueType()),
5630 Cond);
5631
5632 // Could RHSC fold directly into a compare?
5633 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5634 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5635 }
5636
5637 // (X+Y) == X --> Y == 0 and similar folds.
5638 // Don't do this if X is an immediate that can fold into a cmp
5639 // instruction and X+Y has other uses. It could be an induction variable
5640 // chain, and the transform would increase register pressure.
5641 if (!LegalRHSImm || N0.hasOneUse())
5642 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5643 return V;
5644 }
5645
5646 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5647 N1.getOpcode() == ISD::XOR)
5648 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5649 return V;
5650
5651 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5652 return V;
5653
5654 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
5655 return V;
5656 }
5657
5658 // Fold remainder of division by a constant.
5659 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5660 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5661 // When division is cheap or optimizing for minimum size,
5662 // fall through to DIVREM creation by skipping this fold.
5663 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5664 if (N0.getOpcode() == ISD::UREM) {
5665 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5666 return Folded;
5667 } else if (N0.getOpcode() == ISD::SREM) {
5668 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5669 return Folded;
5670 }
5671 }
5672 }
5673
5674 // Fold away ALL boolean setcc's.
5675 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5676 SDValue Temp;
5677 switch (Cond) {
5678 default: llvm_unreachable("Unknown integer setcc!");
5679 case ISD::SETEQ: // X == Y -> ~(X^Y)
5680 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5681 N0 = DAG.getNOT(dl, Temp, OpVT);
5682 if (!DCI.isCalledByLegalizer())
5683 DCI.AddToWorklist(Temp.getNode());
5684 break;
5685 case ISD::SETNE: // X != Y --> (X^Y)
5686 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5687 break;
5688 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5689 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5690 Temp = DAG.getNOT(dl, N0, OpVT);
5691 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5692 if (!DCI.isCalledByLegalizer())
5693 DCI.AddToWorklist(Temp.getNode());
5694 break;
5695 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5696 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5697 Temp = DAG.getNOT(dl, N1, OpVT);
5698 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5699 if (!DCI.isCalledByLegalizer())
5700 DCI.AddToWorklist(Temp.getNode());
5701 break;
5702 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5703 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5704 Temp = DAG.getNOT(dl, N0, OpVT);
5705 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5706 if (!DCI.isCalledByLegalizer())
5707 DCI.AddToWorklist(Temp.getNode());
5708 break;
5709 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5710 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5711 Temp = DAG.getNOT(dl, N1, OpVT);
5712 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5713 break;
5714 }
5715 if (VT.getScalarType() != MVT::i1) {
5716 if (!DCI.isCalledByLegalizer())
5717 DCI.AddToWorklist(N0.getNode());
5718 // FIXME: If running after legalize, we probably can't do this.
5720 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5721 }
5722 return N0;
5723 }
5724
5725 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5726 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5727 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
5729 N1->getFlags().hasNoUnsignedWrap()) ||
5731 N1->getFlags().hasNoSignedWrap())) &&
5733 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5734 }
5735
5736 // Fold (setcc (sub nsw a, b), zero, s??) -> (setcc a, b, s??)
5737 // TODO: Remove that .isVector() check
5738 if (VT.isVector() && isZeroOrZeroSplat(N1) && N0.getOpcode() == ISD::SUB &&
5740 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), Cond);
5741 }
5742
5743 // Could not fold it.
5744 return SDValue();
5745}
5746
/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
///
/// Walks through target address-wrapper nodes and (any-)add chains, folding
/// every constant addend it finds.  \p GA receives the global on success and
/// \p Offset accumulates the total constant displacement (note it is in/out:
/// the caller's initial value is added to, not overwritten).
                                      int64_t &Offset) const {

  // Peel off any target-specific wrapper node around the address first.
  SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();

  // Base case: a plain global address; fold in its built-in offset.
  if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
    GA = GASD->getGlobal();
    Offset += GASD->getOffset();
    return true;
  }

  // Recursive case: (GA' + C) or (C + GA'), where GA' itself matches this
  // predicate.  The non-matching operand must be a constant to succeed.
  if (N->isAnyAdd()) {
    SDValue N1 = N->getOperand(0);
    SDValue N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
        Offset += V->getSExtValue();
        return true;
      }
    } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
        Offset += V->getSExtValue();
        return true;
      }
    }
  }

  return false;
}
5778
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.  Targets override this hook to
  // fold target-specific node patterns during DAG combining; returning a
  // null SDValue tells the combiner that nothing was changed.
  return SDValue();
}
5784
5785//===----------------------------------------------------------------------===//
5786// Inline Assembler Implementation Methods
5787//===----------------------------------------------------------------------===//
5788
  unsigned S = Constraint.size();

  // Single-letter constraints map directly onto the generic GCC inline-asm
  // constraint classes.
  if (S == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'r':
      return C_RegisterClass;
    case 'm': // memory
    case 'o': // offsetable
    case 'V': // not offsetable
      return C_Memory;
    case 'p': // Address.
      return C_Address;
    case 'n': // Simple Integer
    case 'E': // Floating Point Constant
    case 'F': // Floating Point Constant
      return C_Immediate;
    case 'i': // Simple Integer or Relocatable Constant
    case 's': // Relocatable Constant
    case 'X': // Allow ANY value.
    case 'I': // Target registers.
    case 'J':
    case 'K':
    case 'L':
    case 'M':
    case 'N':
    case 'O':
    case 'P':
    case '<':
    case '>':
      return C_Other;
    }
  }

  // Brace-enclosed constraints ("{name}") name a specific physical register,
  // with "{memory}" as a special case meaning a memory operand/clobber.
  if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
    if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
      return C_Memory;
    return C_Register;
  }
  return C_Unknown;
}
5832
5833/// Try to replace an X constraint, which matches anything, with another that
5834/// has more specific requirements based on the type of the corresponding
5835/// operand.
5836const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5837 if (ConstraintVT.isInteger())
5838 return "r";
5839 if (ConstraintVT.isFloatingPoint())
5840 return "f"; // works for many targets
5841 return nullptr;
5842}
5843
    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  // Base implementation: no target-specific lowering for asm output
  // constraints.  A null SDValue makes the caller fall back to the generic
  // register-copy handling.
  return SDValue();
}
5849
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
///
/// Handles the generic single-letter constraints 'X', 'i', 'n' and 's' by
/// producing a target constant, target global address, or target block
/// address as appropriate; multi-letter constraints are left to targets.
                                                  StringRef Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-letter constraints are handled here.
  if (Constraint.size() > 1)
    return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X': // Allows any operand
  case 'i': // Simple Integer or Relocatable Constant
  case 'n': // Simple Integer
  case 's': { // Relocatable Constant

    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (true) {
      // Constant operand: valid for every letter except 's' (which requires
      // a relocatable symbol).
      if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended. Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
        return;
      }
      // Symbolic operands: valid for every letter except 'n' (which requires
      // a plain integer).  Any offset gathered so far is folded in.
      if (ConstraintLetter != 'n') {
        if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                   GA->getValueType(0),
                                                   Offset + GA->getOffset()));
          return;
        }
        if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetBlockAddress(
              BA->getBlockAddress(), BA->getValueType(0),
              Offset + BA->getOffset(), BA->getTargetFlags()));
          return;
        }
          Ops.push_back(Op);
          return;
        }
      }
      // Strip one (GA +/- C) layer: remember the constant addend and recurse
      // into the non-constant side.  Subtraction is not commutative, so the
      // constant may only come from the RHS of an ADD.
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
          Op = Op.getOperand(1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
          Op = Op.getOperand(0);
        else
          return;
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      return;
    }
    break;
  }
  }
}
5928
5932
std::pair<unsigned, const TargetRegisterClass *>
                                             StringRef Constraint,
                                             MVT VT) const {
  // Only "{regname}" constraints are resolved here; anything else yields a
  // null register/class pair.
  if (!Constraint.starts_with("{"))
    return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
  assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");

  // Remove the braces from around the name.
  StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);

  // R holds the best fallback match found so far (first legal class that
  // contains the named register, even if VT is not legal for it).
  std::pair<unsigned, const TargetRegisterClass *> R =
      std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));

  // Figure out which register class contains this reg.
  for (const TargetRegisterClass *RC : RI->regclasses()) {
    // If none of the value types for this register class are valid, we
    // can't use it. For example, 64-bit reg classes on 32-bit targets.
    if (!isLegalRC(*RI, *RC))
      continue;

    // Register asm names are matched case-insensitively.
    for (const MCPhysReg &PR : *RC) {
      if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
        std::pair<unsigned, const TargetRegisterClass *> S =
            std::make_pair(PR, RC);

        // If this register class has the requested value type, return it,
        // otherwise keep searching and return the first class found
        // if no other is found which explicitly has the requested type.
        if (RI->isTypeLegalForClass(*RC, VT))
          return S;
        if (!R.second)
          R = S;
      }
    }
  }

  return R;
}
5972
5973//===----------------------------------------------------------------------===//
5974// Constraint Selection.
5975
/// Return true if this is an input operand that is a matching constraint
/// like "4" (i.e. the constraint is a decimal digit naming an output
/// operand this input must be tied to).
  assert(!ConstraintCode.empty() && "No known constraint!");
  return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
}
5982
/// If this is an input matching constraint, this method returns the output
/// operand it matches.  The constraint string is the decimal index of that
/// output operand, so parse it as an integer.
  assert(!ConstraintCode.empty() && "No known constraint!");
  return atoi(ConstraintCode.c_str());
}
5989
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
///
/// Three passes: (1) canonicalize each constraint and compute its value
/// type; (2) if multiple-alternative constraints are present, score every
/// alternative and select the best; (3) validate tied (matching) operand
/// pairs, aborting on incompatible types.
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.
  unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput: {
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
        break;
      }

      // The return value of the call is this value. As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      EVT VT;
      if (auto *STy = dyn_cast<StructType>(Call.getType())) {
        // Multiple outputs are returned as a struct; pick this result's slot.
        VT = getAsmOperandValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        VT = getAsmOperandValueType(DL, Call.getType());
      }
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ++ResNo;
      break;
    }
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      break;
    case InlineAsm::isLabel:
      // Labels consume a callbr indirect destination, not a call argument.
      OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
      ++LabelNo;
      continue;
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // Indirect operands carry their pointee type as an elementtype attr.
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpTy = IntegerType::get(OpTy->getContext(), BitSize);
          break;
        }
      }

      EVT VT = getAsmOperandValueType(DL, OpTy, true);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
                                    OpInfo.ConstraintVT.isFloatingPoint();
        const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
                                   Input.ConstraintVT.isFloatingPoint();
        // Tied operands must agree on int-vs-FP-ness and resolve to the same
        // register class; otherwise codegen cannot satisfy the tie.
        if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
6180
/// Return a number indicating our preference for choosing a type of constraint
/// over another, for the purpose of sorting them. Immediates are almost always
/// preferable (when they can be emitted). A higher return value means a
/// stronger preference for one constraint type relative to another.
/// FIXME: We should prefer registers over memory but doing so may lead to
/// unrecoverable register exhaustion later.
/// https://github.com/llvm/llvm-project/issues/20571
  switch (CT) {
      return 4;
      return 3;
      return 2;
      return 1;
      return 0;
  }
  llvm_unreachable("Invalid constraint type");
}
6205
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
    AsmOperandInfo &info, int maIndex) const {
  // An out-of-range alternative index means no alternative applies; score
  // the operand's primary constraint codes instead.
  if (maIndex >= (int)info.multipleAlternatives.size())
    rCodes = &info.Codes;
  else
    rCodes = &info.multipleAlternatives[maIndex].Codes;
  ConstraintWeight BestWeight = CW_Invalid;

  // Loop over the options, keeping track of the most general one.
  for (const std::string &rCode : *rCodes) {
    ConstraintWeight weight =
        getSingleConstraintMatchWeight(info, rCode.c_str());
    if (weight > BestWeight)
      BestWeight = weight;
  }

  return BestWeight;
}
6229
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
///
/// Only the first character of \p constraint is inspected; the weight
/// reflects how well the actual operand value fits that constraint letter.
    AsmOperandInfo &info, const char *constraint) const {
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  // Look at the constraint type.
  switch (*constraint) {
  case 'i': // immediate integer.
  case 'n': // immediate integer with a known value.
    if (isa<ConstantInt>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 's': // non-explicit integral immediate.
    if (isa<GlobalValue>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 'E': // immediate float if host format.
  case 'F': // immediate float.
    if (isa<ConstantFP>(CallOperandVal))
      weight = CW_Constant;
    break;
  case '<': // memory operand with autodecrement.
  case '>': // memory operand with autoincrement.
  case 'm': // memory operand.
  case 'o': // offsettable memory operand
  case 'V': // non-offsettable memory operand
    weight = CW_Memory;
    break;
  case 'r': // general register.
  case 'g': // general register, memory operand or immediate integer.
    // note: Clang converts "g" to "imr".
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;
  case 'X': // any operand.
  default:
    weight = CW_Default;
    break;
  }
  return weight;
}
6278
/// If there are multiple different constraints that we could pick for this
/// operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
/// into seven classes:
///    Register      -> one specific register
///    RegisterClass -> a group of regs
///    Memory        -> memory
///    Address       -> a symbolic memory reference
///    Immediate     -> immediate values
///    Other         -> magic values (such as "Flag Output Operands")
///    Unknown       -> something we don't recognize yet and can't handle
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it. The problem here
/// is that if we have something that could either be in a register or in
/// memory that use of the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory. Because of
/// this the heuristic we use is:
///
///  1) If there is an 'other' constraint, and if the operand is valid for
///     that constraint, use it. This makes us take advantage of 'i'
///     constraints when available.
///  2) Otherwise, pick the most general constraint present. This prefers
///     'm' over 'r', for example.
///
    TargetLowering::AsmOperandInfo &OpInfo) const {
  ConstraintGroup Ret;

  Ret.reserve(OpInfo.Codes.size());
  for (StringRef Code : OpInfo.Codes) {

    // Indirect 'other' or 'immediate' constraints are not allowed.
    if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
                               CType == TargetLowering::C_Register ||
      continue;

    // Things with matching constraints can only be registers, per gcc
    // documentation. This mainly affects "g" constraints.
    if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
      continue;

    Ret.emplace_back(Code, CType);
  }

    // Most-preferred constraint kinds sort first.
    return getConstraintPiority(a.second) > getConstraintPiority(b.second);
  });

  return Ret;
}
6331
/// If we have an immediate, see if we can lower it. Return true if we can,
/// false otherwise.
/// \p P pairs the constraint code with its constraint type; only
/// 'other'/'immediate' kinds are legal here (see the assert below).
                                     SDValue Op, SelectionDAG *DAG,
                                     const TargetLowering &TLI) {

  assert((P.second == TargetLowering::C_Other ||
          P.second == TargetLowering::C_Immediate) &&
         "need immediate or other");

  // No SDValue was provided for this operand; nothing to lower.
  if (!Op.getNode())
    return false;

  // Success is indicated by the target producing at least one replacement
  // operand for this constraint code.
  std::vector<SDValue> ResultOps;
  TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
  return !ResultOps.empty();
}
6349
/// Determines the constraint code and constraint type to use for the specific
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    if (G.empty())
      return;

    // Walk the candidate list. 'Other'/'immediate' candidates are only chosen
    // if the operand actually lowers to an immediate; otherwise advance to
    // the next candidate.
    unsigned BestIdx = 0;
    for (const unsigned E = G.size();
         BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
                         G[BestIdx].second == TargetLowering::C_Immediate);
         ++BestIdx) {
      if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
        break;
      // If we're out of constraints, just pick the first one.
      if (BestIdx + 1 == E) {
        BestIdx = 0;
        break;
      }
    }

    OpInfo.ConstraintCode = G[BestIdx].first;
    OpInfo.ConstraintType = G[BestIdx].second;
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Constants are handled elsewhere.  For Functions, the type here is the
    // type of the result, which is not what we want to look at; leave them
    // alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<ConstantInt>(v) || isa<Function>(v)) {
      return;
    }

    // Basic blocks and block addresses are lowered as immediates.
    if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
      OpInfo.ConstraintCode = "i";
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}
6407
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
/// Each divisor d = d0 * 2^k (d0 odd) becomes (x >>exact k) * inverse(d0),
/// valid because an 'exact' division has no remainder.
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero is undefined; leave it alone.
    if (C->isZero())
      return false;

    EVT CT = C->getValueType(0);
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      // Strip the power-of-two factor; the corresponding arithmetic shift
      // is exact since the low bits are known zero.
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    APInt Factor = Divisor.multiplicativeInverse();
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, CT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  // Reassemble the per-element shift/factor constants to match the divisor's
  // form (build vector, splat, or scalar).
  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;
  if (UseSRA) {
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
6468
/// Given an exact UDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
/// Unsigned analogue of BuildExactSDIV: d = d0 * 2^k (d0 odd) becomes
/// (x >>exact k) * inverse(d0) modulo 2^bitwidth.
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  EVT VT = N->getValueType(0);
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRL = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero is undefined; leave it alone.
    if (C->isZero())
      return false;

    EVT CT = C->getValueType(0);
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      // Strip the power-of-two factor; the logical shift is exact since the
      // low bits are known zero.
      Divisor.lshrInPlace(Shift);
      UseSRL = true;
    }
    // Calculate the multiplicative inverse modulo BW.
    APInt Factor = Divisor.multiplicativeInverse();
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, CT));
    return true;
  };

  SDValue Op1 = N->getOperand(1);

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
    return SDValue();

  // Reassemble the per-element shift/factor constants to match the divisor's
  // form (build vector, splat, or scalar).
  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = N->getOperand(0);
  if (UseSRL) {
    Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
6530
                                    SelectionDAG &DAG,
                                    SmallVectorImpl<SDNode *> &Created) const {
  // Default hook implementation. If integer division is cheap on this target
  // for the function's attributes, return the node itself so the plain SDIV
  // is kept; otherwise decline (return no value) and let generic expansion
  // handle it.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SDIV as SDIV
  return SDValue();
}
6539
SDValue
                                SelectionDAG &DAG,
                                SmallVectorImpl<SDNode *> &Created) const {
  // Default hook implementation, mirroring BuildSDIVPow2: keep the SREM as-is
  // when integer division is cheap, otherwise decline the expansion.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SREM as SREM
  return SDValue();
}
6549
/// Build sdiv by power-of-2 with conditional move instructions
/// Ref: "Hacker's Delight" by Henry Warren 10-1
/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
///   bgez x, label
///   add x, x, 2**k-1
/// label:
///   sra res, x, k
///   neg res, res (when the divisor is negative)
    SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
    SmallVectorImpl<SDNode *> &Created) const {
  // k = log2(|divisor|); Divisor is +/-2^k so its trailing zero count is k.
  unsigned Lg2 = Divisor.countr_zero();
  EVT VT = N->getValueType(0);

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
  SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);

  // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);

  Created.push_back(Cmp.getNode());
  Created.push_back(Add.getNode());
  Created.push_back(CMov.getNode());

  // Divide by pow2.
  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
                            DAG.getShiftAmountConstant(Lg2, VT, DL));

  // If we're dividing by a positive value, we're done.  Otherwise, we must
  // negate the result.
  if (Divisor.isNonNegative())
    return SRA;

  Created.push_back(SRA.getNode());
  // Negate as (0 - SRA) for the negative-divisor case.
  return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
}
6592
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
/// Returns SDValue() when the transform is not profitable or not possible;
/// all newly created nodes are appended to \p Created.
                                  bool IsAfterLegalization,
                                  bool IsAfterLegalTypes,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  EVT QueryVT = VT;
  if (VT.isVector()) {
    // If the vector type will be legalized to a vector type with the same
    // element type, allow the transform before type legalization if MULHS or
    // SMUL_LOHI are supported.
    QueryVT = getLegalTypeToTransformTo(*DAG.getContext(), VT);
    if (!QueryVT.isVector() ||
      return SDValue();
  } else if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (!VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  bool HasMULHS =
      isOperationLegalOrCustom(ISD::MULHS, QueryVT, IsAfterLegalization);
  bool HasSMUL_LOHI =
      isOperationLegalOrCustom(ISD::SMUL_LOHI, QueryVT, IsAfterLegalization);

  if (isTypeLegal(VT) && !HasMULHS && !HasSMUL_LOHI && MulVT == EVT()) {
    // If type twice as wide legal, widen and use a mul plus a shift.
    EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
    // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
      MulVT = WideVT;
  }

  if (!HasMULHS && !HasSMUL_LOHI && MulVT == EVT())
    return SDValue();

  // If we're after type legalization and SVT is not legal, use the
  // promoted type for creating constants to avoid creating nodes with
  // illegal types.
  if (IsAfterLegalTypes && VT.isVector()) {
    SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
    if (SVT.bitsLT(VT.getScalarType()))
      return SDValue();
    ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
    if (ShSVT.bitsLT(ShVT.getScalarType()))
      return SDValue();
  }
  const unsigned SVTBits = SVT.getSizeInBits();

  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  // Compute the per-element magic constants (Hacker's Delight, ch. 10).
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    // Truncate the divisor to the target scalar type in case it was promoted
    // during type legalization.
    APInt Divisor = C->getAPIntValue().trunc(EltBits);
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOne() || Divisor.isAllOnes()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.Magic = 0;
      magics.ShiftAmount = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(
        DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT));
    Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
    ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false,
                                /*AllowTruncation=*/true))
    return SDValue();

  // Reassemble the per-element constants to match the divisor's form
  // (build vector, splat, or scalar).
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  auto GetMULHS = [&](SDValue X, SDValue Y) {
    if (HasMULHS)
      return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
    if (HasSMUL_LOHI) {
      SDValue LoHi =
          DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return LoHi.getValue(1);
    }

    // Fall back to a widened multiply: the high half of the double-width
    // product is the MULHS result.
    X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
    Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
    Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
    Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                    DAG.getShiftAmountConstant(EltBits, MulVT, dl));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
  };

  SDValue Q = GetMULHS(N0, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
6781
/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
/// Returns SDValue() when the transform is not profitable or not possible;
/// all newly created nodes are appended to \p Created.
                                  bool IsAfterLegalization,
                                  bool IsAfterLegalTypes,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);

  // If the udiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactUDIV(*this, N, dl, DAG, Created);

  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  EVT QueryVT = VT;
  if (VT.isVector()) {
    // If the vector type will be legalized to a vector type with the same
    // element type, allow the transform before type legalization if MULHU or
    // UMUL_LOHI are supported.
    QueryVT = getLegalTypeToTransformTo(*DAG.getContext(), VT);
    if (!QueryVT.isVector() ||
      return SDValue();
  } else if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (!VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  bool HasMULHU =
      isOperationLegalOrCustom(ISD::MULHU, QueryVT, IsAfterLegalization);
  bool HasUMUL_LOHI =
      isOperationLegalOrCustom(ISD::UMUL_LOHI, QueryVT, IsAfterLegalization);

  if (isTypeLegal(VT) && !HasMULHU && !HasUMUL_LOHI && MulVT == EVT()) {
    // If type twice as wide legal, widen and use a mul plus a shift.
    EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
    // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
      MulVT = WideVT;
  }

  if (!HasMULHU && !HasUMUL_LOHI && MulVT == EVT())
    return SDValue();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Try to use leading zeros of the dividend to reduce the multiplier and
  // avoid expensive fixups.
  unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();

  // If we're after type legalization and SVT is not legal, use the
  // promoted type for creating constants to avoid creating nodes with
  // illegal types.
  if (IsAfterLegalTypes && VT.isVector()) {
    SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
    if (SVT.bitsLT(VT.getScalarType()))
      return SDValue();
    ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
    if (ShSVT.bitsLT(ShVT.getScalarType()))
      return SDValue();
  }
  const unsigned SVTBits = SVT.getSizeInBits();

  // Allow i32 to be widened to i64 for uncooperative divisors if i64 MULHU or
  // UMUL_LOHI is supported.
  const EVT WideSVT = MVT::i64;
  const bool HasWideMULHU =
      VT == MVT::i32 &&
      isOperationLegalOrCustom(ISD::MULHU, WideSVT, IsAfterLegalization);
  const bool HasWideUMUL_LOHI =
      VT == MVT::i32 &&
      isOperationLegalOrCustom(ISD::UMUL_LOHI, WideSVT, IsAfterLegalization);
  const bool AllowWiden = (HasWideMULHU || HasWideUMUL_LOHI);

  bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
  bool UseWiden = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  // Compute the per-element magic constants (Hacker's Delight, ch. 10).
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    // Truncate the divisor to the target scalar type in case it was promoted
    // during type legalization.
    APInt Divisor = C->getAPIntValue().trunc(EltBits);

    SDValue PreShift, MagicFactor, NPQFactor, PostShift;

    // Magic algorithm doesn't work for division by 1. We need to emit a select
    // at the end.
    if (Divisor.isOne()) {
      PreShift = PostShift = DAG.getUNDEF(ShSVT);
      MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
    } else {
          Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()),
          /*AllowEvenDivisorOptimization=*/true,
          /*AllowWidenOptimization=*/AllowWiden);

      if (magics.Widen) {
        UseWiden = true;
        MagicFactor = DAG.getConstant(magics.Magic, dl, WideSVT);
      } else {
        MagicFactor = DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT);
      }

      assert(magics.PreShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert(magics.PostShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert((!magics.IsAdd || magics.PreShift == 0) &&
             "Unexpected pre-shift");
      PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
      PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
      // NPQ lanes multiply by 2^(EltBits-1) (acting as SRL-by-1 via MULHU),
      // non-NPQ lanes multiply by zero.
      NPQFactor = DAG.getConstant(
          magics.IsAdd ? APInt::getOneBitSet(SVTBits, EltBits - 1)
                       : APInt::getZero(SVTBits),
          dl, SVT);
      UseNPQ |= magics.IsAdd;
      UsePreShift |= magics.PreShift != 0;
      UsePostShift |= magics.PostShift != 0;
    }

    PreShifts.push_back(PreShift);
    MagicFactors.push_back(MagicFactor);
    NPQFactors.push_back(NPQFactor);
    PostShifts.push_back(PostShift);
    return true;
  };

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
                                /*AllowTruncation=*/true))
    return SDValue();

  // Reassemble the per-element constants to match the divisor's form
  // (build vector, splat, or scalar).
  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
           NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one for scalable vectors");
    PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
    PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }

  if (UseWiden) {
    // Compute: (WideSVT(x) * MagicFactor) >> WideSVTBits.
    SDValue WideN0 = DAG.getNode(ISD::ZERO_EXTEND, dl, WideSVT, N0);

    // Perform WideSVTxWideSVT -> 2*WideSVT multiplication and extract high
    // WideSVT bits
    SDValue High;
    if (HasWideMULHU) {
      High = DAG.getNode(ISD::MULHU, dl, WideSVT, WideN0, MagicFactor);
    } else {
      assert(HasWideUMUL_LOHI);
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(WideSVT, WideSVT),
                      WideN0, MagicFactor);
      High = LoHi.getValue(1);
    }

    Created.push_back(High.getNode());
    return DAG.getNode(ISD::TRUNCATE, dl, VT, High);
  }

  SDValue Q = N0;
  if (UsePreShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
    Created.push_back(Q.getNode());
  }

  auto GetMULHU = [&](SDValue X, SDValue Y) {
    if (HasMULHU)
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
    if (HasUMUL_LOHI) {
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return LoHi.getValue(1);
    }

    // Fall back to a widened multiply: the high half of the double-width
    // product is the MULHU result.
    X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
    Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
    Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
    Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                    DAG.getShiftAmountConstant(EltBits, MulVT, dl));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  if (UseNPQ) {
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
    Created.push_back(NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));

    Created.push_back(NPQ.getNode());

    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created.push_back(Q.getNode());
  }

  if (UsePostShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
    Created.push_back(Q.getNode());
  }

  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Lanes dividing by 1 were skipped by the magic algorithm; select the
  // original numerator for them.
  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
7042
/// If all values in Values that *don't* match the predicate are same 'splat'
/// value, then replace all values with that splat value.
/// Else, if AlternativeReplacement was provided, then replace all values that
/// do match predicate with AlternativeReplacement value.
static void
                          std::function<bool(SDValue)> Predicate,
                          SDValue AlternativeReplacement = SDValue()) {
  SDValue Replacement;
  // Is there a value for which the Predicate does *NOT* match? What is it?
  auto SplatValue = llvm::find_if_not(Values, Predicate);
  if (SplatValue != Values.end()) {
    // Does Values consist only of SplatValue's and values matching Predicate?
    if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
          return Value == *SplatValue || Predicate(Value);
        })) // Then we shall replace values matching predicate with SplatValue.
      Replacement = *SplatValue;
  }
  if (!Replacement) {
    // Oops, we did not find the "baseline" splat value.
    if (!AlternativeReplacement)
      return; // Nothing to do.
    // Let's replace with provided value then.
    Replacement = AlternativeReplacement;
  }
  // Overwrite every matching lane in place.
  std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
}
7070
/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
/// where the divisor and comparison target are constants,
/// return a DAG expression that will generate the same comparison result
/// using only multiplications, additions and shifts/rotations.
/// Ref: "Hacker's Delight" 10-17.
SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                        SDValue CompTargetNode,
                                        DAGCombinerInfo &DCI,
                                        const SDLoc &DL) const {
  // Delegate the actual construction to prepareUREMEqFold; on success, queue
  // every newly created node for re-combining before returning the fold.
  if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
                                         DCI, DL, Built)) {
    for (SDNode *N : Built)
      DCI.AddToWorklist(N);
    return Folded;
  }

  return SDValue();
}
7091
7092SDValue
7093TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
7094 SDValue CompTargetNode, ISD::CondCode Cond,
7095 DAGCombinerInfo &DCI, const SDLoc &DL,
7096 SmallVectorImpl<SDNode *> &Created) const {
7097 // fold (seteq/ne (urem N, D), C) ->
7098 // (setule/ugt (rotr (mul (sub N, C), P), K), Q)
7099 // - D must be constant, with D = D0 * 2^K where D0 is odd
7100 // - P is the multiplicative inverse of D0 modulo 2^W
7101 // - Q = floor(((2^W) - 1) / D)
7102 // where W is the width of the common type of N and D.
7103 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7104 "Only applicable for (in)equality comparisons.");
7105
7106 SelectionDAG &DAG = DCI.DAG;
7107
7108 EVT VT = REMNode.getValueType();
7109 EVT SVT = VT.getScalarType();
7110 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7111 EVT ShSVT = ShVT.getScalarType();
7112
7113 // If MUL is unavailable, we cannot proceed in any case.
7114 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7115 return SDValue();
7116
7117 bool ComparingWithAllZeros = true;
7118 bool AllComparisonsWithNonZerosAreTautological = true;
7119 bool HadTautologicalLanes = false;
7120 bool AllLanesAreTautological = true;
7121 bool HadEvenDivisor = false;
7122 bool AllDivisorsArePowerOfTwo = true;
7123 bool HadTautologicalInvertedLanes = false;
7124 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
7125
7126 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
7127 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7128 if (CDiv->isZero())
7129 return false;
7130
7131 const APInt &D = CDiv->getAPIntValue();
7132 const APInt &Cmp = CCmp->getAPIntValue();
7133
7134 ComparingWithAllZeros &= Cmp.isZero();
7135
7136 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7137 // if C2 is not less than C1, the comparison is always false.
7138 // But we will only be able to produce the comparison that will give the
7139 // opposive tautological answer. So this lane would need to be fixed up.
7140 bool TautologicalInvertedLane = D.ule(Cmp);
7141 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
7142
7143 // If all lanes are tautological (either all divisors are ones, or divisor
7144 // is not greater than the constant we are comparing with),
7145 // we will prefer to avoid the fold.
7146 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
7147 HadTautologicalLanes |= TautologicalLane;
7148 AllLanesAreTautological &= TautologicalLane;
7149
 7150 // If we are comparing with non-zero, we'll need to subtract said
 7151 // comparison value from the LHS. But there is no point in doing that if
 7152 // every lane where we are comparing with non-zero is tautological.
7153 if (!Cmp.isZero())
7154 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
7155
7156 // Decompose D into D0 * 2^K
7157 unsigned K = D.countr_zero();
7158 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7159 APInt D0 = D.lshr(K);
7160
7161 // D is even if it has trailing zeros.
7162 HadEvenDivisor |= (K != 0);
7163 // D is a power-of-two if D0 is one.
7164 // If all divisors are power-of-two, we will prefer to avoid the fold.
7165 AllDivisorsArePowerOfTwo &= D0.isOne();
7166
7167 // P = inv(D0, 2^W)
7168 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7169 unsigned W = D.getBitWidth();
7170 APInt P = D0.multiplicativeInverse();
7171 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7172
7173 // Q = floor((2^W - 1) u/ D)
7174 // R = ((2^W - 1) u% D)
7175 APInt Q, R;
7177
7178 // If we are comparing with zero, then that comparison constant is okay,
7179 // else it may need to be one less than that.
7180 if (Cmp.ugt(R))
7181 Q -= 1;
7182
7184 "We are expecting that K is always less than all-ones for ShSVT");
7185
7186 // If the lane is tautological the result can be constant-folded.
7187 if (TautologicalLane) {
7188 // Set P and K amount to a bogus values so we can try to splat them.
7189 P = 0;
7190 KAmts.push_back(DAG.getAllOnesConstant(DL, ShSVT));
7191 // And ensure that comparison constant is tautological,
7192 // it will always compare true/false.
7193 Q.setAllBits();
7194 } else {
7195 KAmts.push_back(DAG.getConstant(K, DL, ShSVT));
7196 }
7197
7198 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7199 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7200 return true;
7201 };
7202
7203 SDValue N = REMNode.getOperand(0);
7204 SDValue D = REMNode.getOperand(1);
7205
7206 // Collect the values from each element.
7207 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
7208 return SDValue();
7209
7210 // If all lanes are tautological, the result can be constant-folded.
7211 if (AllLanesAreTautological)
7212 return SDValue();
7213
7214 // If this is a urem by a powers-of-two, avoid the fold since it can be
7215 // best implemented as a bit test.
7216 if (AllDivisorsArePowerOfTwo)
7217 return SDValue();
7218
7219 SDValue PVal, KVal, QVal;
7220 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7221 if (HadTautologicalLanes) {
7222 // Try to turn PAmts into a splat, since we don't care about the values
7223 // that are currently '0'. If we can't, just keep '0'`s.
7225 // Try to turn KAmts into a splat, since we don't care about the values
7226 // that are currently '-1'. If we can't, change them to '0'`s.
7228 DAG.getConstant(0, DL, ShSVT));
7229 }
7230
7231 PVal = DAG.getBuildVector(VT, DL, PAmts);
7232 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7233 QVal = DAG.getBuildVector(VT, DL, QAmts);
7234 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7235 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
7236 "Expected matchBinaryPredicate to return one element for "
7237 "SPLAT_VECTORs");
7238 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7239 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7240 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7241 } else {
7242 PVal = PAmts[0];
7243 KVal = KAmts[0];
7244 QVal = QAmts[0];
7245 }
7246
7247 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7248 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
7249 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
7250 assert(CompTargetNode.getValueType() == N.getValueType() &&
7251 "Expecting that the types on LHS and RHS of comparisons match.");
7252 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
7253 }
7254
7255 // (mul N, P)
7256 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7257 Created.push_back(Op0.getNode());
7258
7259 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7260 // divisors as a performance improvement, since rotating by 0 is a no-op.
7261 if (HadEvenDivisor) {
7262 // We need ROTR to do this.
7263 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7264 return SDValue();
7265 // UREM: (rotr (mul N, P), K)
7266 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7267 Created.push_back(Op0.getNode());
7268 }
7269
7270 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
7271 SDValue NewCC =
7272 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7274 if (!HadTautologicalInvertedLanes)
7275 return NewCC;
7276
7277 // If any lanes previously compared always-false, the NewCC will give
7278 // always-true result for them, so we need to fixup those lanes.
7279 // Or the other way around for inequality predicate.
7280 assert(VT.isVector() && "Can/should only get here for vectors.");
7281 Created.push_back(NewCC.getNode());
7282
 7283 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
 7284 // if C2 is not less than C1, the comparison is always false.
 7285 // But we have produced the comparison that will give the
 7286 // opposite tautological answer. So these lanes would need to be fixed up.
7287 SDValue TautologicalInvertedChannels =
7288 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
7289 Created.push_back(TautologicalInvertedChannels.getNode());
7290
7291 // NOTE: we avoid letting illegal types through even if we're before legalize
7292 // ops – legalization has a hard time producing good code for this.
7293 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
7294 // If we have a vector select, let's replace the comparison results in the
7295 // affected lanes with the correct tautological result.
7296 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
7297 DL, SETCCVT, SETCCVT);
7298 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
7299 Replacement, NewCC);
7300 }
7301
7302 // Else, we can just invert the comparison result in the appropriate lanes.
7303 //
7304 // NOTE: see the note above VSELECT above.
7305 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
7306 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
7307 TautologicalInvertedChannels);
7308
7309 return SDValue(); // Don't know how to lower.
7310}
7311
7312/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7313/// where the divisor is constant and the comparison target is zero,
7314/// return a DAG expression that will generate the same comparison result
7315/// using only multiplications, additions and shifts/rotations.
7316/// Ref: "Hacker's Delight" 10-17.
7317SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
 7318 SDValue CompTargetNode,
 7320 DAGCombinerInfo &DCI,
 7321 const SDLoc &DL) const {
 // Delegate the actual construction to prepareSREMEqFold. On success, every
 // node it recorded in 'Built' is queued for the DAG combiner to revisit.
 7323 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
 7324 DCI, DL, Built)) {
 // prepareSREMEqFold promises to create at most 7 new nodes.
 7325 assert(Built.size() <= 7 && "Max size prediction failed.");
 7326 for (SDNode *N : Built)
 7327 DCI.AddToWorklist(N);
 7328 return Folded;
 7329 }
 7330
 // The fold did not apply; caller falls back to normal SREM lowering.
 7331 return SDValue();
 7332}
7333
7334SDValue
 7335TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
 7336 SDValue CompTargetNode, ISD::CondCode Cond,
 7337 DAGCombinerInfo &DCI, const SDLoc &DL,
 7338 SmallVectorImpl<SDNode *> &Created) const {
 7339 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
 7340 // Fold:
 7341 // (seteq/ne (srem N, D), 0)
 7342 // To:
 7343 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
 7344 //
 7345 // - D must be constant, with D = D0 * 2^K where D0 is odd
 7346 // - P is the multiplicative inverse of D0 modulo 2^W
 7347 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
 7348 // - Q = floor((2 * A) / (2^K))
 7349 // where W is the width of the common type of N and D.
 7350 //
 7351 // When D is a power of two (and thus D0 is 1), the normal
 7352 // formula for A and Q don't apply, because the derivation
 7353 // depends on D not dividing 2^(W-1), and thus theorem ZRS
 7354 // does not apply. This specifically fails when N = INT_MIN.
 7355 //
 7356 // Instead, for power-of-two D, we use:
 7357 // - A = 0
 7358 // | -> No offset needed. We're effectively treating it the same as urem.
 7359 // - Q = 2^(W-K) - 1
 7360 // |-> Test that the top K bits are zero after rotation
 7361 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
 7362 "Only applicable for (in)equality comparisons.");
 7363
 7364 SelectionDAG &DAG = DCI.DAG;
 7365
 // Work out the vector type, its scalar element type, and the matching
 // shift-amount types -- P/A/Q constants use SVT, K constants use ShSVT.
 7366 EVT VT = REMNode.getValueType();
 7367 EVT SVT = VT.getScalarType();
 7368 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
 7369 EVT ShSVT = ShVT.getScalarType();
 7370
 7371 // If we are after ops legalization, and MUL is unavailable, we can not
 7372 // proceed.
 7373 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
 7374 return SDValue();
 7375
 7376 // TODO: Could support comparing with non-zero too.
 7377 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
 7378 if (!CompTarget || !CompTarget->isZero())
 7379 return SDValue();
 7380
 // Per-lane facts accumulated by BuildSREMPattern below; they decide whether
 // the fold is profitable and which operations (rotr, splats) are needed.
 7381 bool HadOneDivisor = false;
 7382 bool AllDivisorsAreOnes = true;
 7383 bool HadEvenDivisor = false;
 7384 bool AllDivisorsArePowerOfTwo = true;
 7385 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
 7386
 // Derive the P, A, K and Q constants for a single divisor lane; returning
 // false aborts the whole fold (via matchUnaryPredicate below).
 7387 auto BuildSREMPattern = [&](ConstantSDNode *C) {
 7388 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
 7389 if (C->isZero())
 7390 return false;
 7391
 7392 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
 7393
 7394 // WARNING: this fold is only valid for positive divisors!
 7395 // `rem %X, -C` is equivalent to `rem %X, C`
 7396 APInt D = C->getAPIntValue().abs();
 7397
 7398 // If all divisors are ones, we will prefer to avoid the fold.
 7399 HadOneDivisor |= D.isOne();
 7400 AllDivisorsAreOnes &= D.isOne();
 7401
 7402 // Decompose D into D0 * 2^K
 7403 unsigned K = D.countr_zero();
 7404 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
 7405 APInt D0 = D.lshr(K);
 7406
 7407 // D is even if it has trailing zeros.
 7408 HadEvenDivisor |= (K != 0);
 7409
 7410 // D is a power-of-two if D0 is one. This includes INT_MIN.
 7411 // If all divisors are power-of-two, we will prefer to avoid the fold.
 7412 AllDivisorsArePowerOfTwo &= D0.isOne();
 7413
 7414 // P = inv(D0, 2^W)
 7415 // 2^W requires W + 1 bits, so we have to extend and then truncate.
 7416 unsigned W = D.getBitWidth();
 7417 APInt P = D0.multiplicativeInverse();
 7418 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
 7419
 7420 // A = floor((2^(W - 1) - 1) / D0) & -2^K
 7421 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
 7422 A.clearLowBits(K);
 7423
 7424 // Q = floor((2 * A) / (2^K))
 7425 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
 7426
 7428 "We are expecting that A is always less than all-ones for SVT");
 7430 "We are expecting that K is always less than all-ones for ShSVT");
 7431
 7432 // If D was a power of two, apply the alternate constant derivation.
 7433 if (D0.isOne()) {
 7434 // A = 0
 7435 A = APInt(W, 0);
 7436 // - Q = 2^(W-K) - 1
 7437 Q = APInt::getLowBitsSet(W, W - K);
 7438 }
 7439
 7440 // If the divisor is 1 the result can be constant-folded.
 7441 if (D.isOne()) {
 7442 // Set P, A and K to a bogus values so we can try to splat them.
 7443 P = 0;
 7444 A.setAllBits();
 7445 KAmts.push_back(DAG.getAllOnesConstant(DL, ShSVT));
 7446
 7447 // x ?% 1 == 0 <--> true <--> x u<= -1
 7448 Q.setAllBits();
 7449 } else {
 7450 KAmts.push_back(DAG.getConstant(K, DL, ShSVT));
 7451 }
 7452
 7453 PAmts.push_back(DAG.getConstant(P, DL, SVT));
 7454 AAmts.push_back(DAG.getConstant(A, DL, SVT));
 7455 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
 7456 return true;
 7457 };
 7458
 7459 SDValue N = REMNode.getOperand(0);
 7460 SDValue D = REMNode.getOperand(1);
 7461
 7462 // Collect the values from each element.
 7463 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
 7464 return SDValue();
 7465
 7466 // If this is a srem by a one, avoid the fold since it can be constant-folded.
 7467 if (AllDivisorsAreOnes)
 7468 return SDValue();
 7469
 7470 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
 7471 // since it can be best implemented as a bit test.
 7472 if (AllDivisorsArePowerOfTwo)
 7473 return SDValue();
 7474
 // Materialize the collected per-lane constants as DAG values, handling
 // fixed vectors, scalable (splat) vectors, and scalars separately.
 7475 SDValue PVal, AVal, KVal, QVal;
 7476 if (D.getOpcode() == ISD::BUILD_VECTOR) {
 7477 if (HadOneDivisor) {
 7478 // Try to turn PAmts into a splat, since we don't care about the values
 7479 // that are currently '0'. If we can't, just keep '0'`s.
 7481 // Try to turn AAmts into a splat, since we don't care about the
 7482 // values that are currently '-1'. If we can't, change them to '0'`s.
 7484 DAG.getConstant(0, DL, SVT));
 7485 // Try to turn KAmts into a splat, since we don't care about the values
 7486 // that are currently '-1'. If we can't, change them to '0'`s.
 7488 DAG.getConstant(0, DL, ShSVT));
 7489 }
 7490
 7491 PVal = DAG.getBuildVector(VT, DL, PAmts);
 7492 AVal = DAG.getBuildVector(VT, DL, AAmts);
 7493 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
 7494 QVal = DAG.getBuildVector(VT, DL, QAmts);
 7495 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
 7496 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
 7497 QAmts.size() == 1 &&
 7498 "Expected matchUnaryPredicate to return one element for scalable "
 7499 "vectors");
 7500 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
 7501 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
 7502 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
 7503 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
 7504 } else {
 7505 assert(isa<ConstantSDNode>(D) && "Expected a constant");
 7506 PVal = PAmts[0];
 7507 AVal = AAmts[0];
 7508 KVal = KAmts[0];
 7509 QVal = QAmts[0];
 7510 }
 7511
 7512 // (mul N, P)
 7513 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
 7514 Created.push_back(Op0.getNode());
 7515
 7516 // We need ADD to do this.
 7517 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
 7518 return SDValue();
 7519
 7520 // (add (mul N, P), A)
 7521 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
 7522 Created.push_back(Op0.getNode());
 7523
 7524 // Rotate right only if any divisor was even. We avoid rotates for all-odd
 7525 // divisors as a performance improvement, since rotating by 0 is a no-op.
 7526 if (HadEvenDivisor) {
 7527 // We need ROTR to do this.
 7528 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
 7529 return SDValue();
 7530 // SREM: (rotr (add (mul N, P), A), K)
 7531 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
 7532 Created.push_back(Op0.getNode());
 7533 }
 7534
 7535 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
 7536 return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
 7538}
7539
 // NOTE(review): the first line of this signature (line 7540) is not visible
 // in this chunk -- presumably this is TargetLowering::getSqrtInputTest taking
 // (SDValue Op, SelectionDAG &DAG, ...); confirm against the header.
 // Builds a boolean SETCC that is true when Op would be treated as
 // zero/denormal under the given denormal-handling Mode.
 7541 const DenormalMode &Mode,
 7542 SDNodeFlags Flags) const {
 7543 SDLoc DL(Op);
 7544 EVT VT = Op.getValueType();
 7545 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 7546 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
 7547
 7548 // This is specifically a check for the handling of denormal inputs, not the
 7549 // result.
 7550 if (Mode.Input == DenormalMode::PreserveSign ||
 7551 Mode.Input == DenormalMode::PositiveZero) {
 // Denormal inputs are flushed to zero, so an exact equality test against
 // +0.0 suffices (it matches both +0.0 and -0.0).
 7552 // Test = X == 0.0
 7553 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ, /*Chain=*/{},
 7554 /*Signaling=*/false, Flags);
 7555 }
 7556
 7557 // Testing it with denormal inputs to avoid wrong estimate.
 7558 //
 // Denormals are honored: classify anything with magnitude below the
 // smallest normalized value for this type's float semantics.
 7559 // Test = fabs(X) < SmallestNormal
 7560 const fltSemantics &FltSem = VT.getFltSemantics();
 7561 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
 7562 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
 7563 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op, Flags);
 7564 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT, /*Chain=*/{},
 7565 /*Signaling=*/false, Flags);
 7566}
7567
 // NOTE(review): the first line of this signature (line 7568) is not visible
 // in this chunk -- the recursive calls below name the function as
 // TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, ...);
 // confirm against the header. It returns a negated form of Op (or a null
 // SDValue), reporting the relative cost through the 'Cost' out-parameter.
 7569 bool LegalOps, bool OptForSize,
 7571 unsigned Depth) const {
 7572 // fneg is removable even if it has multiple uses.
 7573 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
 7575 return Op.getOperand(0);
 7576 }
 7577
 7578 // Don't recurse exponentially.
 7580 return SDValue();
 7581
 7582 // Pre-increment recursion depth for use in recursive calls.
 7583 ++Depth;
 7584 const SDNodeFlags Flags = Op->getFlags();
 7585 EVT VT = Op.getValueType();
 7586 unsigned Opcode = Op.getOpcode();
 7587
 7588 // Don't allow anything with multiple uses unless we know it is free.
 7589 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
 7590 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
 7591 isFPExtFree(VT, Op.getOperand(0).getValueType());
 7592 if (!IsFreeExtend)
 7593 return SDValue();
 7594 }
 7595
 // Helper to delete a speculatively-built negated node that ended up unused.
 7596 auto RemoveDeadNode = [&](SDValue N) {
 7597 if (N && N.getNode()->use_empty())
 7598 DAG.RemoveDeadNode(N.getNode());
 7599 };
 7600
 7601 SDLoc DL(Op);
 7602
 7603 // Because getNegatedExpression can delete nodes we need a handle to keep
 7604 // temporary nodes alive in case the recursion manages to create an identical
 7605 // node.
 7606 std::list<HandleSDNode> Handles;
 7607
 7608 switch (Opcode) {
 7609 case ISD::ConstantFP: {
 7610 // Don't invert constant FP values after legalization unless the target says
 7611 // the negated constant is legal.
 7612 bool IsOpLegal =
 7614 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
 7615 OptForSize);
 7616
 7617 if (LegalOps && !IsOpLegal)
 7618 break;
 7619
 7620 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
 7621 V.changeSign();
 7622 SDValue CFP = DAG.getConstantFP(V, DL, VT);
 7623
 7624 // If we already have the use of the negated floating constant, it is free
 7625 // to negate it even it has multiple uses.
 7626 if (!Op.hasOneUse() && CFP.use_empty())
 7627 break;
 7629 return CFP;
 7630 }
 7631 case ISD::SPLAT_VECTOR: {
 7632 // fold splat_vector(fneg(X)) -> splat_vector(-X)
 7633 SDValue X = Op.getOperand(0);
 7635 break;
 7636
 7637 SDValue NegX = getCheaperNegatedExpression(X, DAG, LegalOps, OptForSize);
 7638 if (!NegX)
 7639 break;
 7641 return DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, NegX);
 7642 }
 7643 case ISD::BUILD_VECTOR: {
 7644 // Only permit BUILD_VECTOR of constants.
 7645 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
 7646 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
 7647 }))
 7648 break;
 7649
 7650 bool IsOpLegal =
 7653 llvm::all_of(Op->op_values(), [&](SDValue N) {
 7654 return N.isUndef() ||
 7655 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
 7656 OptForSize);
 7657 });
 7658
 7659 if (LegalOps && !IsOpLegal)
 7660 break;
 7661
 // Negate every (constant) element; undef elements pass through unchanged.
 7663 for (SDValue C : Op->op_values()) {
 7664 if (C.isUndef()) {
 7665 Ops.push_back(C);
 7666 continue;
 7667 }
 7668 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
 7669 V.changeSign();
 7670 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
 7671 }
 7673 return DAG.getBuildVector(VT, DL, Ops);
 7674 }
 7675 case ISD::FADD: {
 7676 if (!Flags.hasNoSignedZeros())
 7677 break;
 7678
 7679 // After operation legalization, it might not be legal to create new FSUBs.
 7680 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
 7681 break;
 7682 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
 7683
 7684 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
 7686 SDValue NegX =
 7687 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
 7688 // Prevent this node from being deleted by the next call.
 7689 if (NegX)
 7690 Handles.emplace_back(NegX);
 7691
 7692 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
 7694 SDValue NegY =
 7695 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
 7696
 7697 // We're done with the handles.
 7698 Handles.clear();
 7699
 7700 // Negate the X if its cost is less or equal than Y.
 7701 if (NegX && (CostX <= CostY)) {
 7702 Cost = CostX;
 7703 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
 7704 if (NegY != N)
 7705 RemoveDeadNode(NegY);
 7706 return N;
 7707 }
 7708
 7709 // Negate the Y if it is not expensive.
 7710 if (NegY) {
 7711 Cost = CostY;
 7712 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
 7713 if (NegX != N)
 7714 RemoveDeadNode(NegX);
 7715 return N;
 7716 }
 7717 break;
 7718 }
 7719 case ISD::FSUB: {
 7720 // We can't turn -(A-B) into B-A when we honor signed zeros.
 7721 if (!Flags.hasNoSignedZeros())
 7722 break;
 7723
 7724 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
 7725 // fold (fneg (fsub 0, Y)) -> Y
 7726 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
 7727 if (C->isZero()) {
 7729 return Y;
 7730 }
 7731
 7732 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
 7734 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
 7735 }
 7736 case ISD::FMUL:
 7737 case ISD::FDIV: {
 7738 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
 7739
 7740 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
 7742 SDValue NegX =
 7743 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
 7744 // Prevent this node from being deleted by the next call.
 7745 if (NegX)
 7746 Handles.emplace_back(NegX);
 7747
 7748 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
 7750 SDValue NegY =
 7751 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
 7752
 7753 // We're done with the handles.
 7754 Handles.clear();
 7755
 7756 // Negate the X if its cost is less or equal than Y.
 7757 if (NegX && (CostX <= CostY)) {
 7758 Cost = CostX;
 7759 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
 7760 if (NegY != N)
 7761 RemoveDeadNode(NegY);
 7762 return N;
 7763 }
 7764
 7765 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
 7766 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
 7767 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
 7768 break;
 7769
 7770 // Negate the Y if it is not expensive.
 7771 if (NegY) {
 7772 Cost = CostY;
 7773 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
 7774 if (NegX != N)
 7775 RemoveDeadNode(NegX);
 7776 return N;
 7777 }
 7778 break;
 7779 }
 7780 case ISD::FMA:
 7781 case ISD::FMULADD:
 7782 case ISD::FMAD: {
 7783 if (!Flags.hasNoSignedZeros())
 7784 break;
 7785
 7786 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
 7788 SDValue NegZ =
 7789 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
 7790 // Give up if fail to negate the Z.
 7791 if (!NegZ)
 7792 break;
 7793
 7794 // Prevent this node from being deleted by the next two calls.
 7795 Handles.emplace_back(NegZ);
 7796
 7797 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
 7799 SDValue NegX =
 7800 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
 7801 // Prevent this node from being deleted by the next call.
 7802 if (NegX)
 7803 Handles.emplace_back(NegX);
 7804
 7805 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
 7807 SDValue NegY =
 7808 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
 7809
 7810 // We're done with the handles.
 7811 Handles.clear();
 7812
 7813 // Negate the X if its cost is less or equal than Y.
 7814 if (NegX && (CostX <= CostY)) {
 7815 Cost = std::min(CostX, CostZ);
 7816 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
 7817 if (NegY != N)
 7818 RemoveDeadNode(NegY);
 7819 return N;
 7820 }
 7821
 7822 // Negate the Y if it is not expensive.
 7823 if (NegY) {
 7824 Cost = std::min(CostY, CostZ);
 7825 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
 7826 if (NegX != N)
 7827 RemoveDeadNode(NegX);
 7828 return N;
 7829 }
 7830 break;
 7831 }
 7832
 7833 case ISD::FP_EXTEND:
 7834 case ISD::FSIN:
 // These ops commute with negation: fneg(op(X)) == op(fneg(X)).
 7835 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
 7836 OptForSize, Cost, Depth))
 7837 return DAG.getNode(Opcode, DL, VT, NegV);
 7838 break;
 7839 case ISD::FP_ROUND:
 // Same as above, but FP_ROUND carries an extra truncation-kind operand
 // that must be forwarded.
 7840 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
 7841 OptForSize, Cost, Depth))
 7842 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
 7843 break;
 7844 case ISD::SELECT:
 7845 case ISD::VSELECT: {
 7846 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
 7847 // iff at least one cost is cheaper and the other is neutral/cheaper
 7848 SDValue LHS = Op.getOperand(1);
 7850 SDValue NegLHS =
 7851 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
 7852 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
 7853 RemoveDeadNode(NegLHS);
 7854 break;
 7855 }
 7856
 7857 // Prevent this node from being deleted by the next call.
 7858 Handles.emplace_back(NegLHS);
 7859
 7860 SDValue RHS = Op.getOperand(2);
 7862 SDValue NegRHS =
 7863 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
 7864
 7865 // We're done with the handles.
 7866 Handles.clear();
 7867
 // Bail unless both arms negate at neutral-or-better cost and at least one
 // of them is strictly cheaper -- otherwise the fold is not a win.
 7868 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
 7869 (CostLHS != NegatibleCost::Cheaper &&
 7870 CostRHS != NegatibleCost::Cheaper)) {
 7871 RemoveDeadNode(NegLHS);
 7872 RemoveDeadNode(NegRHS);
 7873 break;
 7874 }
 7875
 7876 Cost = std::min(CostLHS, CostRHS);
 7877 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
 7878 }
 7879 }
 7880
 // No profitable negation pattern matched.
 7881 return SDValue();
 7882}
7883
7884//===----------------------------------------------------------------------===//
7885// Legalization Utilities
7886//===----------------------------------------------------------------------===//
7887
// Expand a wide multiply (MUL / UMUL_LOHI / SMUL_LOHI on type VT) into
// operations on the half-width type HiLoVT, using whichever of
// MULHS/MULHU/SMUL_LOHI/UMUL_LOHI the target makes available (per 'Kind').
// The half-width pieces of the result are appended to 'Result' (lo first);
// returns false when no viable expansion exists. LL/LH/RL/RH may optionally
// carry pre-split halves of the operands -- either all four or none.
7888bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
 7889 SDValue LHS, SDValue RHS,
 7891 EVT HiLoVT, SelectionDAG &DAG,
 7892 MulExpansionKind Kind, SDValue LL,
 7893 SDValue LH, SDValue RL, SDValue RH) const {
 7894 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
 7895 Opcode == ISD::SMUL_LOHI);
 7896
 // Determine which half-width multiply flavors are usable for the expansion.
 7897 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
 7899 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
 7901 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
 7903 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
 7905
 7906 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
 7907 return false;
 7908
 7909 unsigned OuterBitSize = VT.getScalarSizeInBits();
 7910 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
 7911
 7912 // LL, LH, RL, and RH must be either all NULL or all set to a value.
 7913 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
 7914 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
 7915
 // Emit a half-width L*R producing both halves, preferring the fused
 // [SU]MUL_LOHI node and falling back to separate MUL + MULH[SU].
 7916 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
 7917 bool Signed) -> bool {
 7918 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
 7919 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
 7920 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
 7921 Hi = Lo.getValue(1);
 7922 return true;
 7923 }
 7924 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
 7925 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
 7926 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
 7927 return true;
 7928 }
 7929 return false;
 7930 };
 7931
 7932 SDValue Lo, Hi;
 7933
 7934 if (!LL.getNode() && !RL.getNode() &&
 7936 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
 7937 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
 7938 }
 7939
 7940 if (!LL.getNode())
 7941 return false;
 7942
 // Fast path: if both operands provably fit in the low half (high bits
 // known zero), a single unsigned half-width multiply yields the answer.
 7943 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
 7944 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
 7945 DAG.MaskedValueIsZero(RHS, HighMask)) {
 7946 // The inputs are both zero-extended.
 7947 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
 7948 Result.push_back(Lo);
 7949 Result.push_back(Hi);
 7950 if (Opcode != ISD::MUL) {
 7951 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
 7952 Result.push_back(Zero);
 7953 Result.push_back(Zero);
 7954 }
 7955 return true;
 7956 }
 7957 }
 7958
 7959 if (!VT.isVector() && Opcode == ISD::MUL &&
 7960 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
 7961 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
 7962 // The input values are both sign-extended.
 7963 // TODO non-MUL case?
 7964 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
 7965 Result.push_back(Lo);
 7966 Result.push_back(Hi);
 7967 return true;
 7968 }
 7969 }
 7970
 7971 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
 7972 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
 7973
 // Split out the high halves of the operands if the caller didn't.
 7974 if (!LH.getNode() && !RH.getNode() &&
 7977 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
 7978 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
 7979 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
 7980 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
 7981 }
 7982
 7983 if (!LH.getNode())
 7984 return false;
 7985
 // General schoolbook expansion: LL*RL gives the low piece ...
 7986 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
 7987 return false;
 7988
 7989 Result.push_back(Lo);
 7990
 7991 if (Opcode == ISD::MUL) {
 // For plain MUL only the low VT bits matter, so the cross terms can be
 // folded into the high half directly without carry tracking.
 7992 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
 7993 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
 7994 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
 7995 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
 7996 Result.push_back(Hi);
 7997 return true;
 7998 }
 7999
 8000 // Compute the full width result.
 8001 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
 8002 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
 8003 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
 8004 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
 8005 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
 8006 };
 8007
 8008 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
 8009 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
 8010 return false;
 8011
 8012 // This is effectively the add part of a multiply-add of half-sized operands,
 8013 // so it cannot overflow.
 8014 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
 8015
 8016 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
 8017 return false;
 8018
 8019 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
 8020 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 8021
 // Propagate the carry from the cross-term addition, using the legacy
 // glued ADDC/ADDE pair when the target supports it, else UADDO_CARRY.
 8022 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
 8024 if (UseGlue)
 8025 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
 8026 Merge(Lo, Hi));
 8027 else
 8028 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
 8029 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
 8030
 8031 SDValue Carry = Next.getValue(1);
 8032 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
 8033 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
 8034
 8035 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
 8036 return false;
 8037
 8038 if (UseGlue)
 8039 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
 8040 Carry);
 8041 else
 8042 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
 8043 Zero, Carry);
 8044
 8045 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
 8046
 8047 if (Opcode == ISD::SMUL_LOHI) {
 // Signed correction: when a high half is negative, the unsigned product
 // over-counts by the other operand's low half -- subtract it back out.
 8048 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
 8049 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
 8050 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
 8051
 8052 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
 8053 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
 8054 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
 8055 }
 8056
 8057 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
 8058 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
 8059 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
 8060 return true;
 8061}
8062
 // NOTE(review): the first line of this signature (line 8063) is not visible
 // in this chunk -- presumably TargetLowering::expandMUL(SDNode *N, SDValue
 // &Lo, SDValue &Hi, EVT HiLoVT, ...); confirm against the header.
 // Thin wrapper: forwards N's opcode/type/operands to expandMUL_LOHI and, on
 // success, unpacks the two half-width results into Lo and Hi.
 8064 SelectionDAG &DAG, MulExpansionKind Kind,
 8065 SDValue LL, SDValue LH, SDValue RL,
 8066 SDValue RH) const {
 8068 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
 8069 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
 8070 DAG, Kind, LL, LH, RL, RH);
 8071 if (Ok) {
 // A plain MUL expansion yields exactly the low and high halves.
 8072 assert(Result.size() == 2);
 8073 Lo = Result[0];
 8074 Hi = Result[1];
 8075 }
 8076 return Ok;
 8077}
8078
8079// Optimize unsigned division or remainder by constants for types twice as large
8080// as a legal VT.
8081//
8082// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
8083// can be computed
8084// as:
8085// Sum = __builtin_uadd_overflow(Lo, High, &Sum);
8086// Remainder = Sum % Constant;
8087//
8088// If (1 << (BitWidth / 2)) % Constant != 1, we can search for a smaller value
8089// W such that W != (BitWidth / 2) and (1 << W) % Constant == 1. We can break
8090// High:Low into 3 chunks of W bits and compute remainder as
8091// Sum = Chunk0 + Chunk1 + Chunk2;
8092// Remainder = Sum % Constant;
8093//
8094// This is based on "Remainder by Summing Digits" from Hacker's Delight.
8095//
8096// For division, we can compute the remainder using the algorithm described
8097// above, subtract it from the dividend to get an exact multiple of Constant.
8098 // Then multiply that exact multiple by the multiplicative inverse modulo
8099// (1 << (BitWidth / 2)) to get the quotient.
8100
8101// If Constant is even, we can shift right the dividend and the divisor by the
8102// number of trailing zeros in Constant before applying the remainder algorithm.
8103// If we're after the quotient, we can subtract this value from the shifted
8104// dividend and multiply by the multiplicative inverse of the shifted divisor.
8105// If we want the remainder, we shift the value left by the number of trailing
8106// zeros and add the bits that were shifted out of the dividend.
// Expand UDIV/UREM/UDIVREM of a (2*HBitWidth)-bit value by a constant into
// operations on the half-width type HiLoVT, using the "remainder by summing
// digits" decomposition described in the comment block above. On success the
// quotient halves and/or remainder halves (depending on Opcode) are appended
// to Result and true is returned; returns false if no suitable chunk width
// exists for this divisor.
8107 bool TargetLowering::expandUDIVREMByConstantViaUREMDecomposition(
8108 SDNode *N, APInt Divisor, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
8109 SelectionDAG &DAG, SDValue LL, SDValue LH) const {
8110 unsigned Opcode = N->getOpcode();
8111 EVT VT = N->getValueType(0);
8112 
8113 unsigned BitWidth = Divisor.getBitWidth();
8114 unsigned HBitWidth = BitWidth / 2;
// NOTE(review): extraction lost doxygen line 8115, the opening of this
// assert (presumably "assert(VT.getScalarSizeInBits() == BitWidth &&") —
// verify against the original file.
8116 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8117 
8118 // If the divisor is even, shift it until it becomes odd.
8119 unsigned TrailingZeros = 0;
8120 if (!Divisor[0]) {
8121 TrailingZeros = Divisor.countr_zero();
8122 Divisor.lshrInPlace(TrailingZeros);
8123 }
8124 
8125 // After removing trailing zeros, the divisor needs to be less than
8126 // (1 << HBitWidth).
8127 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
8128 if (Divisor.uge(HalfMaxPlus1))
8129 return false;
8130 
8131 // Look for the largest chunk width W such that (1 << W) % Divisor == 1 or
8132 // (1 << W) % Divisor == -1.
8133 unsigned BestChunkWidth = 0, AltChunkWidth = 0;
8134 for (unsigned I = HBitWidth, E = HBitWidth / 2; I > E; --I) {
8135 // Skip HBitWidth-1, it doesn't have enough bits for carries.
8136 if (I == HBitWidth - 1)
8137 continue;
8138 
8139 APInt Mod = APInt::getOneBitSet(Divisor.getBitWidth(), I).urem(Divisor);
8140 
8141 if (Mod.isOne()) {
8142 BestChunkWidth = I;
8143 break;
8144 }
8145 
8146 // We have an alternate strategy for Remainder == Divisor - 1.
8147 // FIXME: Support HBitWidth.
8148 if (I != HBitWidth && Mod == Divisor - 1)
8149 AltChunkWidth = I;
8150 }
8151 
// Fall back to the alternating add/sub strategy if no exact chunk width was
// found; give up entirely if neither strategy applies.
8152 bool Alternate = false;
8153 if (!BestChunkWidth) {
8154 if (!AltChunkWidth)
8155 return false;
8156 Alternate = true;
8157 BestChunkWidth = AltChunkWidth;
8158 }
8159 
8160 SDLoc dl(N);
8161 
8162 assert(!LL == !LH && "Expected both input halves or no input halves!");
8163 if (!LL)
8164 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
8165 
8166 bool HasFSHR = isOperationLegal(ISD::FSHR, HiLoVT);
8167 
// Build (Hi:Lo) >> ShiftAmt, preferring a funnel shift when legal and
// falling back to SRL/SHL/OR otherwise.
8168 auto GetFSHR = [&](SDValue Lo, SDValue Hi, unsigned ShiftAmt) {
8169 assert(ShiftAmt > 0 && ShiftAmt < HBitWidth);
8170 if (HasFSHR)
8171 return DAG.getNode(ISD::FSHR, dl, HiLoVT, Hi, Lo,
8172 DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl));
8173 return DAG.getNode(
8174 ISD::OR, dl, HiLoVT,
8175 DAG.getNode(ISD::SRL, dl, HiLoVT, Lo,
8176 DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl)),
8177 DAG.getNode(
8178 ISD::SHL, dl, HiLoVT, Hi,
8179 DAG.getShiftAmountConstant(HBitWidth - ShiftAmt, HiLoVT, dl)));
8180 };
8181 
8182 // Helper to perform a right shift on a double-width value split into two
8183 // HiLoVT halves. Handles shifts >= HBitWidth by moving Hi to Lo and
8183 // shifting Hi.
8184 auto ShiftRight = [&](SDValue &Lo, SDValue &Hi, unsigned ShiftAmt) {
8185 if (ShiftAmt == 0)
8186 return;
8187 if (ShiftAmt < HBitWidth) {
8188 Lo = GetFSHR(Lo, Hi, ShiftAmt);
8189 Hi = DAG.getNode(ISD::SRL, dl, HiLoVT, Hi,
8190 DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl));
8191 } else if (ShiftAmt == HBitWidth) {
8192 Lo = Hi;
8193 Hi = DAG.getConstant(0, dl, HiLoVT);
8194 } else {
8195 Lo = DAG.getNode(
8196 ISD::SRL, dl, HiLoVT, Hi,
8197 DAG.getShiftAmountConstant(ShiftAmt - HBitWidth, HiLoVT, dl));
8198 Hi = DAG.getConstant(0, dl, HiLoVT);
8199 }
8200 };
8201 
8202 // Shift the input by the number of TrailingZeros in the divisor. The
8203 // shifted out bits will be added to the remainder later.
8204 SDValue PartialRemL, PartialRemH;
8205 if (TrailingZeros && Opcode != ISD::UDIV) {
8206 // Save the shifted off bits if we need the remainder.
8207 if (TrailingZeros < HBitWidth) {
8208 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
8209 PartialRemL = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
8210 DAG.getConstant(Mask, dl, HiLoVT));
8211 } else if (TrailingZeros == HBitWidth) {
8212 // All of LL is part of the remainder.
8213 PartialRemL = LL;
8214 } else {
8215 // TrailingZeros > HBitWidth: LL and part of LH are the remainder.
8216 PartialRemL = LL;
8217 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros - HBitWidth);
8218 PartialRemH = DAG.getNode(ISD::AND, dl, HiLoVT, LH,
8219 DAG.getConstant(Mask, dl, HiLoVT));
8220 }
8221 }
8222 
8223 SDValue Sum;
8224 // If BestChunkWidth is HBitWidth add low and high half. If there is a carry
8225 // out, add that to the final sum.
8226 if (BestChunkWidth == HBitWidth) {
8227 assert(!Alternate);
8228 // Shift LH:LL right if there were trailing zeros in the divisor.
8229 ShiftRight(LL, LH, TrailingZeros);
8230 
8231 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8232 EVT SetCCType =
8233 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
// NOTE(review): extraction lost doxygen line 8234. The matching "} else {"
// below implies it opened an if, presumably
// "if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {" — verify.
8235 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
8236 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
8237 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
8238 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
8239 } else {
8240 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
8241 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
8242 // If the boolean for the target is 0 or 1, we can add the setcc result
8243 // directly.
8244 if (getBooleanContents(HiLoVT) ==
// NOTE(review): extraction lost doxygen line 8245, the right-hand side of
// this comparison (presumably
// "TargetLoweringBase::ZeroOrOneBooleanContent)") — verify.
8246 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
8247 else
8248 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
8249 DAG.getConstant(0, dl, HiLoVT));
8250 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
8251 }
8252 } else {
8253 // Otherwise split into multiple chunks and add them together. We chose
8254 // BestChunkWidth so that the sum will not overflow.
8255 SDValue Mask = DAG.getConstant(
8256 APInt::getLowBitsSet(HBitWidth, BestChunkWidth), dl, HiLoVT);
8257 
8258 for (unsigned I = 0; I < BitWidth - TrailingZeros; I += BestChunkWidth) {
8259 // If there were trailing zeros in the divisor, increase the shift amount.
8260 unsigned Shift = I + TrailingZeros;
8261 SDValue Chunk;
8262 if (Shift == 0)
8263 Chunk = LL;
8264 else if (Shift >= HBitWidth)
8265 Chunk = DAG.getNode(
8266 ISD::SRL, dl, HiLoVT, LH,
8267 DAG.getShiftAmountConstant(Shift - HBitWidth, HiLoVT, dl));
8268 else
8269 Chunk = GetFSHR(LL, LH, Shift);
8270 // If we're on the last chunk, we don't need an AND.
8271 if (I + BestChunkWidth < BitWidth - TrailingZeros)
8272 Chunk = DAG.getNode(ISD::AND, dl, HiLoVT, Chunk, Mask);
8273 if (!Sum) {
8274 Sum = Chunk;
8275 } else {
8276 // For Alternate, we need to subtract odd chunks.
8277 unsigned ChunkNum = I / BestChunkWidth;
8278 unsigned Opc = (Alternate && (ChunkNum % 2) != 0) ? ISD::SUB : ISD::ADD;
8279 Sum = DAG.getNode(Opc, dl, HiLoVT, Sum, Chunk);
8280 }
8281 }
8282 
8283 // For Alternate, the sum may be negative, but we need a positive sum. We
8284 // can increase it by a multiple of the divisor to make it positive. For 3
8285 // chunks the largest negative value is -(2^BestChunkWidth - 1). For 4
8286 // chunks, it's 2*-(2^BestChunkWidth - 1). We know that 2^BestChunkWidth + 1
8287 // is a multiple of the divisor. Add that 1 or 2 times to make the sum
8288 // positive.
8289 if (Alternate) {
8290 unsigned NumChunks = divideCeil(BitWidth - TrailingZeros, BestChunkWidth);
8291 assert(NumChunks <= 4);
8292 
8293 APInt Adjust = APInt::getOneBitSet(HBitWidth, BestChunkWidth);
8294 Adjust.setBit(0);
8295 // If there are 4 chunks, we need to adjust twice.
8296 if (NumChunks == 4)
8297 Adjust <<= 1;
8298 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum,
8299 DAG.getConstant(Adjust, dl, HiLoVT));
8300 }
8301 }
8302 
8303 // Perform a HiLoVT urem on the Sum using truncated divisor.
8304 SDValue RemL =
8305 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
8306 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
8307 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
8308 
8309 if (Opcode != ISD::UREM) {
8310 // If we didn't shift LH/LL earlier, do it now.
8311 if (BestChunkWidth != HBitWidth)
8312 ShiftRight(LL, LH, TrailingZeros);
8313 
8314 // Subtract the remainder from the shifted dividend.
8315 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
8316 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
8317 
8318 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
8319 
8320 // Multiply by the multiplicative inverse of the divisor modulo
8321 // (1 << BitWidth).
8322 APInt MulFactor = Divisor.multiplicativeInverse();
8323 
8324 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
8325 DAG.getConstant(MulFactor, dl, VT));
8326 
8327 // Split the quotient into low and high parts.
8328 SDValue QuotL, QuotH;
8329 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
8330 Result.push_back(QuotL);
8331 Result.push_back(QuotH);
8332 }
8333 
8334 if (Opcode != ISD::UDIV) {
8335 // If we shifted the input, shift the remainder left and add the bits we
8336 // shifted off the input.
8337 if (TrailingZeros) {
8338 if (TrailingZeros < HBitWidth) {
8339 // Shift RemH:RemL left by TrailingZeros.
8340 // RemH gets the high bits shifted out of RemL.
8341 RemH = DAG.getNode(
8342 ISD::SRL, dl, HiLoVT, RemL,
8343 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros, HiLoVT, dl));
8344 RemL =
8345 DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
8346 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8347 // OR in the partial remainder.
8348 RemL = DAG.getNode(ISD::OR, dl, HiLoVT, RemL, PartialRemL,
// NOTE(review): extraction lost doxygen line 8349, the final argument of
// this getNode call (presumably node flags such as
// "SDNodeFlags::Disjoint);") — verify against the original file.
8350 } else if (TrailingZeros == HBitWidth) {
8351 // Shift left by exactly HBitWidth: RemH becomes RemL, RemL becomes
8352 // PartialRemL.
8353 RemH = RemL;
8354 RemL = PartialRemL;
8355 } else {
8356 // Shift left by more than HBitWidth.
8357 RemH = DAG.getNode(
8358 ISD::SHL, dl, HiLoVT, RemL,
8359 DAG.getShiftAmountConstant(TrailingZeros - HBitWidth, HiLoVT, dl));
8360 RemH = DAG.getNode(ISD::OR, dl, HiLoVT, RemH, PartialRemH,
// NOTE(review): extraction lost doxygen line 8361, the final argument of
// this getNode call (same form as the OR above) — verify.
8362 RemL = PartialRemL;
8363 }
8364 }
8365 Result.push_back(RemL);
8366 Result.push_back(RemH);
8367 }
8368 
8369 return true;
8370}
8371
// Expand UDIV/UREM/UDIVREM by a constant using the classic "magic number"
// multiply-high algorithm (UnsignedDivisionByConstantInfo), performing all
// arithmetic on pairs of HiLoVT halves. On success, appends the quotient
// halves and/or remainder halves (depending on the opcode) to Result and
// returns true; returns false if the required double-width multiplies cannot
// be expanded.
8372 bool TargetLowering::expandUDIVREMByConstantViaUMulHiMagic(
8373 SDNode *N, const APInt &Divisor, SmallVectorImpl<SDValue> &Result,
8374 EVT HiLoVT, SelectionDAG &DAG, SDValue LL, SDValue LH) const {
8375 
8376 SDValue N0 = N->getOperand(0);
8377 EVT VT = N0->getValueType(0);
8378 SDLoc DL{N};
8379 
8380 assert(!Divisor.isOne() && "Magic algorithm does not work for division by 1");
8381 
8382 // This helper creates a MUL_LOHI of the pair (LL, LH) by a constant.
8383 auto MakeMUL_LOHIByConst = [&](unsigned Opc, SDValue LL, SDValue LH,
8384 const APInt &Const,
8385 SmallVectorImpl<SDValue> &Result) {
8386 SDValue LHS = DAG.getNode(ISD::BUILD_PAIR, DL, VT, LL, LH);
8387 SDValue RHS = DAG.getConstant(Const, DL, VT);
8388 auto [RL, RH] = DAG.SplitScalar(RHS, DL, HiLoVT, HiLoVT);
8389 return expandMUL_LOHI(Opc, VT, DL, LHS, RHS, Result, HiLoVT, DAG,
// NOTE(review): extraction lost doxygen line 8390, an argument of this call
// (presumably the MulExpansionKind argument) — verify against the original.
8391 LL, LH, RL, RH);
8392 };
8393 
8394 // This helper creates an ADD/SUB of the pairs (LL, LH) and (RL, RH).
8395 auto MakeAddSubLong = [&](unsigned Opc, SDValue LL, SDValue LH, SDValue RL,
8396 SDValue RH) {
8397 SDValue AddSubNode =
// NOTE(review): extraction lost doxygen line 8398, the start of this getNode
// call (presumably selecting ISD::UADDO/ISD::USUBO from Opc) — verify.
8399 DAG.getVTList(HiLoVT, MVT::i1), LL, RL);
8400 SDValue OutL = AddSubNode.getValue(0);
8401 SDValue Overflow = AddSubNode.getValue(1);
8402 SDValue AddSubWithOverflow =
// NOTE(review): extraction lost doxygen line 8403, the start of this getNode
// call (presumably the carry-using ISD::UADDO_CARRY/ISD::USUBO_CARRY form)
// — verify.
8404 DAG.getVTList(HiLoVT, MVT::i1), LH, RH, Overflow);
8405 SDValue OutH = AddSubWithOverflow.getValue(0);
8406 return std::make_pair(OutL, OutH);
8407 };
8408 
8409 // This helper creates a SRL of the pair (LL, LH) by Shift.
8410 auto MakeSRLLong = [&](SDValue LL, SDValue LH, unsigned Shift) {
8411 unsigned HBitWidth = HiLoVT.getScalarSizeInBits();
8412 if (Shift < HBitWidth) {
8413 SDValue ShAmt = DAG.getShiftAmountConstant(Shift, HiLoVT, DL);
8414 SDValue ResL = DAG.getNode(ISD::FSHR, DL, HiLoVT, LH, LL, ShAmt);
8415 SDValue ResH = DAG.getNode(ISD::SRL, DL, HiLoVT, LH, ShAmt);
8416 return std::make_pair(ResL, ResH);
8417 }
8418 SDValue Zero = DAG.getConstant(0, DL, HiLoVT);
8419 if (Shift == HBitWidth)
8420 return std::make_pair(LH, Zero);
8421 assert(Shift - HBitWidth < HBitWidth &&
8422 "We shouldn't generate an undefined shift");
8423 SDValue ShAmt = DAG.getShiftAmountConstant(Shift - HBitWidth, HiLoVT, DL);
8424 return std::make_pair(DAG.getNode(ISD::SRL, DL, HiLoVT, LH, ShAmt), Zero);
8425 };
8426 
8427 // Knowledge of leading zeros may help to reduce the multiplier.
8428 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
8429 
8430 UnsignedDivisionByConstantInfo Magics = UnsignedDivisionByConstantInfo::get(
8431 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
8432 
8433 assert(!LL == !LH && "Expected both input halves or no input halves!");
8434 if (!LL)
8435 std::tie(LL, LH) = DAG.SplitScalar(N0, DL, HiLoVT, HiLoVT);
8436 SDValue QL = LL;
8437 SDValue QH = LH;
8438 if (Magics.PreShift != 0)
8439 std::tie(QL, QH) = MakeSRLLong(QL, QH, Magics.PreShift);
8440 
8441 SmallVector<SDValue, 4> UMulResult;
8442 if (!MakeMUL_LOHIByConst(ISD::UMUL_LOHI, QL, QH, Magics.Magic, UMulResult))
8443 return false;
8444 
// UMUL_LOHI expansion yields {LoL, LoH, HiL, HiH}; the quotient estimate is
// the high half of the product.
8445 QL = UMulResult[2];
8446 QH = UMulResult[3];
8447 
8448 if (Magics.IsAdd) {
// q += (n - q) >> 1 round of the add-indicator variant of the algorithm.
8449 auto [NPQL, NPQH] = MakeAddSubLong(ISD::SUB, LL, LH, QL, QH);
8450 std::tie(NPQL, NPQH) = MakeSRLLong(NPQL, NPQH, 1);
8451 std::tie(QL, QH) = MakeAddSubLong(ISD::ADD, NPQL, NPQH, QL, QH);
8452 }
8453 
8454 if (Magics.PostShift != 0)
8455 std::tie(QL, QH) = MakeSRLLong(QL, QH, Magics.PostShift);
8456 
8457 unsigned Opcode = N->getOpcode();
8458 if (Opcode != ISD::UREM) {
8459 Result.push_back(QL);
8460 Result.push_back(QH);
8461 }
8462 
8463 if (Opcode != ISD::UDIV) {
// Remainder = Dividend - Quotient * Divisor, computed on half pairs.
8464 SmallVector<SDValue, 2> MulResult;
8465 if (!MakeMUL_LOHIByConst(ISD::MUL, QL, QH, Divisor, MulResult))
8466 return false;
8467 
8468 assert(MulResult.size() == 2);
8469 
8470 auto [RemL, RemH] =
8471 MakeAddSubLong(ISD::SUB, LL, LH, MulResult[0], MulResult[1]);
8472 
8473 Result.push_back(RemL);
8474 Result.push_back(RemH);
8475 }
8476 
8477 return true;
8478}
8479
// Entry point for expanding an unsigned DIV/REM/DIVREM-by-constant node into
// HiLoVT-sized operations. Filters out unsupported cases (signed ops,
// non-constant divisors, size-optimized builds, divisors <= 1, missing high
// multiply support), then tries the UREM-decomposition strategy followed by
// the magic-multiply strategy.
// NOTE(review): extraction lost doxygen lines 8480-8481, the start of the
// signature (presumably "bool TargetLowering::expandDIVREMByConstant(SDNode
// *N, SmallVectorImpl<SDValue> &Result,") — verify against the original.
8482 EVT HiLoVT, SelectionDAG &DAG,
8483 SDValue LL, SDValue LH) const {
8484 unsigned Opcode = N->getOpcode();
8485 
8486 // TODO: Support signed division/remainder.
8487 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
8488 return false;
8489 assert(
8490 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
8491 "Unexpected opcode");
8492 
8493 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
8494 if (!CN)
8495 return false;
8496 
8497 APInt Divisor = CN->getAPIntValue();
8498 
8499 // We depend on the UREM by constant optimization in DAGCombiner that requires
8500 // high multiply.
8501 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
// NOTE(review): extraction lost doxygen line 8502, the second half of this
// condition (presumably a check for ISD::UMUL_LOHI legality on HiLoVT) —
// verify against the original file.
8503 return false;
8504 
8505 // Don't expand if optimizing for size.
8506 if (DAG.shouldOptForSize())
8507 return false;
8508 
8509 // Early out for 0 or 1 divisors.
8510 if (Divisor.ule(1))
8511 return false;
8512 
8513 if (expandUDIVREMByConstantViaUREMDecomposition(N, Divisor, Result, HiLoVT,
8514 DAG, LL, LH))
8515 return true;
8516 
8517 if (expandUDIVREMByConstantViaUMulHiMagic(N, Divisor, Result, HiLoVT, DAG, LL,
8518 LH))
8519 return true;
8520 
8521 return false;
8522}
8523
8524 // Check that (every element of) Z is undef or not an exact multiple of BW.
// Used by the funnel-shift expansions to prove the shift amount is non-zero
// modulo the bit width, which permits the simpler two-shift form.
8525 static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
// NOTE(review): extraction lost doxygen line 8526, the start of this return
// statement (presumably "return ISD::matchUnaryPredicate(") — verify.
8527 Z,
8528 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
8529 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8530}
8531
// Expand a VP_FSHL/VP_FSHR (vector-predicated funnel shift) node into
// VP shift/logic operations, threading the Mask and VL operands through every
// created node. Mirrors the non-VP expandFunnelShift logic below.
// NOTE(review): extraction lost doxygen line 8532, the function signature
// (the call site below suggests "SDValue TargetLowering::expandVPFunnelShift(
// SDNode *Node, SelectionDAG &DAG) const {") — verify against the original.
8533 EVT VT = Node->getValueType(0);
8534 SDValue ShX, ShY;
8535 SDValue ShAmt, InvShAmt;
8536 SDValue X = Node->getOperand(0);
8537 SDValue Y = Node->getOperand(1);
8538 SDValue Z = Node->getOperand(2);
// VP nodes carry the mask and explicit vector length as trailing operands.
8539 SDValue Mask = Node->getOperand(3);
8540 SDValue VL = Node->getOperand(4);
8541 
8542 unsigned BW = VT.getScalarSizeInBits();
8543 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8544 SDLoc DL(SDValue(Node, 0));
8545 
8546 EVT ShVT = Z.getValueType();
8547 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8548 // fshl: X << C | Y >> (BW - C)
8549 // fshr: X << (BW - C) | Y >> C
8550 // where C = Z % BW is not zero
8551 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8552 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8553 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
8554 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8555 VL);
8556 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8557 VL);
8558 } else {
8559 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8560 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8561 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8562 if (isPowerOf2_32(BW)) {
8563 // Z % BW -> Z & (BW - 1)
8564 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8565 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8566 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8567 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8568 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8569 } else {
8570 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8571 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8572 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8573 }
8574 
// Split the "other" shift into a 1-bit shift plus an (BW-1)-bounded shift so
// no single shift amount can equal BW (which would be UB / undefined).
8575 SDValue One = DAG.getConstant(1, DL, ShVT);
8576 if (IsFSHL) {
8577 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8578 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8579 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8580 } else {
8581 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8582 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8583 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8584 }
8585 }
8586 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8587}
8588
// Expand an FSHL/FSHR node into shifts and OR. Delegates VP opcodes to
// expandVPFunnelShift; otherwise tries the reversed funnel-shift opcode if it
// is better supported, then falls back to the generic two-shift (or
// three-shift, when the amount may be zero mod BW) expansion.
// NOTE(review): extraction lost doxygen line 8589, the start of the signature
// (presumably "SDValue TargetLowering::expandFunnelShift(SDNode *Node,") —
// verify against the original file.
8590 SelectionDAG &DAG) const {
8591 if (Node->isVPOpcode())
8592 return expandVPFunnelShift(Node, DAG);
8593 
8594 EVT VT = Node->getValueType(0);
8595 
8596 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
// NOTE(review): extraction lost doxygen lines 8597-8599, the remainder of
// this vector-legality condition (presumably checks on ISD::SRL and ISD::OR,
// and possibly ISD::SUB/UREM) — verify against the original file.
8600 return SDValue();
8601 
8602 SDValue X = Node->getOperand(0);
8603 SDValue Y = Node->getOperand(1);
8604 SDValue Z = Node->getOperand(2);
8605 
8606 unsigned BW = VT.getScalarSizeInBits();
8607 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8608 SDLoc DL(SDValue(Node, 0));
8609 
8610 EVT ShVT = Z.getValueType();
8611 
8612 // If a funnel shift in the other direction is more supported, use it.
8613 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8614 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8615 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8616 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8617 // fshl X, Y, Z -> fshr X, Y, -Z
8618 // fshr X, Y, Z -> fshl X, Y, -Z
8619 Z = DAG.getNegative(Z, DL, ShVT);
8620 } else {
8621 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8622 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8623 SDValue One = DAG.getConstant(1, DL, ShVT);
8624 if (IsFSHL) {
8625 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8626 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8627 } else {
8628 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8629 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8630 }
8631 Z = DAG.getNOT(DL, Z, ShVT);
8632 }
8633 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8634 }
8635 
8636 SDValue ShX, ShY;
8637 SDValue ShAmt, InvShAmt;
8638 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8639 // fshl: X << C | Y >> (BW - C)
8640 // fshr: X << (BW - C) | Y >> C
8641 // where C = Z % BW is not zero
8642 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8643 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8644 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8645 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8646 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8647 } else {
8648 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8649 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8650 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8651 if (isPowerOf2_32(BW)) {
8652 // Z % BW -> Z & (BW - 1)
8653 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8654 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8655 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8656 } else {
8657 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8658 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8659 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8660 }
8661 
// Splitting the opposite shift into "by 1" plus "by (BW-1-C)" keeps every
// shift amount strictly below BW even when C == 0.
8662 SDValue One = DAG.getConstant(1, DL, ShVT);
8663 if (IsFSHL) {
8664 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8665 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8666 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8667 } else {
8668 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8669 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8670 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8671 }
8672 }
8673 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8674}
8675
8676 // TODO: Merge with expandFunnelShift.
// Expand a ROTL/ROTR node into shifts and OR, preferring the reverse-rotate
// opcode when that one is supported and the element size is a power of two.
// NOTE(review): extraction lost doxygen line 8677, the start of the signature
// (the AllowVectorOps use below suggests "SDValue TargetLowering::expandROT(
// SDNode *Node, bool AllowVectorOps, SelectionDAG &DAG) const {") — verify.
8678 SelectionDAG &DAG) const {
8679 EVT VT = Node->getValueType(0);
8680 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8681 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8682 SDValue Op0 = Node->getOperand(0);
8683 SDValue Op1 = Node->getOperand(1);
8684 SDLoc DL(SDValue(Node, 0));
8685 
8686 EVT ShVT = Op1.getValueType();
8687 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8688 
8689 // If a rotate in the other direction is more supported, use it.
8690 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8691 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8692 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
// rot(x, c) == revrot(x, -c) when the width is a power of two.
8693 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8694 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8695 }
8696 
8697 if (!AllowVectorOps && VT.isVector() &&
// NOTE(review): extraction lost doxygen lines 8698-8702, the remainder of
// this vector-legality condition (presumably legality checks on the
// SHL/SRL/SUB/OR/AND ops used below) — verify against the original file.
8703 return SDValue();
8704 
8705 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8706 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8707 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8708 SDValue ShVal;
8709 SDValue HsVal;
8710 if (isPowerOf2_32(EltSizeInBits)) {
8711 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8712 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8713 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8714 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8715 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8716 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8717 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8718 } else {
8719 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8720 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8721 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8722 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8723 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8724 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8725 SDValue One = DAG.getConstant(1, DL, ShVT);
8726 HsVal =
8727 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8728 }
8729 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8730}
8731
8732/// Check if CLMUL on VT can eventually reach a type with legal CLMUL through
8733/// a chain of halving decompositions (halving element width) and/or vector
8734/// widening (doubling element count). This guides expansion strategy selection:
8735/// if true, the halving/widening path produces better code than bit-by-bit.
8736///
8737/// HalveDepth tracks halving steps only (each creates ~4x more operations).
8738/// Widening steps are cheap (O(1) pad/extract) and don't count.
8739/// Limiting halvings to 2 prevents exponential blowup:
8740/// 1 halving: ~4 sub-CLMULs (good, e.g. v8i16 -> v8i8)
8741/// 2 halvings: ~16 sub-CLMULs (acceptable, e.g. v4i32 -> v4i16 -> v8i8)
8742/// 3 halvings: ~64 sub-CLMULs (worse than bit-by-bit expansion)
// See the doc comment immediately above for the strategy and depth limits.
// NOTE(review): extraction lost doxygen line 8743, the start of the signature
// (the call sites pass "*this, Ctx, VT", suggesting "static bool
// canNarrowCLMULToLegal(const TargetLowering &TLI, LLVMContext &Ctx,") —
// verify against the original file.
8744 EVT VT, unsigned HalveDepth = 0,
8745 unsigned TotalDepth = 0) {
// Depth limits bound the recursion: at most 2 halvings (cost grows ~4x per
// halving) and 8 total steps; only fixed-length vectors are considered.
8746 if (HalveDepth > 2 || TotalDepth > 8 || !VT.isFixedLengthVector())
8747 return false;
// NOTE(review): extraction lost doxygen line 8748, the success condition
// guarding this "return true" (presumably a check that CLMUL is
// legal/custom on VT) — verify against the original file.
8749 return true;
8750 if (!TLI.isTypeLegal(VT))
8751 return false;
8752 
8753 unsigned BW = VT.getScalarSizeInBits();
8754 
8755 // Halve: halve element width, same element count.
8756 // This is the expensive step -- each halving creates ~4x more operations.
8757 if (BW % 2 == 0) {
8758 EVT HalfEltVT = EVT::getIntegerVT(Ctx, BW / 2);
8759 EVT HalfVT = VT.changeVectorElementType(Ctx, HalfEltVT);
8760 if (TLI.isTypeLegal(HalfVT) &&
8761 canNarrowCLMULToLegal(TLI, Ctx, HalfVT, HalveDepth + 1, TotalDepth + 1))
8762 return true;
8763 }
8764 
8765 // Widen: double element count (fixed-width vectors only).
8766 // This is cheap -- just INSERT_SUBVECTOR + EXTRACT_SUBVECTOR.
8767 EVT WideVT = VT.getDoubleNumVectorElementsVT(Ctx);
8768 if (TLI.isTypeLegal(WideVT) &&
8769 canNarrowCLMULToLegal(TLI, Ctx, WideVT, HalveDepth, TotalDepth + 1))
8770 return true;
8771 
8772 return false;
8773}
8774
// Expand a carry-less multiply node (CLMUL low half, CLMULR reversed, or
// CLMULH high half) into operations the target supports: vector
// halving/promotion/widening strategies for CLMUL, a bit-by-bit XOR loop as
// the generic fallback, and shift/bitreverse/zero-extend based lowerings for
// CLMULR/CLMULH.
// NOTE(review): extraction lost doxygen line 8775, the function signature
// (presumably "SDValue TargetLowering::expandCLMUL(SDNode *Node,
// SelectionDAG &DAG) const {") — verify against the original file.
8776 SDLoc DL(Node);
8777 EVT VT = Node->getValueType(0);
8778 SDValue X = Node->getOperand(0);
8779 SDValue Y = Node->getOperand(1);
8780 unsigned BW = VT.getScalarSizeInBits();
8781 unsigned Opcode = Node->getOpcode();
8782 LLVMContext &Ctx = *DAG.getContext();
8783 
8784 switch (Opcode) {
8785 case ISD::CLMUL: {
8786 // For vector types, try decomposition strategies that leverage legal
8787 // CLMUL on narrower or wider element types, avoiding the expensive
8788 // bit-by-bit expansion.
8789 if (VT.isVector()) {
8790 // Strategy 1: Halving decomposition to half-element-width CLMUL.
8791 // Applies ExpandIntRes_CLMUL's identity element-wise:
8792 // CLMUL(X, Y) = (Hi << HalfBW) | Lo
8793 // where:
8794 // Lo = CLMUL(XLo, YLo)
8795 // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8796 unsigned HalfBW = BW / 2;
8797 if (BW % 2 == 0) {
8798 EVT HalfEltVT = EVT::getIntegerVT(Ctx, HalfBW);
8799 EVT HalfVT =
8800 EVT::getVectorVT(Ctx, HalfEltVT, VT.getVectorElementCount());
8801 if (isTypeLegal(HalfVT) && canNarrowCLMULToLegal(*this, Ctx, HalfVT,
8802 /*HalveDepth=*/1)) {
8803 SDValue ShAmt = DAG.getShiftAmountConstant(HalfBW, VT, DL);
8804 
8805 // Extract low and high halves of each element.
8806 SDValue XLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, X);
8807 SDValue XHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
8808 DAG.getNode(ISD::SRL, DL, VT, X, ShAmt));
8809 SDValue YLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, Y);
8810 SDValue YHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
8811 DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt));
8812 
8813 // Lo = CLMUL(XLo, YLo)
8814 SDValue Lo = DAG.getNode(ISD::CLMUL, DL, HalfVT, XLo, YLo);
8815 
8816 // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8817 SDValue LoH = DAG.getNode(ISD::CLMULH, DL, HalfVT, XLo, YLo);
8818 SDValue Cross1 = DAG.getNode(ISD::CLMUL, DL, HalfVT, XLo, YHi);
8819 SDValue Cross2 = DAG.getNode(ISD::CLMUL, DL, HalfVT, XHi, YLo);
8820 SDValue Cross = DAG.getNode(ISD::XOR, DL, HalfVT, Cross1, Cross2);
8821 SDValue Hi = DAG.getNode(ISD::XOR, DL, HalfVT, LoH, Cross);
8822 
8823 // Reassemble: Result = ZExt(Lo) | (AnyExt(Hi) << HalfBW)
8824 SDValue LoExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo);
8825 SDValue HiExt = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Hi);
8826 SDValue HiShifted = DAG.getNode(ISD::SHL, DL, VT, HiExt, ShAmt);
8827 return DAG.getNode(ISD::OR, DL, VT, LoExt, HiShifted);
8828 }
8829 }
8830 
8831 // Strategy 2: Promote to double-element-width CLMUL.
8832 // CLMUL(X, Y) = Trunc(CLMUL(AnyExt(X), AnyExt(Y)))
8833 {
8834 EVT ExtVT = VT.widenIntegerElementType(Ctx);
8835 if (isTypeLegal(ExtVT) && isOperationLegalOrCustom(ISD::CLMUL, ExtVT)) {
8836 // If CLMUL on ExtVT is Custom (not Legal), the target may
8837 // scalarize it, costing O(NumElements) scalar ops. The bit-by-bit
8838 // fallback costs O(BW) vectorized iterations. Only widen when
8839 // element count is small enough that scalarization is cheaper.
8840 unsigned NumElts = VT.getVectorMinNumElements();
8841 if (isOperationLegal(ISD::CLMUL, ExtVT) || NumElts < BW) {
8842 SDValue XExt = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, X);
8843 SDValue YExt = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, Y);
8844 SDValue Mul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
8845 return DAG.getNode(ISD::TRUNCATE, DL, VT, Mul);
8846 }
8847 }
8848 }
8849 
8850 // Strategy 3: Widen element count (pad with undef, do CLMUL on wider
8851 // vector, extract lower result). CLMUL is element-wise, so upper
8852 // (undef) lanes don't affect the lower results.
8853 // e.g. v4i16 => pad to v8i16 => halve to v8i8 PMUL => extract v4i16.
8854 if (auto EC = VT.getVectorElementCount(); EC.isFixed()) {
8855 EVT WideVT = EVT::getVectorVT(Ctx, VT.getVectorElementType(), EC * 2);
8856 if (isTypeLegal(WideVT) && canNarrowCLMULToLegal(*this, Ctx, WideVT)) {
8857 SDValue Undef = DAG.getUNDEF(WideVT);
8858 SDValue XWide = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, Undef,
8859 X, DAG.getVectorIdxConstant(0, DL));
8860 SDValue YWide = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, Undef,
8861 Y, DAG.getVectorIdxConstant(0, DL));
8862 SDValue WideRes = DAG.getNode(ISD::CLMUL, DL, WideVT, XWide, YWide);
8863 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, WideRes,
8864 DAG.getVectorIdxConstant(0, DL));
8865 }
8866 }
8867 }
8868 
8869 // NOTE: If you change this expansion, please update the cost model
8870 // calculation in BasicTTIImpl::getTypeBasedIntrinsicInstrCost for
8871 // Intrinsic::clmul.
8872 
8873 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
8874 
// Generic fallback: accumulate XOR of (X << I) for every set bit I of Y.
8875 SDValue Res = DAG.getConstant(0, DL, VT);
8876 for (unsigned I = 0; I < BW; ++I) {
8877 SDValue ShiftAmt = DAG.getShiftAmountConstant(I, VT, DL);
8878 SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT);
8879 SDValue YMasked = DAG.getNode(ISD::AND, DL, VT, Y, Mask);
8880 
8881 // For targets with a fast bit test instruction (e.g., x86 BT) or without
8882 // multiply, use a shift-based expansion to avoid expensive MUL
8883 // instructions.
8884 SDValue Part;
8885 if (!hasBitTest(Y, ShiftAmt) &&
// NOTE(review): extraction lost doxygen line 8887, the remainder of this
// condition and its opening brace (presumably a MUL-legality check) —
// verify against the original file.
8888 Part = DAG.getNode(ISD::MUL, DL, VT, X, YMasked);
8889 } else {
8890 // Canonical bit test: (Y & (1 << I)) != 0
8891 SDValue Zero = DAG.getConstant(0, DL, VT);
8892 SDValue Cond = DAG.getSetCC(DL, SetCCVT, YMasked, Zero, ISD::SETEQ);
8893 SDValue XShifted = DAG.getNode(ISD::SHL, DL, VT, X, ShiftAmt);
8894 Part = DAG.getSelect(DL, VT, Cond, Zero, XShifted);
8895 }
8896 Res = DAG.getNode(ISD::XOR, DL, VT, Res, Part);
8897 }
8898 return Res;
8899 }
8900 case ISD::CLMULR:
8901 // If we have CLMUL/CLMULH, merge the shifted results to form CLMULR.
// NOTE(review): extraction lost doxygen lines 8902-8903, the condition and
// opening brace guarding this case (presumably legality checks on
// ISD::CLMUL and ISD::CLMULH for VT) — verify against the original file.
8904 SDValue Lo = DAG.getNode(ISD::CLMUL, DL, VT, X, Y);
8905 SDValue Hi = DAG.getNode(ISD::CLMULH, DL, VT, X, Y);
8906 Lo = DAG.getNode(ISD::SRL, DL, VT, Lo,
8907 DAG.getShiftAmountConstant(BW - 1, VT, DL));
8908 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8909 DAG.getShiftAmountConstant(1, VT, DL));
8910 return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
8911 }
8912 [[fallthrough]];
8913 case ISD::CLMULH: {
8914 EVT ExtVT = VT.widenIntegerElementType(Ctx);
8915 // Use bitreverse-based lowering (CLMULR/H = rev(CLMUL(rev,rev)) >> S)
8916 // when any of these hold:
8917 // (a) ZERO_EXTEND to ExtVT or SRL on ExtVT isn't legal.
8918 // (b) CLMUL is legal on VT but not on ExtVT (e.g. v8i8 on AArch64).
8919 // (c) CLMUL on ExtVT isn't legal, but CLMUL on VT can be efficiently
8920 // expanded via halving/widening to reach legal CLMUL. The bitreverse
8921 // path creates CLMUL(VT) which will be expanded efficiently. The
8922 // promote path would create CLMUL(ExtVT) => halving => CLMULH(VT),
8923 // causing a cycle.
8924 // Note: when CLMUL is legal on ExtVT, the zext => CLMUL(ExtVT) => shift
8925 // => trunc path is preferred over the bitreverse path, as it avoids the
8926 // cost of 3 bitreverse operations.
// NOTE(review): extraction lost doxygen lines 8927-8930, the first part of
// the condition described by the comment above (legality checks for
// ZERO_EXTEND/SRL/CLMUL on VT and ExtVT) — verify against the original.
8931 canNarrowCLMULToLegal(*this, Ctx, VT)))) {
8932 SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X);
8933 SDValue YRev = DAG.getNode(ISD::BITREVERSE, DL, VT, Y);
8934 SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, VT, XRev, YRev);
8935 SDValue Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ClMul);
8936 if (Opcode == ISD::CLMULH)
8937 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
8938 DAG.getShiftAmountConstant(1, VT, DL));
8939 return Res;
8940 }
// Default path: zero-extend, CLMUL on the doubled element width, then shift
// the interesting bits down (BW-1 for CLMULR, BW for CLMULH) and truncate.
8941 SDValue XExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, X);
8942 SDValue YExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Y);
8943 SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
8944 unsigned ShAmt = Opcode == ISD::CLMULR ? BW - 1 : BW;
8945 SDValue HiBits = DAG.getNode(ISD::SRL, DL, ExtVT, ClMul,
8946 DAG.getShiftAmountConstant(ShAmt, ExtVT, DL));
8947 return DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits);
8948 }
8949 }
8950 llvm_unreachable("Expected CLMUL, CLMULR, or CLMULH");
8951}
8952
 8954 SelectionDAG &DAG) const {
 // Expands SHL_PARTS/SRL_PARTS/SRA_PARTS: a double-wide shift whose value is
 // split into two VT-sized halves (Lo, Hi). The fast path uses a funnel shift
 // (FSHL/FSHR) for the part receiving bits from both halves; selects then fix
 // up the case where the shift amount is >= the width of one part.
 8955 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
 8956 EVT VT = Node->getValueType(0);
 8957 unsigned VTBits = VT.getScalarSizeInBits();
 8958 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
 8959
 8960 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
 8961 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
 8962 SDValue ShOpLo = Node->getOperand(0);
 8963 SDValue ShOpHi = Node->getOperand(1);
 8964 SDValue ShAmt = Node->getOperand(2);
 8965 EVT ShAmtVT = ShAmt.getValueType();
 8966 EVT ShAmtCCVT =
 8967 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
 8968 SDLoc dl(Node);
 8969
 8970 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
 8971 // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
 8972 // away during isel.
 8973 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
 8974 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
 // Tmp1 is the value of the part that is entirely shifted out of the source:
 // the sign-fill for arithmetic right shifts, zero otherwise.
 8975 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
 8976 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
 8977 : DAG.getConstant(0, dl, VT);
 8978
 // Tmp2: the "mixed" part combining bits from both halves (funnel shift).
 // Tmp3: the part produced from a single half with the masked amount.
 8979 SDValue Tmp2, Tmp3;
 8980 if (IsSHL) {
 8981 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
 8982 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
 8983 } else {
 8984 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
 8985 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
 8986 }
 8987
 8988 // If the shift amount is larger or equal than the width of a part we don't
 8989 // use the result from the FSHL/FSHR. Insert a test and select the appropriate
 8990 // values for large shift amounts.
 8991 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
 8992 DAG.getConstant(VTBits, dl, ShAmtVT))
 8993 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
 8994 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
 8995
 // Cond is true when ShAmt >= VTBits (the bit VTBits of the amount is set,
 // relying on the power-of-two part width asserted above).
 8996 if (IsSHL) {
 8997 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
 8998 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
 8999 } else {
 9000 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
 9001 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
 9002 }
 9003}
9004
 9006 SelectionDAG &DAG) const {
 9007 // This implements llvm.canonicalize.f* by multiplication with 1.0, as
 9008 // suggested in
 9009 // https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
 9010 // It uses strict_fp operations even outside a strict_fp context in order
 9011 // to guarantee that the canonicalization is not optimized away by later
 9012 // passes. The result chain introduced by that is intentionally ignored
 9013 // since no ordering requirement is intended here.
 9014 EVT VT = Node->getValueType(0);
 9015 SDLoc DL(Node);
 // Copy the node's flags but mark the multiply as not raising FP exceptions;
 // canonicalization by itself must not introduce observable traps.
 9016 SDNodeFlags Flags = Node->getFlags();
 9017 Flags.setNoFPExcept(true);
 9018 SDValue One = DAG.getConstantFP(1.0, DL, VT);
 // Chain the strict multiply off the entry node: only the value result is
 // returned, the chain result is deliberately dropped (see comment above).
 9019 SDValue Mul =
 9020 DAG.getNode(ISD::STRICT_FMUL, DL, {VT, MVT::Other},
 9021 {DAG.getEntryNode(), Node->getOperand(0), One}, Flags);
 9022 return Mul;
 9023}
9024
 9025SDValue
 9027 SelectionDAG &DAG) const {
 // Expands CONVERT_FROM_ARBITRARY_FP: reconstructs a standard destination
 // float (DstVT) from an integer holding the raw bits of a small/arbitrary
 // source float format identified by an APFloat semantics enum operand.
 // The expansion works entirely in an integer type of the destination's
 // width, classifying the input (zero / denormal / normal / inf / NaN) and
 // assembling the corresponding destination bit pattern, then bitcasts.
 9028 SDLoc dl(Node);
 9029 EVT DstVT = Node->getValueType(0);
 9030 EVT DstScalarVT = DstVT.getScalarType();
 9031
 // Operand 0: integer carrying the source float's raw bits.
 // Operand 1: constant APFloatBase::Semantics enum of the source format.
 9032 SDValue IntVal = Node->getOperand(0);
 9033 const uint64_t SemEnum = Node->getConstantOperandVal(1);
 9034 const auto Sem = static_cast<APFloatBase::Semantics>(SemEnum);
 9035
 9036 // Supported source formats.
 9037 switch (Sem) {
 // NOTE(review): the case labels for the supported semantics are elided in
 // this listing; only the shared break and the default path are visible.
 9043 break;
 9044 default:
 9045 DAG.getContext()->emitError("CONVERT_FROM_ARBITRARY_FP: not implemented "
 9046 "source format (semantics enum " +
 9047 Twine(SemEnum) + ")");
 9048 return SDValue();
 9049 }
 9050
 // Source format parameters, derived from the APFloat semantics: total bit
 // width, explicit mantissa bits (precision minus the implicit bit), exponent
 // field width, exponent bias, and how non-finite values are encoded.
 9051 const fltSemantics &SrcSem = APFloatBase::EnumToSemantics(Sem);
 9052 const unsigned SrcBits = APFloat::getSizeInBits(SrcSem);
 9053 const unsigned SrcPrecision = APFloat::semanticsPrecision(SrcSem);
 9054 const unsigned SrcMant = SrcPrecision - 1;
 9055 const unsigned SrcExp = SrcBits - SrcMant - 1;
 9056 const int SrcBias = 1 - APFloat::semanticsMinExponent(SrcSem);
 9057 const fltNonfiniteBehavior NFBehavior = SrcSem.nonFiniteBehavior;
 9058
 9059 // Destination format parameters.
 9060 const fltSemantics &DstSem = DstScalarVT.getFltSemantics();
 9061 const unsigned DstBits = APFloat::getSizeInBits(DstSem);
 9062 const unsigned DstMant = APFloat::semanticsPrecision(DstSem) - 1;
 9063 const unsigned DstExpBits = DstBits - DstMant - 1;
 9064 const int DstMinExp = APFloat::semanticsMinExponent(DstSem);
 9065 const int DstBias = 1 - DstMinExp;
 9066 const uint64_t DstExpAllOnes = (1ULL << DstExpBits) - 1;
 9067
 9068 // Work in an integer type matching the destination float width.
 9069 EVT IntScalarVT = EVT::getIntegerVT(*DAG.getContext(), DstBits);
 9070 EVT IntVT = DstVT.isVector()
 9071 ? EVT::getVectorVT(*DAG.getContext(), IntScalarVT,
 9072 DstVT.getVectorElementCount())
 9073 : IntScalarVT;
 9074
 9075 SDValue Src = DAG.getZExtOrTrunc(IntVal, dl, IntVT);
 9076
 9077 EVT SetCCVT =
 9078 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), IntVT);
 9079
 9080 SDValue Zero = DAG.getConstant(0, dl, IntVT);
 9081 SDValue One = DAG.getConstant(1, dl, IntVT);
 9082
 9083 // Extract bit fields.
 9084 const uint64_t MantMask = (SrcMant > 0) ? ((1ULL << SrcMant) - 1) : 0;
 9085 const uint64_t ExpMask = (1ULL << SrcExp) - 1;
 9086
 9087 SDValue MantField = DAG.getNode(ISD::AND, dl, IntVT, Src,
 9088 DAG.getConstant(MantMask, dl, IntVT));
 9089
 9090 SDValue ExpField =
 9091 DAG.getNode(ISD::AND, dl, IntVT,
 9092 DAG.getNode(ISD::SRL, dl, IntVT, Src,
 9093 DAG.getShiftAmountConstant(SrcMant, IntVT, dl)),
 9094 DAG.getConstant(ExpMask, dl, IntVT));
 9095
 9096 SDValue SignBit =
 9097 DAG.getNode(ISD::SRL, dl, IntVT, Src,
 9098 DAG.getShiftAmountConstant(SrcBits - 1, IntVT, dl));
 9099
 // Sign bit repositioned into the destination's sign position; OR'd into
 // every result variant below.
 9100 SDValue SignShifted =
 9101 DAG.getNode(ISD::SHL, dl, IntVT, SignBit,
 9102 DAG.getShiftAmountConstant(DstBits - 1, IntVT, dl));
 9103
 9104 // Classify the input.
 9105 SDValue ExpAllOnes = DAG.getConstant(ExpMask, dl, IntVT);
 9106 SDValue IsExpAllOnes =
 9107 DAG.getSetCC(dl, SetCCVT, ExpField, ExpAllOnes, ISD::SETEQ);
 9108 SDValue IsExpZero = DAG.getSetCC(dl, SetCCVT, ExpField, Zero, ISD::SETEQ);
 9109 SDValue IsMantZero = DAG.getSetCC(dl, SetCCVT, MantField, Zero, ISD::SETEQ);
 9110 SDValue IsMantNonZero =
 9111 DAG.getSetCC(dl, SetCCVT, MantField, Zero, ISD::SETNE);
 9112
 // NaN detection depends on the source format's non-finite encoding:
 // FiniteOnly formats have no NaNs; IEEE-style uses all-ones exponent with a
 // nonzero mantissa; otherwise (NanOnly-style) the all-ones exponent with an
 // all-ones mantissa encodes NaN.
 9113 SDValue IsNaN;
 9114 if (NFBehavior == fltNonfiniteBehavior::FiniteOnly) {
 9115 IsNaN = DAG.getBoolConstant(false, dl, SetCCVT, IntVT);
 9116 } else if (NFBehavior == fltNonfiniteBehavior::IEEE754) {
 9117 IsNaN = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantNonZero);
 9118 } else {
 9120 SDValue MantAllOnes = DAG.getConstant(MantMask, dl, IntVT);
 9121 SDValue IsMantAllOnes =
 9122 DAG.getSetCC(dl, SetCCVT, MantField, MantAllOnes, ISD::SETEQ);
 9123 IsNaN = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantAllOnes);
 9124 }
 9125
 // Only IEEE-style source formats can encode an infinity.
 9126 SDValue IsInf;
 9127 if (NFBehavior == fltNonfiniteBehavior::IEEE754)
 9128 IsInf = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantZero);
 9129 else
 9130 IsInf = DAG.getBoolConstant(false, dl, SetCCVT, IntVT);
 9131
 9132 SDValue IsZero = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpZero, IsMantZero);
 9133 SDValue IsDenorm =
 9134 DAG.getNode(ISD::AND, dl, SetCCVT, IsExpZero, IsMantNonZero);
 9135
 // Normal value conversion: rebias the exponent and left-align the mantissa
 // into the (at least as wide) destination mantissa field.
 9136 // Normal value conversion.
 9137 const int BiasAdjust = DstBias - SrcBias;
 9138 SDValue NormDstExp =
 9139 DAG.getNode(ISD::ADD, dl, IntVT, ExpField,
 9140 DAG.getConstant(APInt(DstBits, BiasAdjust, true), dl, IntVT));
 9141
 9142 SDValue NormDstMant;
 9143 if (DstMant > SrcMant) {
 9144 SDValue NormDstMantShift =
 9145 DAG.getShiftAmountConstant(DstMant - SrcMant, IntVT, dl);
 9146 NormDstMant = DAG.getNode(ISD::SHL, dl, IntVT, MantField, NormDstMantShift);
 9147 } else {
 9148 NormDstMant = MantField;
 9149 }
 9150
 9151 SDValue DstMantShift = DAG.getShiftAmountConstant(DstMant, IntVT, dl);
 9152 SDValue NormExpShifted =
 9153 DAG.getNode(ISD::SHL, dl, IntVT, NormDstExp, DstMantShift);
 9154 SDValue NormResult =
 9155 DAG.getNode(ISD::OR, dl, IntVT,
 9156 DAG.getNode(ISD::OR, dl, IntVT, SignShifted, NormExpShifted),
 9157 NormDstMant);
 9158
 // Denormal value conversion: normalize the subnormal mantissa. CTLZ finds
 // the leading set bit, from which the effective exponent and the shift that
 // drops the (now implicit) leading one are computed. CTLZ_ZERO_POISON is
 // safe because IsDenorm guarantees MantField != 0 on this path.
 9159 // Denormal value conversion.
 9160 SDValue DenormResult;
 9161 {
 9162 const unsigned IntVTBits = DstBits;
 9163 SDValue LeadingZeros =
 9164 DAG.getNode(ISD::CTLZ_ZERO_POISON, dl, IntVT, MantField);
 9165
 9166 const int DenormExpConst =
 9167 (int)IntVTBits + DstBias - SrcBias - (int)SrcMant;
 9168 SDValue DenormDstExp = DAG.getNode(
 9169 ISD::SUB, dl, IntVT,
 9170 DAG.getConstant(APInt(DstBits, DenormExpConst, true), dl, IntVT),
 9171 LeadingZeros);
 9172
 // Position of the highest set mantissa bit; XOR clears it so only the
 // fraction below the implicit leading one remains.
 9173 SDValue MantMSB =
 9174 DAG.getNode(ISD::SUB, dl, IntVT,
 9175 DAG.getConstant(IntVTBits - 1, dl, IntVT), LeadingZeros);
 9176
 9177 SDValue LeadingOne = DAG.getNode(ISD::SHL, dl, IntVT, One, MantMSB);
 9178 SDValue Frac = DAG.getNode(ISD::XOR, dl, IntVT, MantField, LeadingOne);
 9179
 9180 const unsigned ShiftSub = IntVTBits - 1 - DstMant;
 9181 SDValue ShiftAmount = DAG.getNode(ISD::SUB, dl, IntVT, LeadingZeros,
 9182 DAG.getConstant(ShiftSub, dl, IntVT));
 9183
 9184 SDValue DenormDstMant = DAG.getNode(ISD::SHL, dl, IntVT, Frac, ShiftAmount);
 9185
 9186 SDValue DenormExpShifted =
 9187 DAG.getNode(ISD::SHL, dl, IntVT, DenormDstExp, DstMantShift);
 9188 DenormResult = DAG.getNode(
 9189 ISD::OR, dl, IntVT,
 9190 DAG.getNode(ISD::OR, dl, IntVT, SignShifted, DenormExpShifted),
 9191 DenormDstMant);
 9192 }
 9193
 9194 SDValue FiniteResult =
 9195 DAG.getSelect(dl, IntVT, IsDenorm, DenormResult, NormResult);
 9196
 // NaN result: all-ones exponent plus the quiet bit (MSB of the mantissa).
 9197 const uint64_t QNaNBit = (DstMant > 0) ? (1ULL << (DstMant - 1)) : 0;
 9198 SDValue NaNResult =
 9199 DAG.getConstant((DstExpAllOnes << DstMant) | QNaNBit, dl, IntVT);
 9200
 9201 SDValue InfResult =
 9202 DAG.getNode(ISD::OR, dl, IntVT, SignShifted,
 9203 DAG.getConstant(DstExpAllOnes << DstMant, dl, IntVT));
 9204
 // Zero maps to a signed zero of the destination format.
 9205 SDValue ZeroResult = SignShifted;
 9206
 // Select among the variants; NaN has the highest priority, then Inf, Zero,
 // and finally the finite (normal/denormal) result.
 9207 SDValue Result = FiniteResult;
 9208 Result = DAG.getSelect(dl, IntVT, IsZero, ZeroResult, Result);
 9209 Result = DAG.getSelect(dl, IntVT, IsInf, InfResult, Result);
 9210 Result = DAG.getSelect(dl, IntVT, IsNaN, NaNResult, Result);
 9211
 9212 return DAG.getNode(ISD::BITCAST, dl, DstVT, Result);
 9213}
9214
 9216 SelectionDAG &DAG) const {
 // Expands FP_TO_SINT in integer arithmetic, by decoding the float's bit
 // pattern (sign/exponent/mantissa) and shifting the mantissa into place.
 // Returns false when this expansion does not apply (see checks below);
 // on success, stores the converted value in Result and returns true.
 9217 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
 9218 SDValue Src = Node->getOperand(OpNo);
 9219 EVT SrcVT = Src.getValueType();
 9220 EVT DstVT = Node->getValueType(0);
 9221 SDLoc dl(SDValue(Node, 0));
 9222
 9223 // FIXME: Only f32 to i64 conversions are supported.
 9224 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
 9225 return false;
 9226
 9227 if (Node->isStrictFPOpcode())
 9228 // When a NaN is converted to an integer a trap is allowed. We can't
 9229 // use this expansion here because it would eliminate that trap. Other
 9230 // traps are also allowed and cannot be eliminated. See
 9231 // IEEE 754-2008 sec 5.8.
 9232 return false;
 9233
 9234 // Expand f32 -> i64 conversion
 9235 // This algorithm comes from compiler-rt's implementation of fixsfdi:
 9236 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
 9237 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
 9238 EVT IntVT = SrcVT.changeTypeToInteger();
 9239 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
 9240
 // IEEE single-precision field constants: 8-bit exponent at bit 23 (bias
 // 127), 23-bit mantissa.
 9241 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
 9242 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
 9243 SDValue Bias = DAG.getConstant(127, dl, IntVT);
 9244 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
 9245 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
 9246 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
 9247
 9248 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
 9249
 // Unbiased exponent of the input.
 9250 SDValue ExponentBits = DAG.getNode(
 9251 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
 9252 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
 9253 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
 9254
 // Sign becomes an all-zeros or all-ones mask via arithmetic shift.
 9255 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
 9256 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
 9257 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
 9258 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
 9259
 // Mantissa with the implicit leading one restored.
 9260 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
 9261 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
 9262 DAG.getConstant(0x00800000, dl, IntVT));
 9263
 9264 R = DAG.getZExtOrTrunc(R, dl, DstVT);
 9265
 // Shift the mantissa left or right depending on whether the exponent
 // exceeds the mantissa width.
 9266 R = DAG.getSelectCC(
 9267 dl, Exponent, ExponentLoBit,
 9268 DAG.getNode(ISD::SHL, dl, DstVT, R,
 9269 DAG.getZExtOrTrunc(
 9270 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
 9271 dl, IntShVT)),
 9272 DAG.getNode(ISD::SRL, dl, DstVT, R,
 9273 DAG.getZExtOrTrunc(
 9274 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
 9275 dl, IntShVT)),
 9276 ISD::SETGT);
 9277
 // Apply the sign via (R ^ Sign) - Sign (two's-complement negate when the
 // sign mask is all-ones).
 9278 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
 9279 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
 9280
 // Inputs with a negative unbiased exponent have magnitude < 1 => result 0.
 9281 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
 9282 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
 9283 return true;
 9284}
9285
 9287 SDValue &Chain,
 9288 SelectionDAG &DAG) const {
 // Expands FP_TO_UINT (strict or non-strict) in terms of FP_TO_SINT.
 // Returns false when the expansion does not apply; on success fills in
 // Result (and Chain for strict nodes) and returns true.
 9289 SDLoc dl(SDValue(Node, 0));
 9290 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
 9291 SDValue Src = Node->getOperand(OpNo);
 9292
 9293 EVT SrcVT = Src.getValueType();
 9294 EVT DstVT = Node->getValueType(0);
 9295 EVT SetCCVT =
 9296 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
 9297 EVT DstSetCCVT =
 9298 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
 9299
 9300 // Only expand vector types if we have the appropriate vector bit operations.
 9301 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
 // NOTE(review): the remainder of this condition is elided in this listing.
 9303 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
 9305 return false;
 9306
 9307 // If the maximum float value is smaller then the signed integer range,
 9308 // the destination signmask can't be represented by the float, so we can
 9309 // just use FP_TO_SINT directly.
 9310 const fltSemantics &APFSem = SrcVT.getFltSemantics();
 9311 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
 9312 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
 // NOTE(review): the overflow check guarding this branch is elided in this
 // listing; APF holds 2^(DstBits-1) after the conversion below.
 9314 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
 9315 if (Node->isStrictFPOpcode()) {
 9316 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
 9317 { Node->getOperand(0), Src });
 9318 Chain = Result.getValue(1);
 9319 } else
 9320 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
 9321 return true;
 9322 }
 9323
 9324 // Don't expand it if there isn't cheap fsub instruction.
 9326 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
 9327 return false;
 9328
 // Cst = 2^(DstBits-1), the smallest value that no longer fits in a signed
 // destination; Sel is true when Src is strictly below it.
 9329 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
 9330 SDValue Sel;
 9331
 9332 if (Node->isStrictFPOpcode()) {
 9333 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
 9334 Node->getOperand(0), /*IsSignaling*/ true);
 9335 Chain = Sel.getValue(1);
 9336 } else {
 9337 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
 9338 }
 9339
 9340 bool Strict = Node->isStrictFPOpcode() ||
 9341 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
 9342
 9343 if (Strict) {
 9344 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
 9345 // signmask then offset (the result of which should be fully representable).
 9346 // Sel = Src < 0x8000000000000000
 9347 // FltOfs = select Sel, 0, 0x8000000000000000
 9348 // IntOfs = select Sel, 0, 0x8000000000000000
 9349 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
 9350
 9351 // TODO: Should any fast-math-flags be set for the FSUB?
 9352 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
 9353 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
 9354 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
 9355 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
 9356 DAG.getConstant(0, dl, DstVT),
 9357 DAG.getConstant(SignMask, dl, DstVT));
 9358 SDValue SInt;
 9359 if (Node->isStrictFPOpcode()) {
 9360 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
 9361 { Chain, Src, FltOfs });
 9362 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
 9363 { Val.getValue(1), Val });
 9364 Chain = SInt.getValue(1);
 9365 } else {
 9366 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
 9367 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
 9368 }
 9369 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
 9370 } else {
 9371 // Expand based on maximum range of FP_TO_SINT:
 9372 // True = fp_to_sint(Src)
 9373 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
 9374 // Result = select (Src < 0x8000000000000000), True, False
 9375
 9376 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
 9377 // TODO: Should any fast-math-flags be set for the FSUB?
 9378 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
 9379 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
 // XOR with the sign mask is equivalent to adding 2^(DstBits-1) here.
 9380 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
 9381 DAG.getConstant(SignMask, dl, DstVT));
 9382 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
 9383 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
 9384 }
 9385 return true;
 9386}
9387
 9389 SDValue &Chain, SelectionDAG &DAG) const {
 // Expands UINT_TO_FP. Handles the nonneg fast path via SINT_TO_FP, then
 // the i64 -> f64 case using the split-halves trick from compiler-rt's
 // __floatundidf. Returns false when the expansion does not apply.
 9390 // This transform is not correct for converting 0 when rounding mode is set
 9391 // to round toward negative infinity which will produce -0.0. So disable
 9392 // under strictfp.
 9393 if (Node->isStrictFPOpcode())
 9394 return false;
 9395
 9396 SDValue Src = Node->getOperand(0);
 9397 EVT SrcVT = Src.getValueType();
 9398 EVT DstVT = Node->getValueType(0);
 9399
 9400 // If the input is known to be non-negative and SINT_TO_FP is legal then use
 9401 // it.
 9402 if (Node->getFlags().hasNonNeg() &&
 // NOTE(review): the legality check completing this condition is elided in
 // this listing.
 9404 Result =
 9405 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
 9406 return true;
 9407 }
 9408
 9409 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
 9410 return false;
 9411
 9412 // Only expand vector types if we have the appropriate vector bit
 9413 // operations.
 9414 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
 // NOTE(review): the remaining legality checks of this condition are elided
 // in this listing.
 9419 return false;
 9420
 9421 SDLoc dl(SDValue(Node, 0));
 9422
 9423 // Implementation of unsigned i64 to f64 following the algorithm in
 9424 // __floatundidf in compiler_rt. This implementation performs rounding
 9425 // correctly in all rounding modes with the exception of converting 0
 9426 // when rounding toward negative infinity. In that case the fsub will
 9427 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
 9428 // incorrect.
 // Magic bit patterns: 0x433... = 2^52, 0x453... = 2^84, and
 // 0x4530000000100000 = 2^84 + 2^52 as an f64 constant.
 9429 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
 9430 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
 9431 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
 9432 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
 9433 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
 9434 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
 9435
 // Split into 32-bit halves, embed each into an exact f64 (via OR with the
 // exponent pattern and a bitcast), then combine: (Hi*2^32 + Lo).
 9436 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
 9437 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
 9438 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
 9439 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
 9440 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
 9441 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
 9442 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
 9443 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
 9444 return true;
 9445}
9446
 9447SDValue
 9449 SelectionDAG &DAG) const {
 // Lowers FMINNUM/FMAXNUM (and their strict variants) to setcc+select when
 // the node is known NaN-free; returns the select or a null SDValue when
 // this lowering is not applicable/profitable.
 9450 unsigned Opcode = Node->getOpcode();
 9451 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
 9452 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
 9453 "Wrong opcode");
 9454
 // Without the no-NaNs guarantee a plain compare+select would not match the
 // FMINNUM/FMAXNUM NaN semantics, so bail out below in that case.
 9455 if (Node->getFlags().hasNoNaNs()) {
 9456 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
 9457 EVT VT = Node->getValueType(0);
 9458 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
 // NOTE(review): the second half of this condition is elided in this listing.
 9460 VT.isVector())
 9461 return SDValue();
 9462 SDValue Op1 = Node->getOperand(0);
 9463 SDValue Op2 = Node->getOperand(1);
 // min: Op1 < Op2 ? Op1 : Op2; max: Op1 > Op2 ? Op1 : Op2.
 9464 return DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred,
 9465 Node->getFlags());
 9466 }
 9467
 9468 return SDValue();
 9469}
9470
 9472 SelectionDAG &DAG) const {
 // Expands FMINNUM/FMAXNUM by trying, in order: vector splitting, the _IEEE
 // variants (with operand quieting for sNaNs), FMINIMUM/FMAXIMUM when the
 // operands are known NaN-free, and finally a setcc+select lowering.
 9473 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
 9474 return Expanded;
 9475
 9476 EVT VT = Node->getValueType(0);
 9477 if (VT.isScalableVector())
 9479 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
 9480
 9481 SDLoc dl(Node);
 // NOTE(review): the selection of the corresponding *_IEEE opcode is elided
 // in this listing.
 9482 unsigned NewOp =
 9484
 9485 if (isOperationLegalOrCustom(NewOp, VT)) {
 9486 SDValue Quiet0 = Node->getOperand(0);
 9487 SDValue Quiet1 = Node->getOperand(1);
 9488
 9489 if (!Node->getFlags().hasNoNaNs()) {
 9490 // Insert canonicalizes if it's possible we need to quiet to get correct
 9491 // sNaN behavior.
 9492 if (!DAG.isKnownNeverSNaN(Quiet0)) {
 9493 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
 9494 Node->getFlags());
 9495 }
 9496 if (!DAG.isKnownNeverSNaN(Quiet1)) {
 9497 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
 9498 Node->getFlags());
 9499 }
 9500 }
 9501
 9502 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
 9503 }
 9504
 9505 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
 9506 // instead if there are no NaNs.
 9507 if (Node->getFlags().hasNoNaNs() ||
 9508 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
 9509 DAG.isKnownNeverNaN(Node->getOperand(1)))) {
 9510 unsigned IEEE2018Op =
 9511 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
 9512 if (isOperationLegalOrCustom(IEEE2018Op, VT))
 9513 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
 9514 Node->getOperand(1), Node->getFlags());
 9515 }
 9516
 // NOTE(review): the call producing SelCC (createSelectForFMINNUM_FMAXNUM)
 // is elided in this listing.
 9518 return SelCC;
 9519
 9520 return SDValue();
 9521}
9522
 9524 SelectionDAG &DAG) const {
 // Expands FMINIMUM/FMAXIMUM (IEEE 754-2019 semantics: NaN propagates and
 // -0.0 < +0.0). Builds a base min/max, then patches NaN propagation and
 // signed-zero ordering on top as needed.
 9525 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
 9526 return Expanded;
 9527
 9528 SDLoc DL(N);
 9529 SDValue LHS = N->getOperand(0);
 9530 SDValue RHS = N->getOperand(1);
 9531 unsigned Opc = N->getOpcode();
 9532 EVT VT = N->getValueType(0);
 9533 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 9534 bool IsMax = Opc == ISD::FMAXIMUM;
 9535 SDNodeFlags Flags = N->getFlags();
 9536
 9537 // First, implement comparison not propagating NaN. If no native fmin or fmax
 9538 // available, use plain select with setcc instead.
 // NOTE(review): the declaration of MinMax is elided in this listing.
 9540 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
 9541 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
 9542
 9543 // FIXME: We should probably define fminnum/fmaxnum variants with correct
 9544 // signed zero behavior.
 9545 bool MinMaxMustRespectOrderedZero = false;
 9546
 9547 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
 9548 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
 9549 MinMaxMustRespectOrderedZero = true;
 9550 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
 9551 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
 9552 } else {
 // NOTE(review): the vector-legality guard preceding this unroll is elided
 // in this listing.
 9554 return DAG.UnrollVectorOp(N);
 9555
 9556 // NaN (if exists) will be propagated later, so orderness doesn't matter.
 9557 SDValue Compare =
 9558 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
 9559 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
 9560 }
 9561
 9562 // Propagate any NaN of both operands
 9563 if (!N->getFlags().hasNoNaNs() &&
 9564 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
 // If either operand is NaN (unordered compare), produce a quiet NaN.
 9565 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
 9567 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
 9568 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
 9569 }
 9570
 9571 // fminimum/fmaximum requires -0.0 less than +0.0
 9572 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
 9573 !DAG.isKnownNeverLogicalZero(RHS) && !DAG.isKnownNeverLogicalZero(LHS)) {
 // Only relevant when the preliminary result compares equal to 0.0: then
 // pick whichever operand is the preferred zero (+0 for max, -0 for min).
 9574 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
 9575 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
 9576 SDValue TestZero =
 9577 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
 9578 SDValue LCmp = DAG.getSelect(
 9579 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
 9580 MinMax, Flags);
 9581 SDValue RCmp = DAG.getSelect(
 9582 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
 9583 LCmp, Flags);
 9584 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
 9585 }
 9586
 9587 return MinMax;
 9588}
9589
 9591 SelectionDAG &DAG) const {
 // Expands FMINIMUMNUM/FMAXIMUMNUM (IEEE 754-2019 minimumNumber/
 // maximumNumber: NaN loses against a number, -0.0 < +0.0). Tries
 // progressively cheaper legal opcodes before falling back to an explicit
 // compare/select sequence with NaN and signed-zero fixups.
 9592 SDLoc DL(Node);
 9593 SDValue LHS = Node->getOperand(0);
 9594 SDValue RHS = Node->getOperand(1);
 9595 unsigned Opc = Node->getOpcode();
 9596 EVT VT = Node->getValueType(0);
 9597 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 9598 bool IsMax = Opc == ISD::FMAXIMUMNUM;
 9599 SDNodeFlags Flags = Node->getFlags();
 9600
 // NOTE(review): the selection of NewOp is elided in this listing.
 9601 unsigned NewOp =
 9603
 9604 if (isOperationLegalOrCustom(NewOp, VT)) {
 9605 if (!Flags.hasNoNaNs()) {
 9606 // Insert canonicalizes if it's possible we need to quiet to get correct
 9607 // sNaN behavior.
 9608 if (!DAG.isKnownNeverSNaN(LHS)) {
 9609 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
 9610 }
 9611 if (!DAG.isKnownNeverSNaN(RHS)) {
 9612 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
 9613 }
 9614 }
 9615
 9616 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
 9617 }
 9618
 9619 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
 9620 // same behaviors for all of other cases: +0.0 vs -0.0 included.
 9621 if (Flags.hasNoNaNs() ||
 9622 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
 9623 unsigned IEEE2019Op =
 9625 if (isOperationLegalOrCustom(IEEE2019Op, VT))
 9626 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
 9627 }
 9628
 9629 // FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
 9630 // either one for +0.0 vs -0.0.
 9631 if ((Flags.hasNoNaNs() ||
 9632 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
 9633 (Flags.hasNoSignedZeros() || DAG.isKnownNeverLogicalZero(LHS) ||
 9634 DAG.isKnownNeverLogicalZero(RHS))) {
 9635 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
 9636 if (isOperationLegalOrCustom(IEEE2008Op, VT))
 9637 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
 9638 }
 9639
 // NOTE(review): the vector-legality checks completing this condition are
 // elided in this listing.
 9640 if (VT.isVector() &&
 9643 return DAG.UnrollVectorOp(Node);
 9644
 9645 // If only one operand is NaN, override it with another operand.
 9646 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
 9647 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
 9648 }
 9649 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
 9650 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
 9651 }
 9652
 9653 // Always prefer RHS if equal.
 9654 SDValue MinMax =
 9655 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
 9656
 9657 // TODO: We need quiet sNaN if strictfp.
 9658
 9659 // Fixup signed zero behavior.
 9660 if (Flags.hasNoSignedZeros() || DAG.isKnownNeverLogicalZero(LHS) ||
 9661 DAG.isKnownNeverLogicalZero(RHS)) {
 9662 return MinMax;
 9663 }
 // When the result is a zero, pick the preferred-signed zero operand
 // (+0 for max, -0 for min) via IS_FPCLASS.
 9664 SDValue TestZero =
 9665 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
 9666 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
 9667 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
 9668 EVT IntVT = VT.changeTypeToInteger();
 9669 EVT FloatVT = VT.changeElementType(*DAG.getContext(), MVT::f32);
 9670 SDValue LHSTrunc = LHS;
 // NOTE(review): the guard on this FP_ROUND (presumably an IS_FPCLASS
 // legality/type check) is elided in this listing.
 9672 LHSTrunc = DAG.getNode(ISD::FP_ROUND, DL, FloatVT, LHS,
 9673 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
 9674 }
 9675 // It's OK to select from LHS and MinMax, with only one ISD::IS_FPCLASS, as
 9676 // we preferred RHS when generate MinMax, if the operands are equal.
 9677 SDValue RetZero = DAG.getSelect(
 9678 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHSTrunc, TestZero), LHS,
 9679 MinMax, Flags);
 9680 return DAG.getSelect(DL, VT, IsZero, RetZero, MinMax, Flags);
 9681}
9682
9683/// Returns a true value if if this FPClassTest can be performed with an ordered
9684/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
9685/// std::nullopt if it cannot be performed as a compare with 0.
9686static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
9687 const fltSemantics &Semantics,
9688 const MachineFunction &MF) {
9689 FPClassTest OrderedMask = Test & ~fcNan;
9690 FPClassTest NanTest = Test & fcNan;
9691 bool IsOrdered = NanTest == fcNone;
9692 bool IsUnordered = NanTest == fcNan;
9693
9694 // Skip cases that are testing for only a qnan or snan.
9695 if (!IsOrdered && !IsUnordered)
9696 return std::nullopt;
9697
9698 if (OrderedMask == fcZero &&
9699 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
9700 return IsOrdered;
9701 if (OrderedMask == (fcZero | fcSubnormal) &&
9702 MF.getDenormalMode(Semantics).inputsAreZero())
9703 return IsOrdered;
9704 return std::nullopt;
9705}
9706
// Expand a floating-point class test (llvm.is.fpclass) of Op against
// OrigTestMask into a DAG: first try cheap SETCC float compares when FP
// exceptions may be ignored and the needed condition codes are legal;
// otherwise fall back to integer bit tests on the value's bit pattern.
// NOTE(review): this capture is a doxygen rendering that dropped several
// source lines (including the signature line 9707 naming
// TargetLowering::expandIS_FPCLASS and parts of conditions below); the
// leading numbers on each line are the original file's line numbers.
9708 const FPClassTest OrigTestMask,
9709 SDNodeFlags Flags, const SDLoc &DL,
9710 SelectionDAG &DAG) const {
9711 EVT OperandVT = Op.getValueType();
9712 assert(OperandVT.isFloatingPoint());
9713 FPClassTest Test = OrigTestMask;
9714
9715 // Degenerated cases.
9716 if (Test == fcNone)
9717 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
9718 if (Test == fcAllFlags)
9719 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
9720
9721 // PPC double double is a pair of doubles, of which the higher part determines
9722 // the value class.
9723 if (OperandVT == MVT::ppcf128) {
9724 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
9725 DAG.getConstant(1, DL, MVT::i32));
9726 OperandVT = MVT::f64;
9727 }
9728
9729 // Floating-point type properties.
9730 EVT ScalarFloatVT = OperandVT.getScalarType();
9731 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
9732 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
9733 bool IsF80 = (ScalarFloatVT == MVT::f80);
9734
9735 // Some checks can be implemented using float comparisons, if floating point
9736 // exceptions are ignored.
9737 if (Flags.hasNoFPExcept() &&
// NOTE(review): the continuation of this condition (original line 9738) is
// missing from this capture — presumably a legality check on ISD::SETCC for
// the operand type; confirm against the original source.
9739 FPClassTest FPTestMask = Test;
9740 bool IsInvertedFP = false;
9741
// Prefer testing the complement when it needs fewer class bits; all compare
// opcodes chosen below are then flipped to their inverted forms.
9742 if (FPClassTest InvertedFPCheck =
9743 invertFPClassTestIfSimpler(FPTestMask, true)) {
9744 FPTestMask = InvertedFPCheck;
9745 IsInvertedFP = true;
9746 }
9747
9748 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
9749 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
9750
9751 // See if we can fold an | fcNan into an unordered compare.
9752 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
9753
9754 // Can't fold the ordered check if we're only testing for snan or qnan
9755 // individually.
9756 if ((FPTestMask & fcNan) != fcNan)
9757 OrderedFPTestMask = FPTestMask;
9758
9759 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
9760
// fcZero (optionally widened by denormal mode) can be a single compare
// against +0.0; isFCmpEqualZero also tells us whether denormal flushing
// makes the compare match subnormals too.
9761 if (std::optional<bool> IsCmp0 =
9762 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
9763 IsCmp0 && (isCondCodeLegalOrCustom(
9764 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
9765 OperandVT.getScalarType().getSimpleVT()))) {
9766
9767 // If denormals could be implicitly treated as 0, this is not equivalent
9768 // to a compare with 0 since it will also be true for denormals.
9769 return DAG.getSetCC(DL, ResultVT, Op,
9770 DAG.getConstantFP(0.0, DL, OperandVT),
9771 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
9772 }
9773
// isnan(x) --> x != x (unordered self-compare); SETO for the inverted form.
9774 if (FPTestMask == fcNan &&
// NOTE(review): line 9775 is missing from this capture — presumably
// `isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,` given the
// dangling argument on the next line; confirm against the original source.
9776 OperandVT.getScalarType().getSimpleVT()))
9777 return DAG.getSetCC(DL, ResultVT, Op, Op,
9778 IsInvertedFP ? ISD::SETO : ISD::SETUO);
9779
9780 bool IsOrderedInf = FPTestMask == fcInf;
9781 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
9782 isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
9783 : UnorderedCmpOpcode,
9784 OperandVT.getScalarType().getSimpleVT()) &&
// NOTE(review): lines 9785-9786 and 9788 are missing from this capture —
// additional FABS/constant legality conditions; confirm against the original.
9787 (OperandVT.isVector() &&
9789 // isinf(x) --> fabs(x) == inf
9790 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9791 SDValue Inf =
9792 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9793 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
9794 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
9795 }
9796
9797 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
9798 isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
9799 : UnorderedCmpOpcode,
9800 OperandVT.getSimpleVT())) {
9801 // isposinf(x) --> x == inf
9802 // isneginf(x) --> x == -inf
9803 // isposinf(x) || nan --> x u== inf
9804 // isneginf(x) || nan --> x u== -inf
9805
9806 SDValue Inf = DAG.getConstantFP(
9807 APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
9808 OperandVT);
9809 return DAG.getSetCC(DL, ResultVT, Op, Inf,
9810 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
9811 }
9812
9813 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
9814 // TODO: Could handle ordered case, but it produces worse code for
9815 // x86. Maybe handle ordered if fabs is free?
9816
9817 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9818 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
9819
9820 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
9821 OperandVT.getScalarType().getSimpleVT())) {
9822 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
9823
9824 // TODO: Maybe only makes sense if fabs is free. Integer test of
9825 // exponent bits seems better for x86.
9826 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9827 SDValue SmallestNormal = DAG.getConstantFP(
9828 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9829 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
9830 IsOrdered ? OrderedOp : UnorderedOp);
9831 }
9832 }
9833
9834 if (FPTestMask == fcNormal) {
9835 // TODO: Handle unordered
9836 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9837 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9838
9839 if (isCondCodeLegalOrCustom(IsFiniteOp,
9840 OperandVT.getScalarType().getSimpleVT()) &&
9841 isCondCodeLegalOrCustom(IsNormalOp,
9842 OperandVT.getScalarType().getSimpleVT()) &&
9843 isFAbsFree(OperandVT)) {
9844 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9845 SDValue Inf =
9846 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9847 SDValue SmallestNormal = DAG.getConstantFP(
9848 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9849
9850 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9851 SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
9852 SDValue IsNormal =
9853 DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
9854 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9855 return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
9856 }
9857 }
9858 }
9859
9860 // Some checks may be represented as inversion of simpler check, for example
9861 // "inf|normal|subnormal|zero" => !"nan".
9862 bool IsInverted = false;
9863
9864 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
9865 Test = InvertedCheck;
9866 IsInverted = true;
9867 }
9868
9869 // In the general case use integer operations.
9870 unsigned BitSize = OperandVT.getScalarSizeInBits();
9871 EVT IntVT = OperandVT.changeElementType(
9872 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), BitSize));
9873 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
9874
9875 // Various masks.
9876 APInt SignBit = APInt::getSignMask(BitSize);
9877 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9878 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9879 const unsigned ExplicitIntBitInF80 = 63;
9880 APInt ExpMask = Inf;
9881 if (IsF80)
9882 ExpMask.clearBit(ExplicitIntBitInF80);
9883 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9884 APInt QNaNBitMask =
9885 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9886 APInt InversionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
9887
9888 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
9889 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
9890 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
9891 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
9892 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
9893 SDValue ResultInversionMask = DAG.getConstant(InversionMask, DL, ResultVT);
9894
// Accumulate per-class partial results by OR-ing them into Res.
9895 SDValue Res;
9896 const auto appendResult = [&](SDValue PartialRes) {
9897 if (PartialRes) {
9898 if (Res)
9899 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
9900 else
9901 Res = PartialRes;
9902 }
9903 };
9904
// Lazily-built test for f80's explicit integer bit (bit 63 of the mantissa);
// cached so multiple class checks share one DAG subtree.
9905 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
9906 const auto getIntBitIsSet = [&]() -> SDValue {
9907 if (!IntBitIsSetV) {
9908 APInt IntBitMask(BitSize, 0);
9909 IntBitMask.setBit(ExplicitIntBitInF80);
9910 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
9911 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
9912 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
9913 }
9914 return IntBitIsSetV;
9915 };
9916
9917 // Split the value into sign bit and absolute value.
9918 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
9919 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
9920 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
9921
9922 // Tests that involve more than one class should be processed first.
9923 SDValue PartialRes;
9924
9925 if (IsF80)
9926 ; // Detect finite numbers of f80 by checking individual classes because
9927 // they have different settings of the explicit integer bit.
9928 else if ((Test & fcFinite) == fcFinite) {
9929 // finite(V) ==> (a << 1) < (inf << 1)
9930 //
9931 // See https://github.com/llvm/llvm-project/issues/169270, this is slightly
9932 // shorter than the `finite(V) ==> abs(V) < exp_mask` formula used before.
9933
// NOTE(review): the assertion's condition (original line 9934) is missing
// from this capture; line 9935 below is its message string.
9935 "finite check requires IEEE-like FP");
9936
// Shifting left by one drops the sign bit, so the unsigned compare covers
// both positive and negative finite values at once.
9937 SDValue One = DAG.getShiftAmountConstant(1, IntVT, DL);
9938 SDValue TwiceOp = DAG.getNode(ISD::SHL, DL, IntVT, OpAsInt, One);
9939 SDValue TwiceInf = DAG.getNode(ISD::SHL, DL, IntVT, ExpMaskV, One);
9940
9941 PartialRes = DAG.getSetCC(DL, ResultVT, TwiceOp, TwiceInf, ISD::SETULT);
9942 Test &= ~fcFinite;
9943 } else if ((Test & fcFinite) == fcPosFinite) {
9944 // finite(V) && V > 0 ==> V < exp_mask
9945 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
9946 Test &= ~fcPosFinite;
9947 } else if ((Test & fcFinite) == fcNegFinite) {
9948 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9949 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9950 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9951 Test &= ~fcNegFinite;
9952 }
9953 appendResult(PartialRes);
9954
9955 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
9956 // fcZero | fcSubnormal => test all exponent bits are 0
9957 // TODO: Handle sign bit specific cases
9958 if (PartialCheck == (fcZero | fcSubnormal)) {
9959 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
9960 SDValue ExpIsZero =
9961 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9962 appendResult(ExpIsZero);
9963 Test &= ~PartialCheck & fcAllFlags;
9964 }
9965 }
9966
9967 // Check for individual classes.
9968
9969 if (unsigned PartialCheck = Test & fcZero) {
9970 if (PartialCheck == fcPosZero)
9971 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
9972 else if (PartialCheck == fcZero)
9973 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
9974 else // ISD::fcNegZero
9975 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
9976 appendResult(PartialRes);
9977 }
9978
9979 if (unsigned PartialCheck = Test & fcSubnormal) {
9980 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
9981 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
// The -1 makes zero wrap to all-ones, so zero fails the unsigned compare
// and only non-zero values with a zero exponent pass.
9982 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
9983 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
9984 SDValue VMinusOneV =
9985 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
9986 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
9987 if (PartialCheck == fcNegSubnormal)
9988 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9989 appendResult(PartialRes);
9990 }
9991
9992 if (unsigned PartialCheck = Test & fcInf) {
9993 if (PartialCheck == fcPosInf)
9994 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
9995 else if (PartialCheck == fcInf)
9996 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
9997 else { // ISD::fcNegInf
9998 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9999 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
10000 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
10001 }
10002 appendResult(PartialRes);
10003 }
10004
10005 if (unsigned PartialCheck = Test & fcNan) {
10006 APInt InfWithQnanBit = Inf | QNaNBitMask;
10007 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
10008 if (PartialCheck == fcNan) {
10009 // isnan(V) ==> abs(V) > int(inf)
10010 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
10011 if (IsF80) {
10012 // Recognize unsupported values as NaNs for compatibility with glibc.
10013 // In them (exp(V)==0) == int_bit.
10014 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
10015 SDValue ExpIsZero =
10016 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
10017 SDValue IsPseudo =
10018 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
10019 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
10020 }
10021 } else if (PartialCheck == fcQNan) {
10022 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
10023 PartialRes =
10024 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
10025 } else { // ISD::fcSNan
10026 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
10027 // abs(V) < (unsigned(Inf) | quiet_bit)
10028 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
10029 SDValue IsNotQnan =
10030 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
10031 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
10032 }
10033 appendResult(PartialRes);
10034 }
10035
10036 if (unsigned PartialCheck = Test & fcNormal) {
10037 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
10038 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
10039 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
10040 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
10041 APInt ExpLimit = ExpMask - ExpLSB;
10042 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
10043 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
10044 if (PartialCheck == fcNegNormal)
10045 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
10046 else if (PartialCheck == fcPosNormal) {
10047 SDValue PosSignV =
10048 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInversionMask);
10049 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
10050 }
10051 if (IsF80)
10052 PartialRes =
10053 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
10054 appendResult(PartialRes);
10055 }
10056
// No class bit produced a partial result: the (possibly inverted) test is
// constant. Otherwise undo the inversion with an XOR against the all-ones
// mask of the boolean result type.
10057 if (!Res)
10058 return DAG.getConstant(IsInverted, DL, ResultVT);
10059 if (IsInverted)
10060 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInversionMask);
10061 return Res;
10062}
10063
10064 // Only expand vector types if we have the appropriate vector bit operations.
// Returns true when the ADD/MUL/shift/AND operations needed by the parallel
// popcount expansion are legal or custom for vector type VT.
// NOTE(review): parts of the return expression (original lines 10069-10070
// and 10072 — presumably the SRL/AND legality checks) are missing from this
// capture; confirm against the original source.
10065 static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
10066 assert(VT.isVector() && "Expected vector type");
10067 unsigned Len = VT.getScalarSizeInBits();
10068 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
// MUL is only needed for the final byte-sum step, which 8-bit elements skip.
10071 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
10073}
10074
// Expand ISD::CTPOP using the parallel bit-count ("Hacker's Delight" /
// bithacks) algorithm: pairwise, nibble, then byte sums, finishing with a
// multiply-by-0x0101... (or a shift-add ladder) to total the byte counts.
// Returns an empty SDValue when the width is unsupported (not a multiple of
// 8, or > 128 bits) so the caller falls back to a libcall/other expansion.
// NOTE(review): the signature line (original 10075, TargetLowering::expandCTPOP)
// is missing from this capture.
10076 SDLoc dl(Node);
10077 EVT VT = Node->getValueType(0);
10078 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10079 SDValue Op = Node->getOperand(0);
10080 unsigned Len = VT.getScalarSizeInBits();
10081 assert(VT.isInteger() && "CTPOP not implemented for this type.");
10082
10083 // TODO: Add support for irregular type lengths.
10084 if (!(Len <= 128 && Len % 8 == 0))
10085 return SDValue();
10086
10087 // Only expand vector types if we have the appropriate vector bit operations.
10088 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
10089 return SDValue();
10090
10091 // This is the "best" algorithm from
10092 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
10093 SDValue Mask55 =
10094 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
10095 SDValue Mask33 =
10096 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
10097 SDValue Mask0F =
10098 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
10099
10100 // v = v - ((v >> 1) & 0x55555555...)
10101 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
10102 DAG.getNode(ISD::AND, dl, VT,
10103 DAG.getNode(ISD::SRL, dl, VT, Op,
10104 DAG.getConstant(1, dl, ShVT)),
10105 Mask55));
10106 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
10107 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
10108 DAG.getNode(ISD::AND, dl, VT,
10109 DAG.getNode(ISD::SRL, dl, VT, Op,
10110 DAG.getConstant(2, dl, ShVT)),
10111 Mask33));
10112 // v = (v + (v >> 4)) & 0x0F0F0F0F...
10113 Op = DAG.getNode(ISD::AND, dl, VT,
10114 DAG.getNode(ISD::ADD, dl, VT, Op,
10115 DAG.getNode(ISD::SRL, dl, VT, Op,
10116 DAG.getConstant(4, dl, ShVT))),
10117 Mask0F);
10118
// Each byte now holds its own popcount; for 8-bit elements we are done.
10119 if (Len <= 8)
10120 return Op;
10121
10122 // Avoid the multiply if we only have 2 bytes to add.
10123 // TODO: Only doing this for scalars because vectors weren't as obviously
10124 // improved.
10125 if (Len == 16 && !VT.isVector()) {
10126 // v = (v + (v >> 8)) & 0x00FF;
10127 return DAG.getNode(ISD::AND, dl, VT,
10128 DAG.getNode(ISD::ADD, dl, VT, Op,
10129 DAG.getNode(ISD::SRL, dl, VT, Op,
10130 DAG.getConstant(8, dl, ShVT))),
10131 DAG.getConstant(0xFF, dl, VT));
10132 }
10133
10134 // v = (v * 0x01010101...) >> (Len - 8)
10135 SDValue V;
// NOTE(review): the condition guarding the multiply path (original lines
// 10136-10137 — presumably a legality check on ISD::MUL) is missing from
// this capture; the else branch emits an equivalent shift-add ladder.
10138 SDValue Mask01 =
10139 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
10140 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
10141 } else {
10142 V = Op;
10143 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
10144 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
10145 V = DAG.getNode(ISD::ADD, dl, VT, V,
10146 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
10147 }
10148 }
// The total popcount has accumulated in the top byte; shift it down.
10149 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
10150}
10151
// Vector-predicated variant of expandCTPOP: the same parallel bit-count
// algorithm, but every arithmetic node is a VP_* opcode threading the
// per-lane Mask and the explicit vector length VL (operands 1 and 2).
// NOTE(review): the signature line (original ~10152) is missing from this
// capture.
10153 SDLoc dl(Node);
10154 EVT VT = Node->getValueType(0);
10155 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10156 SDValue Op = Node->getOperand(0);
10157 SDValue Mask = Node->getOperand(1);
10158 SDValue VL = Node->getOperand(2);
10159 unsigned Len = VT.getScalarSizeInBits();
10160 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
10161
10162 // TODO: Add support for irregular type lengths.
10163 if (!(Len <= 128 && Len % 8 == 0))
10164 return SDValue();
10165
10166 // This is same algorithm of expandCTPOP from
10167 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
10168 SDValue Mask55 =
10169 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
10170 SDValue Mask33 =
10171 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
10172 SDValue Mask0F =
10173 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
10174
10175 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
10176
10177 // v = v - ((v >> 1) & 0x55555555...)
10178 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
10179 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
10180 DAG.getConstant(1, dl, ShVT), Mask, VL),
10181 Mask55, Mask, VL);
10182 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
10183
10184 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
10185 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
10186 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
10187 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
10188 DAG.getConstant(2, dl, ShVT), Mask, VL),
10189 Mask33, Mask, VL);
10190 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
10191
10192 // v = (v + (v >> 4)) & 0x0F0F0F0F...
// NOTE(review): the trailing comma below is the comma operator — the Tmp5
// assignment is part of the same statement. Behavior is the same as two
// semicolon-separated assignments, but it is easy to misread.
10193 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
10194 Mask, VL),
10195 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
10196 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
10197
10198 if (Len <= 8)
10199 return Op;
10200
10201 // v = (v * 0x01010101...) >> (Len - 8)
10202 SDValue V;
// NOTE(review): the start of this condition (original line 10203) is missing
// from this capture; it presumably checks that VP_MUL is legal/custom for
// the transformed type named on the next line.
10204 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
10205 SDValue Mask01 =
10206 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
10207 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
10208 } else {
// No multiply available: sum the byte counts with a shift-add ladder.
10209 V = Op;
10210 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
10211 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
10212 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
10213 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
10214 Mask, VL);
10215 }
10216 }
10217 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
10218 Mask, VL);
10219}
10220
// Expand ISD::CTLZ / ISD::CTLZ_ZERO_POISON. Prefers delegating to whichever
// of the two forms the target supports; otherwise smears the leading one bit
// rightwards with a log2 ladder of OR+SRL and counts via CTPOP of the
// complement ("Hacker's Delight"). Returns empty SDValue if the needed
// vector ops are unavailable.
// NOTE(review): the signature line (original ~10221) is missing from this
// capture, as are several legality-check lines flagged below.
10222 SDLoc dl(Node);
10223 EVT VT = Node->getValueType(0);
10224 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10225 SDValue Op = Node->getOperand(0);
10226 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10227
10228 // If the non-ZERO_POISON version is supported we can use that instead.
10229 if (Node->getOpcode() == ISD::CTLZ_ZERO_POISON &&
// NOTE(review): the legality check on ISD::CTLZ (original line 10230) is
// missing from this capture.
10231 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
10232
10233 // If the ZERO_POISON version is supported use that and handle the zero case.
// NOTE(review): the condition opening this block (original line 10234) is
// missing from this capture.
10235 EVT SetCCVT =
10236 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10237 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_POISON, dl, VT, Op);
10238 SDValue Zero = DAG.getConstant(0, dl, VT);
10239 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
// A zero source yields the element bit width, matching ISD::CTLZ semantics.
10240 return DAG.getSelect(dl, VT, SrcIsZero,
10241 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
10242 }
10243
10244 // Only expand vector types if we have the appropriate vector bit operations.
10245 // This includes the operations needed to expand CTPOP if it isn't supported.
10246 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
10248 !canExpandVectorCTPOP(*this, VT)) ||
10251 return SDValue();
10252
10253 // for now, we do this:
10254 // x = x | (x >> 1);
10255 // x = x | (x >> 2);
10256 // ...
10257 // x = x | (x >>16);
10258 // x = x | (x >>32); // for 64-bit input
10259 // return popcount(~x);
10260 //
10261 // Ref: "Hacker's Delight" by Henry Warren
10262 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
10263 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
10264 Op = DAG.getNode(ISD::OR, dl, VT, Op,
10265 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
10266 }
10267 Op = DAG.getNOT(dl, Op, VT);
10268 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
10269}
10270
// Vector-predicated variant of expandCTLZ: smear the leading one bit with
// VP_OR/VP_SRL, invert with VP_XOR against all-ones, then VP_CTPOP.
// Mask and VL (operands 1 and 2) are threaded through every node.
// NOTE(review): the signature line (original ~10271) is missing from this
// capture.
10272 SDLoc dl(Node);
10273 EVT VT = Node->getValueType(0);
10274 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10275 SDValue Op = Node->getOperand(0);
10276 SDValue Mask = Node->getOperand(1);
10277 SDValue VL = Node->getOperand(2);
10278 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10279
10280 // do this:
10281 // x = x | (x >> 1);
10282 // x = x | (x >> 2);
10283 // ...
10284 // x = x | (x >>16);
10285 // x = x | (x >>32); // for 64-bit input
10286 // return popcount(~x);
10287 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
10288 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
10289 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
10290 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
10291 VL);
10292 }
// VP_XOR with all-ones is the predicated form of getNOT.
10293 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
10294 Mask, VL);
10295 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
10296}
10297
// Expand a count-leading-sign-bits style node (the comment below names it
// CTLS) in terms of CTLZ_ZERO_POISON. XOR-ing with the sign smear (SRA by
// BW-1) turns redundant sign bits into leading zeros; the SHL-by-1 | 1 makes
// the value non-zero so CTLZ_ZERO_POISON is safe, and adjusts the count.
// The operand is frozen first so the duplicated use sees one value.
// NOTE(review): the signature line (original ~10298) is missing from this
// capture; name inferred from the CTLS formula comment — confirm.
10299 SDLoc dl(Node);
10300 EVT VT = Node->getValueType(0);
10301 SDValue Op = DAG.getFreeze(Node->getOperand(0));
10302 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10303
10304 // CTLS(x) = CTLZ(OR(SHL(XOR(x, SRA(x, BW-1)), 1), 1))
10305 // This transforms the sign bits into leading zeros that can be counted.
10306 SDValue ShiftAmt = DAG.getShiftAmountConstant(NumBitsPerElt - 1, VT, dl);
10307 SDValue SignBit = DAG.getNode(ISD::SRA, dl, VT, Op, ShiftAmt);
10308 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, SignBit);
10309 SDValue Shl =
10310 DAG.getNode(ISD::SHL, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
10311 SDValue Or = DAG.getNode(ISD::OR, dl, VT, Shl, DAG.getConstant(1, dl, VT));
10312 return DAG.getNode(ISD::CTLZ_ZERO_POISON, dl, VT, Or);
10313}
10314
// Lower CTTZ via a de Bruijn multiply + constant-pool table lookup:
// isolate the lowest set bit with (x & -x), multiply by a de Bruijn
// constant, shift the unique index into the low bits, and use it to index a
// byte table of trailing-zero counts. Only 32- and 64-bit widths have de
// Bruijn constants here. For plain CTTZ (not ZERO_POISON) a final select
// maps a zero source to BitWidth.
// NOTE(review): the opening of the signature (original line 10315,
// TargetLowering::CTTZTableLookup) and lines 10322, 10328, 10336 and 10338
// (a profitability/legality guard, the constant-pool pointer-info
// expression, and the table declaration) are missing from this capture.
10316 const SDLoc &DL, EVT VT, SDValue Op,
10317 unsigned BitWidth) const {
10318 if (BitWidth != 32 && BitWidth != 64)
10319 return SDValue();
10320
10321 const DataLayout &TD = DAG.getDataLayout();
10323 return SDValue();
10324
10325 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
10326 : APInt(64, 0x0218A392CD3D5DBFULL);
10327 MachinePointerInfo PtrInfo =
10329 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
// (x & -x) isolates the lowest set bit before the de Bruijn multiply.
10330 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
10331 SDValue Lookup = DAG.getNode(
10332 ISD::SRL, DL, VT,
10333 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
10334 DAG.getConstant(DeBruijn, DL, VT)),
10335 DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
10337
// Build the inverse mapping: for each bit position i, record i at the table
// slot its de Bruijn index lands on.
10339 for (unsigned i = 0; i < BitWidth; i++) {
10340 APInt Shl = DeBruijn.shl(i);
10341 APInt Lshr = Shl.lshr(ShiftAmt);
10342 Table[Lshr.getZExtValue()] = i;
10343 }
10344
10345 // Create a ConstantArray in Constant Pool
10346 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
10347 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
10348 TD.getPrefTypeAlign(CA->getType()));
10349 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
10350 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
10351 PtrInfo, MVT::i8);
// ZERO_POISON callers never pass zero, so the raw table result suffices.
10352 if (Node->getOpcode() == ISD::CTTZ_ZERO_POISON)
10353 return ExtLoad;
10354
10355 EVT SetCCVT =
10356 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10357 SDValue Zero = DAG.getConstant(0, DL, VT);
10358 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
10359 return DAG.getSelect(DL, VT, SrcIsZero,
10360 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
10361}
10362
// Expand ISD::CTTZ / ISD::CTTZ_ZERO_POISON. Delegation order: the other
// CTTZ form if legal, then the de Bruijn table lookup (when CTPOP would be
// expanded/libcalled anyway), then popcount(~x & (x-1)), or
// BW - ctlz(~x & (x-1)) when only CTLZ is available ("Hacker's Delight").
// NOTE(review): the signature line (original ~10363) is missing from this
// capture, as are several legality-check lines flagged below.
10364 SDLoc dl(Node);
10365 EVT VT = Node->getValueType(0);
10366 SDValue Op = Node->getOperand(0);
10367 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10368
10369 // If the non-ZERO_POISON version is supported we can use that instead.
10370 if (Node->getOpcode() == ISD::CTTZ_ZERO_POISON &&
// NOTE(review): the legality check on ISD::CTTZ (original line 10371) is
// missing from this capture.
10372 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
10373
10374 // If the ZERO_POISON version is supported use that and handle the zero case.
// NOTE(review): the condition opening this block (original line 10375) is
// missing from this capture.
10376 EVT SetCCVT =
10377 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10378 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_POISON, dl, VT, Op);
10379 SDValue Zero = DAG.getConstant(0, dl, VT);
10380 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
// A zero source yields the element bit width, matching ISD::CTTZ semantics.
10381 return DAG.getSelect(dl, VT, SrcIsZero,
10382 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
10383 }
10384
10385 // Only expand vector types if we have the appropriate vector bit operations.
10386 // This includes the operations needed to expand CTPOP if it isn't supported.
10387 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
10390 !canExpandVectorCTPOP(*this, VT)) ||
10394 return SDValue();
10395
10396 // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
10397 // to be expanded or converted to a libcall.
// NOTE(review): the guard for this call (original lines 10398-10399) is
// missing from this capture.
10400 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
10401 return V;
10402
10403 // for now, we use: { return popcount(~x & (x - 1)); }
10404 // unless the target has ctlz but not ctpop, in which case we use:
10405 // { return 32 - nlz(~x & (x-1)); }
10406 // Ref: "Hacker's Delight" by Henry Warren
// ~x & (x-1) turns the trailing zeros of x into a block of trailing ones
// and clears everything else, so its popcount equals cttz(x).
10407 SDValue Tmp = DAG.getNode(
10408 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
10409 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
10410
10411 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
// NOTE(review): that legality condition (original line 10412) is missing
// from this capture.
10413 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
10414 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
10415 }
10416
10417 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
10418}
10419
// Vector-predicated variant of expandCTTZ: compute ~x & (x - 1) with
// VP_XOR/VP_SUB/VP_AND (Mask and VL threaded through each node) and count
// the resulting trailing-ones block with VP_CTPOP.
// NOTE(review): the signature line (original ~10420) is missing from this
// capture.
10421 SDValue Op = Node->getOperand(0);
10422 SDValue Mask = Node->getOperand(1);
10423 SDValue VL = Node->getOperand(2);
10424 SDLoc dl(Node);
10425 EVT VT = Node->getValueType(0);
10426
10427 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
10428 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
10429 DAG.getAllOnesConstant(dl, VT), Mask, VL);
10430 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
10431 DAG.getConstant(1, dl, VT), Mask, VL);
10432 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
10433 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
10434}
10435
// Expand a VP "count trailing zero elements" node: convert the source to an
// i1 vector if needed, then select per lane between the step vector (index
// of an active lane) and the splatted EVL (for inactive lanes), and take
// the unsigned-min reduction — the smallest surviving value is either the
// first active lane's index or EVL when no lane is active.
// NOTE(review): the line naming this function (original ~10436) is missing
// from this capture; only the trailing parameter below survives.
10437 SelectionDAG &DAG) const {
10438 // %cond = to_bool_vec %source
10439 // %splat = splat /*val=*/VL
10440 // %tz = step_vector
10441 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
10442 // %r = vp.reduce.umin %v
10443 SDLoc DL(N);
10444 SDValue Source = N->getOperand(0);
10445 SDValue Mask = N->getOperand(1);
10446 SDValue EVL = N->getOperand(2);
10447 EVT SrcVT = Source.getValueType();
10448 EVT ResVT = N->getValueType(0);
10449 EVT ResVecVT =
10450 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
10451
10452 // Convert to boolean vector.
10453 if (SrcVT.getScalarType() != MVT::i1) {
10454 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
10455 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
10456 SrcVT.getVectorElementCount());
10457 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
10458 DAG.getCondCode(ISD::SETNE), Mask, EVL);
10459 }
10460
10461 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
10462 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
10463 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
10464 SDValue Select =
10465 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
// ExtEVL doubles as the reduction's start value, so an all-inactive vector
// still yields EVL.
10466 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
10467}
10468
10469/// Returns a type-legalized version of \p Mask as the first item in the
10470/// pair. The second item contains a type-legalized step vector that's
10471/// guaranteed to fit the number of elements in \p Mask.
10472/// If the stepvector would require splitting, returns an empty SDValue
10473/// as the second item to signal that the operation should be split instead.
10474 static std::pair<SDValue, SDValue>
// NOTE(review): the first line of the signature (original 10475, carrying
// the function name — getLegalMaskAndStepVector in upstream LLVM — and the
// Mask/ZeroIsPoison/DL parameters) is missing from this capture.
10476 SelectionDAG &DAG) {
10477 EVT MaskVT = Mask.getValueType();
10478 EVT BoolVT = MaskVT.getScalarType();
10479
10480 // Find a suitable type for a stepvector.
10481 // If zero is poison, we can assume the upper limit of the result is VF-1.
10482 ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
10483 if (MaskVT.isScalableVector())
10484 VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
10485 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10486 uint64_t EltWidth = TLI.getBitWidthForCttzElements(
10487 EVT(TLI.getVectorIdxTy(DAG.getDataLayout())),
10488 MaskVT.getVectorElementCount(), ZeroIsPoison, &VScaleRange);
10489 // If the step vector element type is smaller than the mask element type,
10490 // use the mask type directly to avoid widening issues.
10491 EltWidth = std::max(EltWidth, BoolVT.getFixedSizeInBits());
10492 EVT StepVT = MVT::getIntegerVT(EltWidth);
10493 EVT StepVecVT = MaskVT.changeVectorElementType(*DAG.getContext(), StepVT);
10494
10495 // If promotion or widening is required to make the type legal, do it here.
10496 // Promotion of integers within LegalizeVectorOps is looking for types of
10497 // the same size but with a smaller number of larger elements, not the usual
10498 // larger size with the same number of larger elements.
// NOTE(review): the declaration introducing TypeAction (original line 10499)
// is missing from this capture.
10500 TLI.getTypeAction(*DAG.getContext(), StepVecVT);
10501 SDValue StepVec;
10502 if (TypeAction == TargetLowering::TypePromoteInteger) {
10503 StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
10504 StepVec = DAG.getStepVector(DL, StepVecVT);
10505 } else if (TypeAction == TargetLowering::TypeWidenVector) {
10506 // For widening, the element count changes. Create a step vector with only
10507 // the original elements valid and zeros for padding. Also widen the mask.
10508 EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
10509 unsigned WideNumElts = WideVecVT.getVectorNumElements();
10510
10511 // Build widened step vector: <0, 1, ..., OrigNumElts-1, poison, poison, ..>
10512 SDValue OrigStepVec = DAG.getStepVector(DL, StepVecVT);
10513 SDValue UndefStep = DAG.getPOISON(WideVecVT);
10514 StepVec = DAG.getInsertSubvector(DL, UndefStep, OrigStepVec, 0);
10515
10516 // Widen mask: pad with zeros.
// Zero padding keeps the extra lanes inactive so they cannot contribute to
// the caller's reduction.
10517 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), BoolVT, WideNumElts);
10518 SDValue ZeroMask = DAG.getConstant(0, DL, WideMaskVT);
10519 Mask = DAG.getInsertSubvector(DL, ZeroMask, Mask, 0);
10520 } else if (TypeAction == TargetLowering::TypeSplitVector) {
10521 // The stepvector type would require splitting. Signal to the caller
10522 // that the operation should be split instead of expanded.
10523 return {Mask, SDValue()};
10524 } else {
10525 StepVec = DAG.getStepVector(DL, StepVecVT);
10526 }
10527
10528 return {Mask, StepVec};
10529}
10530
// Expand a VECTOR_FIND_LAST_ACTIVE-style node: keep each active lane's step
// index (inactive lanes become 0) and take the unsigned-max reduction. When
// the step vector's type would need splitting, the mask is split in two
// halves instead, recursing through ISD::VECTOR_FIND_LAST_ACTIVE on each
// half and selecting on whether any high lane was active.
// NOTE(review): the line naming this function (original ~10531) is missing
// from this capture; name inferred from the split path's recursion into
// ISD::VECTOR_FIND_LAST_ACTIVE — confirm against the original source.
10532 SelectionDAG &DAG) const {
10533 SDLoc DL(N);
10534 auto [Mask, StepVec] = getLegalMaskAndStepVector(
10535 N->getOperand(0), /*ZeroIsPoison=*/true, DL, DAG);
10536
10537 // If StepVec is empty, the stepvector would require splitting.
10538 // Split the operation instead and let it be recursively legalized.
10539 if (!StepVec) {
10540 EVT MaskVT = N->getOperand(0).getValueType();
10541 EVT ResVT = N->getValueType(0);
10542
10543 // Split the mask
10544 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(MaskVT);
10545 auto [MaskLo, MaskHi] = DAG.SplitVector(N->getOperand(0), DL);
10546
10547 // Create split VECTOR_FIND_LAST_ACTIVE operations
10548 SDValue LoResult =
10549 DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, ResVT, MaskLo);
10550 SDValue HiResult =
10551 DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, ResVT, MaskHi);
10552
10553 // Check if any lane is active in the high mask.
10554 SDValue AnyHiActive = DAG.getNode(ISD::VECREDUCE_OR, DL, MVT::i1, MaskHi);
// NOTE(review): the statement introducing Cond (original line 10555 —
// presumably an extension of AnyHiActive to the SETCC result type) is
// missing from this capture.
10556 AnyHiActive, DL,
10557 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i1),
10558 MVT::i1);
10559
10560 // Adjust HiResult by adding the number of elements in Lo
10561 SDValue LoNumElts =
10562 DAG.getElementCount(DL, ResVT, LoVT.getVectorElementCount());
10563 SDValue AdjustedHiResult =
10564 DAG.getNode(ISD::ADD, DL, ResVT, HiResult, LoNumElts);
10565
10566 // Return: AnyHiActive ? AdjustedHiResult : LoResult;
10567 return DAG.getNode(ISD::SELECT, DL, ResVT, Cond, AdjustedHiResult,
10568 LoResult);
10569 }
10570
10571 EVT StepVecVT = StepVec.getValueType();
10572 EVT StepVT = StepVec.getValueType().getVectorElementType();
10573
10574 // Zero out lanes with inactive elements, then find the highest remaining
10575 // value from the stepvector.
10576 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
10577 SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
10578 SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
// Truncate/extend the reduced index to the node's declared result type.
10579 return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
10580}
10581
// Expand a loop-dependence mask node (LOOP_DEPENDENCE_WAR_MASK /
// LOOP_DEPENDENCE_RAW_MASK, per the IsReadAfterWrite check below) into a
// pointer-distance computation feeding GET_ACTIVE_LANE_MASK.
// Operands: 0 = source pointer, 1 = sink pointer, 2 = element size in bytes,
// 3 = constant lane offset.
// NOTE(review): the opening signature line (doxygen line 10582) is absent
// from this extracted view; the body below begins mid-signature.
10583 SelectionDAG &DAG) const {
10584 SDLoc DL(N);
10585 EVT VT = N->getValueType(0);
10586 SDValue SourceValue = N->getOperand(0);
10587 SDValue SinkValue = N->getOperand(1);
10588 SDValue EltSizeInBytes = N->getOperand(2);
10589
10590 // Note: The lane offset is scalable if the mask is scalable.
10591 ElementCount LaneOffsetEC =
10592 ElementCount::get(N->getConstantOperandVal(3), VT.isScalableVT());
10593
10594 EVT AddrVT = SourceValue->getValueType(0);
10595 bool IsReadAfterWrite = N->getOpcode() == ISD::LOOP_DEPENDENCE_RAW_MASK;
10596
10597 // Take the difference between the pointers and divide by the element size,
10598 // to see how many lanes separate them.
10599 SDValue Diff = DAG.getNode(ISD::SUB, DL, AddrVT, SinkValue, SourceValue);
10600 if (IsReadAfterWrite)
10601 Diff = DAG.getNode(ISD::ABS, DL, AddrVT, Diff);
10602 Diff = DAG.getNode(ISD::SDIV, DL, AddrVT, Diff, EltSizeInBytes);
10603
10604 // The pointers do not alias if:
10605 // * Diff <= 0 (WAR_MASK)
10606 // * Diff == 0 (RAW_MASK)
10607 EVT CmpVT =
10608 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), AddrVT);
10609 SDValue Zero = DAG.getConstant(0, DL, AddrVT);
10610 SDValue Cmp = DAG.getSetCC(DL, CmpVT, Diff, Zero,
10611 IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);
10612
10613 // The pointers do not alias if:
10614 // Lane + LaneOffset < Diff (WAR/RAW_MASK)
10615 SDValue LaneOffset = DAG.getElementCount(DL, AddrVT, LaneOffsetEC);
10616 SDValue MaskN = DAG.getSelect(
10617 DL, AddrVT, Cmp,
// NOTE(review): doxygen line 10618 is missing from this extracted view -- it
// supplies the "pointers do not alias" arm of this select (an AddrVT value
// whose closing parenthesis appears on the next line).
10619 AddrVT),
10620 Diff);
10621
10622 return DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, VT, LaneOffset, MaskN);
10623}
10624
// Expand ABS (or, when IsNegative, 0 - ABS) into legal operations:
// smax/umin/smin forms when the needed min/max op is available, otherwise
// the classic sra/xor/sub bit trick.
// NOTE(review): the opening signature line (doxygen line 10625) is absent
// from this extracted view; the body below begins mid-signature.
10626 bool IsNegative) const {
10627 SDLoc dl(N);
10628 EVT VT = N->getValueType(0);
10629 SDValue Op = N->getOperand(0);
10630
10631 // abs(x) -> smax(x,sub(0,x))
10632 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): doxygen line 10633 is missing -- it held the second conjunct
// of this condition (presumably an ISD::SMAX legality check).
10634 SDValue Zero = DAG.getConstant(0, dl, VT);
// Freeze Op so its two uses below observe the same value.
10635 Op = DAG.getFreeze(Op);
10636 return DAG.getNode(ISD::SMAX, dl, VT, Op,
10637 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10638 }
10639
10640 // abs(x) -> umin(x,sub(0,x))
10641 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): doxygen line 10642 is missing -- presumably the ISD::UMIN
// legality check for this condition.
10643 SDValue Zero = DAG.getConstant(0, dl, VT);
10644 Op = DAG.getFreeze(Op);
10645 return DAG.getNode(ISD::UMIN, dl, VT, Op,
10646 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10647 }
10648
10649 // 0 - abs(x) -> smin(x, sub(0,x))
10650 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): doxygen line 10651 is missing -- presumably the ISD::SMIN
// legality check for this condition.
10652 SDValue Zero = DAG.getConstant(0, dl, VT);
10653 Op = DAG.getFreeze(Op);
10654 return DAG.getNode(ISD::SMIN, dl, VT, Op,
10655 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10656 }
10657
10658 // Only expand vector types if we have the appropriate vector operations.
10659 if (VT.isVector() &&
// NOTE(review): doxygen line 10660 is missing -- the start of the vector
// operation availability checks (likely the shift-op check).
10661 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
10662 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
// NOTE(review): doxygen line 10663 is missing -- the final conjunct closing
// this condition (likely the ISD::XOR check).
10664 return SDValue();
10665
10666 Op = DAG.getFreeze(Op);
// Shift is the sign of Op broadcast to all bits (all-ones if negative,
// zero otherwise) -- the arithmetic shift by bitwidth-1 guarantees this.
10667 SDValue Shift = DAG.getNode(
10668 ISD::SRA, dl, VT, Op,
10669 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
10670 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
10671
10672 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
10673 if (!IsNegative)
10674 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
10675
10676 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
10677 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
10678}
10679
// Expand ABDS/ABDU (absolute difference, signed/unsigned) into legal
// operations, trying progressively cheaper forms: max-min, usubsat-or,
// abs(sub) when no overflow, branchless setcc math, usubo flag math, and
// finally a select of the two subtract orders.
// NOTE(review): the opening signature line (doxygen line 10680) is absent
// from this extracted view.
10681 SDLoc dl(N);
10682 EVT VT = N->getValueType(0);
10683 SDValue LHS = N->getOperand(0);
10684 SDValue RHS = N->getOperand(1);
10685 bool IsSigned = N->getOpcode() == ISD::ABDS;
10686
10687 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
10688 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
10689 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
10690 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
10691 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
// Freeze both operands: each is used twice and must resolve consistently.
10692 LHS = DAG.getFreeze(LHS);
10693 RHS = DAG.getFreeze(RHS);
10694 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
10695 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
10696 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
10697 }
10698
10699 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
10700 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) {
10701 LHS = DAG.getFreeze(LHS);
10702 RHS = DAG.getFreeze(RHS);
10703 return DAG.getNode(ISD::OR, dl, VT,
10704 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
10705 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
10706 }
10707
10708 // If the subtract doesn't overflow then just use abs(sub())
10709 bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
10710
10711 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
10712 return DAG.getNode(ISD::ABS, dl, VT,
10713 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
10714
10715 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
10716 return DAG.getNode(ISD::ABS, dl, VT,
10717 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
10718
10719 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// NOTE(review): doxygen line 10720 is missing from this extracted view -- it
// defined `CC` (presumably ISD::SETGT / ISD::SETUGT selected by IsSigned),
// used by the getSetCC call below.
10721 LHS = DAG.getFreeze(LHS);
10722 RHS = DAG.getFreeze(RHS);
10723 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
10724
10725 // Branchless expansion iff cmp result is allbits:
10726 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
10727 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
10728 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10729 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
10730 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
10731 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
10732 }
10733
10734 // Similar to the branchless expansion, if we don't prefer selects, use the
10735 // (sign-extended) usubo overflow flag if the (scalar) type is illegal as this
10736 // is more likely to legalize cleanly: abdu(lhs, rhs) -> sub(xor(sub(lhs,
10737 // rhs), uof(lhs, rhs)), uof(lhs, rhs))
10738 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
// NOTE(review): doxygen line 10739 is missing -- the final conjunct of this
// condition (per the comment above, presumably a "prefer selects" check).
10740 SDValue USubO =
10741 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
10742 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
10743 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
10744 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
10745 }
10746
10747 // FIXME: Should really try to split the vector in case it's legal on a
10748 // subvector.
// NOTE(review): doxygen line 10749 is missing -- the `if` condition guarding
// this unroll (presumably checking for a vector VT whose select/sub ops are
// unavailable).
10750 return DAG.UnrollVectorOp(N);
10751
10752 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10753 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10754 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
10755 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
10756}
10757
// Expand AVGFLOORS/AVGFLOORU/AVGCEILS/AVGCEILU (fixed-point averaging)
// without overflowing the intermediate sum. Tries: plain add+shift when the
// operands are known pre-extended, widen+add+shift for scalars with a free
// truncate, a uaddo-based form for illegal AVGFLOORU scalars, and finally
// the Hacker's-Delight-style and/or + xor + shift identity.
// NOTE(review): the opening signature line (doxygen line 10758) is absent
// from this extracted view.
10759 SDLoc dl(N);
10760 EVT VT = N->getValueType(0);
10761 SDValue LHS = N->getOperand(0);
10762 SDValue RHS = N->getOperand(1);
10763
10764 unsigned Opc = N->getOpcode();
10765 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
10766 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
10767 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
10768 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
10769 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
10770 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
// NOTE(review): doxygen line 10771 is missing -- the start of the assert
// whose continuation appears on the next two lines (checking Opc is one of
// the four AVG opcodes).
10772 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
10773 "Unknown AVG node");
10774
10775 // If the operands are already extended, we can add+shift.
10776 bool IsExt =
10777 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
10778 DAG.ComputeNumSignBits(RHS) >= 2) ||
10779 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
10780 DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
10781 if (IsExt) {
10782 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
// Ceil variants add 1 before the halving shift to round up.
10783 if (!IsFloor)
10784 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
10785 return DAG.getNode(ShiftOpc, dl, VT, Sum,
10786 DAG.getShiftAmountConstant(1, VT, dl));
10787 }
10788
10789 // For scalars, see if we can efficiently extend/truncate to use add+shift.
10790 if (VT.isScalarInteger()) {
10791 EVT ExtVT = VT.widenIntegerElementType(*DAG.getContext());
10792 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
10793 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
10794 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
10795 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
10796 if (!IsFloor)
10797 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
10798 DAG.getConstant(1, dl, ExtVT));
10799 // Just use SRL as we will be truncating away the extended sign bits.
10800 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
10801 DAG.getShiftAmountConstant(1, ExtVT, dl));
10802 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
10803 }
10804 }
10805
10806 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
10807 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT) &&
// NOTE(review): doxygen lines 10808-10809 are missing -- the remaining
// conjuncts of this condition (presumably UADDO/shift availability checks).
10810 SDValue UAddWithOverflow =
10811 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
10812
10813 SDValue Sum = UAddWithOverflow.getValue(0);
10814 SDValue Overflow = UAddWithOverflow.getValue(1);
10815
10816 // Right shift the sum by 1
10817 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
10818 DAG.getShiftAmountConstant(1, VT, dl));
10819
// The carry bit becomes the top bit of the halved result.
10820 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
10821 SDValue OverflowShl = DAG.getNode(
10822 ISD::SHL, dl, VT, ZeroExtOverflow,
10823 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
10824
10825 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
10826 }
10827
10828 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
10829 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
10830 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
10831 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
10832 LHS = DAG.getFreeze(LHS);
10833 RHS = DAG.getFreeze(RHS);
10834 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
10835 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
10836 SDValue Shift =
10837 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
10838 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
10839}
10840
// Expand ISD::BSWAP (byte reversal) for i16/i32/i64 scalar element types
// into rotates or shift/and/or sequences.
// NOTE(review): the opening signature line (doxygen line 10841) is absent
// from this extracted view.
10842 SDLoc dl(N);
10843 EVT VT = N->getValueType(0);
10844 SDValue Op = N->getOperand(0);
10845
10846 if (!VT.isSimple())
10847 return SDValue();
10848
10849 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10850 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10851 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10852 default:
10853 return SDValue();
10854 case MVT::i16:
10855 // Use a rotate by 8. This can be further expanded if necessary.
10856 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10857 case MVT::i32:
10858 // This is meant for ARM specifically, which has ROTR but no ROTL.
10859 // t = x ^ rotr(x, 16)
10860 // t = bic(t, 0x00ff0000)
10861 // t = lshr(t, 8)
10862 // x = t ^ rotr(x, 8)
// NOTE(review): doxygen line 10863 is missing -- the `if` condition opening
// this ROTR-based path (presumably an ISD::ROTR availability check). When
// the path is not taken, control falls through to the generic shift/mask
// sequence below.
10864 SDValue Rotr16 =
10865 DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(16, dl, SHVT));
10866 SDValue Tmp = DAG.getNode(ISD::XOR, dl, VT, Op, Rotr16);
10867 Tmp = DAG.getNode(ISD::AND, dl, VT, Tmp,
10868 DAG.getConstant(0xFF00FFFF, dl, VT));
10869 Tmp = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(8, dl, SHVT));
10870 SDValue Rotr8 =
10871 DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10872 return DAG.getNode(ISD::XOR, dl, VT, Tmp, Rotr8);
10873 }
// Generic i32 bswap: isolate each byte, move it to its mirrored position,
// and OR the pieces back together.
10874 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10875 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
10876 DAG.getConstant(0xFF00, dl, VT));
10877 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
10878 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10879 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
10880 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10881 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
10882 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
10883 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
10884 case MVT::i64:
// i64 bswap: same byte-mirroring pattern with eight byte lanes.
10885 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
10886 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
10887 DAG.getConstant(255ULL<<8, dl, VT));
10888 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
10889 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
10890 DAG.getConstant(255ULL<<16, dl, VT));
10891 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
10892 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
10893 DAG.getConstant(255ULL<<24, dl, VT));
10894 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
10895 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10896 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
10897 DAG.getConstant(255ULL<<24, dl, VT));
10898 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10899 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
10900 DAG.getConstant(255ULL<<16, dl, VT));
10901 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
10902 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
10903 DAG.getConstant(255ULL<<8, dl, VT));
10904 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
10905 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
10906 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
10907 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
10908 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
10909 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
10910 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
10911 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
10912 }
10913}
10914
// Expand the vector-predicated BSWAP (VP_BSWAP): same shift/and/or byte
// mirroring as the non-VP expansion, but every node threads the Mask and
// EVL (explicit vector length) operands.
// NOTE(review): the opening signature line (doxygen line 10915) is absent
// from this extracted view.
10916 SDLoc dl(N);
10917 EVT VT = N->getValueType(0);
10918 SDValue Op = N->getOperand(0);
10919 SDValue Mask = N->getOperand(1);
10920 SDValue EVL = N->getOperand(2);
10921
10922 if (!VT.isSimple())
10923 return SDValue();
10924
10925 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10926 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10927 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10928 default:
10929 return SDValue();
10930 case MVT::i16:
10931 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10932 Mask, EVL);
10933 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10934 Mask, EVL);
10935 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
10936 case MVT::i32:
10937 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10938 Mask, EVL);
10939 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
10940 Mask, EVL);
10941 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
10942 Mask, EVL);
10943 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10944 Mask, EVL);
10945 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10946 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
10947 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10948 Mask, EVL);
10949 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10950 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10951 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10952 case MVT::i64:
10953 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10954 Mask, EVL);
10955 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10956 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10957 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
10958 Mask, EVL);
10959 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10960 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10961 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
10962 Mask, EVL);
10963 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10964 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10965 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
10966 Mask, EVL);
10967 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10968 Mask, EVL);
10969 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
10970 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10971 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10972 Mask, EVL);
10973 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
10974 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10975 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
10976 Mask, EVL);
10977 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10978 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10979 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10980 Mask, EVL);
10981 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
10982 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
10983 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10984 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10985 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
10986 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10987 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
10988 }
10989}
10990
// Expand ISD::BITREVERSE. For power-of-two sizes >= 8 bits: BSWAP first,
// then swap nibbles, bit-pairs, and single bits with masked shifts.
// Otherwise fall back to moving each bit individually.
// NOTE(review): the opening signature line (doxygen line 10991) is absent
// from this extracted view.
10992 SDLoc dl(N);
10993 EVT VT = N->getValueType(0);
10994 SDValue Op = N->getOperand(0);
10995 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10996 unsigned Sz = VT.getScalarSizeInBits();
10997
10998 SDValue Tmp, Tmp2, Tmp3;
10999
11000 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
11001 // and finally the i1 pairs.
11002 // TODO: We can easily support i4/i2 legal types if any target ever does.
11003 if (Sz >= 8 && isPowerOf2_32(Sz)) {
11004 // Create the masks - repeating the pattern every byte.
11005 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
11006 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
11007 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
11008
11009 // BSWAP if the type is wider than a single byte.
11010 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
11011
11012 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
11013 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
11014 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
11015 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
11016 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
11017 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
11018
11019 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
11020 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
11021 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
11022 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
11023 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
11024 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
11025
11026 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
11027 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
11028 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
11029 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
11030 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
11031 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
11032 return Tmp;
11033 }
11034
// Fallback for odd sizes: shift bit J of the input to position I (its
// mirrored slot), isolate it, and OR it into the accumulator.
11035 Tmp = DAG.getConstant(0, dl, VT);
11036 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
11037 if (I < J)
11038 Tmp2 =
11039 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
11040 else
11041 Tmp2 =
11042 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
11043
11044 APInt Shift = APInt::getOneBitSet(Sz, J);
11045 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
11046 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
11047 }
11048
11049 return Tmp;
11050}
11051
// Expand ISD::VP_BITREVERSE: same BSWAP + nibble/pair/bit swap sequence as
// the non-VP expansion, with Mask and EVL threaded through every node.
// Returns SDValue() (no expansion) for non-power-of-two or sub-byte sizes.
// NOTE(review): the opening signature line (doxygen line 11052) is absent
// from this extracted view.
11053 assert(N->getOpcode() == ISD::VP_BITREVERSE);
11054
11055 SDLoc dl(N);
11056 EVT VT = N->getValueType(0);
11057 SDValue Op = N->getOperand(0);
11058 SDValue Mask = N->getOperand(1);
11059 SDValue EVL = N->getOperand(2);
11060 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
11061 unsigned Sz = VT.getScalarSizeInBits();
11062
11063 SDValue Tmp, Tmp2, Tmp3;
11064
11065 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
11066 // and finally the i1 pairs.
11067 // TODO: We can easily support i4/i2 legal types if any target ever does.
11068 if (Sz >= 8 && isPowerOf2_32(Sz)) {
11069 // Create the masks - repeating the pattern every byte.
11070 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
11071 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
11072 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
11073
11074 // BSWAP if the type is wider than a single byte.
11075 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
11076
11077 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
11078 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
11079 Mask, EVL);
11080 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11081 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
11082 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
11083 Mask, EVL);
11084 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
11085 Mask, EVL);
11086 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
11087
11088 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
11089 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
11090 Mask, EVL);
11091 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11092 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
11093 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
11094 Mask, EVL);
11095 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
11096 Mask, EVL);
11097 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
11098
11099 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
11100 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
11101 Mask, EVL);
11102 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11103 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
11104 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
11105 Mask, EVL);
11106 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
11107 Mask, EVL);
11108 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
11109 return Tmp;
11110 }
11111 return SDValue();
11112}
11113
// Scalarize a (fixed-width) vector load into per-element loads, or -- for
// non-byte-sized element types -- into one wide integer load followed by
// shift/mask extraction of each element. Returns {value, chain}.
// NOTE(review): the signature line with the function name (doxygen line
// 11115) is absent from this extracted view; the body begins mid-signature.
11114std::pair<SDValue, SDValue>
11116 SelectionDAG &DAG) const {
11117 SDLoc SL(LD);
11118 SDValue Chain = LD->getChain();
11119 SDValue BasePTR = LD->getBasePtr();
11120 EVT SrcVT = LD->getMemoryVT();
11121 EVT DstVT = LD->getValueType(0);
11122 ISD::LoadExtType ExtType = LD->getExtensionType();
11123
11124 if (SrcVT.isScalableVector())
11125 report_fatal_error("Cannot scalarize scalable vector loads");
11126
11127 unsigned NumElem = SrcVT.getVectorNumElements();
11128
11129 EVT SrcEltVT = SrcVT.getScalarType();
11130 EVT DstEltVT = DstVT.getScalarType();
11131
11132 // A vector must always be stored in memory as-is, i.e. without any padding
11133 // between the elements, since various code depend on it, e.g. in the
11134 // handling of a bitcast of a vector type to int, which may be done with a
11135 // vector store followed by an integer load. A vector that does not have
11136 // elements that are byte-sized must therefore be stored as an integer
11137 // built out of the extracted vector elements.
11138 if (!SrcEltVT.isByteSized()) {
11139 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
11140 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
11141
11142 unsigned NumSrcBits = SrcVT.getSizeInBits();
11143 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
11144
11145 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
11146 SDValue SrcEltBitMask = DAG.getConstant(
11147 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
11148
11149 // Load the whole vector and avoid masking off the top bits as it makes
11150 // the codegen worse.
11151 SDValue Load =
11152 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
11153 LD->getPointerInfo(), SrcIntVT, LD->getBaseAlign(),
11154 LD->getMemOperand()->getFlags(), LD->getAAInfo());
11155
// NOTE(review): doxygen line 11156 is missing from this extracted view --
// it declared the `Vals` container (presumably SmallVector<SDValue, 8>)
// filled by the loop below.
11157 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
// Big-endian lays elements out from the high end of the integer.
11158 unsigned ShiftIntoIdx =
11159 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
11160 SDValue ShiftAmount = DAG.getShiftAmountConstant(
11161 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
11162 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
11163 SDValue Elt =
11164 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
11165 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
11166
11167 if (ExtType != ISD::NON_EXTLOAD) {
11168 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
11169 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
11170 }
11171
11172 Vals.push_back(Scalar);
11173 }
11174
11175 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
11176 return std::make_pair(Value, Load.getValue(1));
11177 }
11178
11179 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
11180 assert(SrcEltVT.isByteSized());
11181
// NOTE(review): doxygen line 11182 is missing from this extracted view --
// it declared the `Vals` container used alongside LoadChains below.
11183 SmallVector<SDValue, 8> LoadChains;
11184
11185 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
11186 SDValue ScalarLoad = DAG.getExtLoad(
11187 ExtType, SL, DstEltVT, Chain, BasePTR,
11188 LD->getPointerInfo().getWithOffset(Idx * Stride), SrcEltVT,
11189 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
11190
11191 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
11192
11193 Vals.push_back(ScalarLoad.getValue(0));
11194 LoadChains.push_back(ScalarLoad.getValue(1));
11195 }
11196
// Merge the per-element load chains into a single output chain.
11197 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
11198 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
11199
11200 return std::make_pair(Value, NewChain);
11201}
11202
// Scalarize a (fixed-width) vector store into per-element trunc-stores, or
// -- for non-byte-sized element types -- pack all elements into a single
// integer and emit one store. Returns the resulting chain.
// NOTE(review): the opening signature line (doxygen line 11203) is absent
// from this extracted view; the body below begins mid-signature.
11204 SelectionDAG &DAG) const {
11205 SDLoc SL(ST);
11206
11207 SDValue Chain = ST->getChain();
11208 SDValue BasePtr = ST->getBasePtr();
11209 SDValue Value = ST->getValue();
11210 EVT StVT = ST->getMemoryVT();
11211
11212 if (StVT.isScalableVector())
11213 report_fatal_error("Cannot scalarize scalable vector stores");
11214
11215 // The type of the data we want to save
11216 EVT RegVT = Value.getValueType();
11217 EVT RegSclVT = RegVT.getScalarType();
11218
11219 // The type of data as saved in memory.
11220 EVT MemSclVT = StVT.getScalarType();
11221
11222 unsigned NumElem = StVT.getVectorNumElements();
11223
11224 // A vector must always be stored in memory as-is, i.e. without any padding
11225 // between the elements, since various code depend on it, e.g. in the
11226 // handling of a bitcast of a vector type to int, which may be done with a
11227 // vector store followed by an integer load. A vector that does not have
11228 // elements that are byte-sized must therefore be stored as an integer
11229 // built out of the extracted vector elements.
11230 if (!MemSclVT.isByteSized()) {
11231 unsigned NumBits = StVT.getSizeInBits();
11232 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
11233
11234 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
11235
// OR each truncated element into its bit position within the packed
// integer; big-endian fills from the high end.
11236 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
11237 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
11238 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
11239 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
11240 unsigned ShiftIntoIdx =
11241 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
11242 SDValue ShiftAmount =
11243 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
11244 SDValue ShiftedElt =
11245 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
11246 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
11247 }
11248
11249 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
11250 ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
11251 ST->getAAInfo());
11252 }
11253
11254 // Store Stride in bytes
11255 unsigned Stride = MemSclVT.getSizeInBits() / 8;
11256 assert(Stride && "Zero stride!");
11257 // Extract each of the elements from the original vector and save them into
11258 // memory individually.
// NOTE(review): doxygen line 11259 is missing from this extracted view --
// it declared the `Stores` container (presumably SmallVector<SDValue, 8>)
// filled by the loop below.
11260 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
11261 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
11262
11263 SDValue Ptr =
11264 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
11265
11266 // This scalar TruncStore may be illegal, but we legalize it later.
11267 SDValue Store = DAG.getTruncStore(
11268 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
11269 MemSclVT, ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
11270 ST->getAAInfo());
11271
11272 Stores.push_back(Store);
11273 }
11274
// All element stores are independent; merge their chains.
11275 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
11276}
11277
// expandUnalignedLoad: legalization helper that rewrites a misaligned,
// unindexed load into operations the target can execute. Three strategies,
// tried in order for FP/vector values: (1) reload as a same-sized legal
// integer and BITCAST back; (2) bounce the value through an aligned stack
// slot using register-width integer copies; (3) for plain scalar integers,
// split into two half-width extending loads recombined with SHL+OR.
// Returns {loaded value, output chain}.
// NOTE(review): the viewer dropped hyperlinked lines here (11279 = the
// function signature, 11320 = presumably `SmallVector<SDValue, 8> Stores;`)
// — confirm against the real source.
11278std::pair<SDValue, SDValue>
11280 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
11281 "unaligned indexed loads not implemented!");
11282 SDValue Chain = LD->getChain();
11283 SDValue Ptr = LD->getBasePtr();
11284 EVT VT = LD->getValueType(0);
11285 EVT LoadedVT = LD->getMemoryVT();
11286 SDLoc dl(LD);
11287 auto &MF = DAG.getMachineFunction();
11288
11289 if (VT.isFloatingPoint() || VT.isVector()) {
// Same-width integer type standing in for the loaded FP/vector type.
11290 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
11291 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
11292 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
11293 LoadedVT.isVector()) {
11294 // Scalarize the load and let the individual components be handled.
11295 return scalarizeVectorLoad(LD, DAG);
11296 }
11297
11298 // Expand to a (misaligned) integer load of the same size,
11299 // then bitconvert to floating point or vector.
11300 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
11301 LD->getMemOperand());
11302 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
11303 if (LoadedVT != VT)
// Extending load: widen the bitcast result to the node's value type.
11304 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
11305 ISD::ANY_EXTEND, dl, VT, Result);
11306
11307 return std::make_pair(Result, newLoad.getValue(1));
11308 }
11309
11310 // Copy the value to a (aligned) stack slot using (unaligned) integer
11311 // loads and stores, then do a (aligned) load from the stack slot.
11312 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
11313 unsigned LoadedBytes = LoadedVT.getStoreSize();
11314 unsigned RegBytes = RegVT.getSizeInBits() / 8;
// Ceiling division: number of register-width chunks covering the value.
11315 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
11316
11317 // Make sure the stack slot is also aligned for the register type.
11318 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
11319 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
11321 SDValue StackPtr = StackBase;
11322 unsigned Offset = 0;
11323
11324 EVT PtrVT = Ptr.getValueType();
11325 EVT StackPtrVT = StackPtr.getValueType();
11326
11327 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
11328 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
11329
11330 // Do all but one copies using the full register width.
11331 for (unsigned i = 1; i < NumRegs; i++) {
11332 // Load one integer register's worth from the original location.
11333 SDValue Load = DAG.getLoad(
11334 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
11335 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
11336 // Follow the load with a store to the stack slot. Remember the store.
11337 Stores.push_back(DAG.getStore(
11338 Load.getValue(1), dl, Load, StackPtr,
11339 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
11340 // Increment the pointers.
11341 Offset += RegBytes;
11342
11343 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
11344 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
11345 }
11346
11347 // The last copy may be partial. Do an extending load.
11348 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
11349 8 * (LoadedBytes - Offset));
11350 SDValue Load = DAG.getExtLoad(
11351 ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
11352 LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->getBaseAlign(),
11353 LD->getMemOperand()->getFlags(), LD->getAAInfo());
11354 // Follow the load with a store to the stack slot. Remember the store.
11355 // On big-endian machines this requires a truncating store to ensure
11356 // that the bits end up in the right place.
11357 Stores.push_back(DAG.getTruncStore(
11358 Load.getValue(1), dl, Load, StackPtr,
11359 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
11360
11361 // The order of the stores doesn't matter - say it with a TokenFactor.
11362 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
11363
11364 // Finally, perform the original load only redirected to the stack slot.
11365 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
11366 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
11367 LoadedVT);
11368
11369 // Callers expect a MERGE_VALUES node.
11370 return std::make_pair(Load, TF);
11371 }
11372
// Scalar-integer path: split the load into two halves.
11373 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
11374 "Unaligned load of unsupported type.");
11375
11376 // Compute the new VT that is half the size of the old one. This is an
11377 // integer MVT.
11378 unsigned NumBits = LoadedVT.getSizeInBits();
11379 EVT NewLoadedVT;
11380 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
11381 NumBits >>= 1;
11382
11383 Align Alignment = LD->getBaseAlign();
11384 unsigned IncrementSize = NumBits / 8;
11385 ISD::LoadExtType HiExtType = LD->getExtensionType();
11386
11387 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
11388 if (HiExtType == ISD::NON_EXTLOAD)
11389 HiExtType = ISD::ZEXTLOAD;
11390
11391 // Load the value in two parts; endianness decides which half sits at the
11391 // lower address.
11392 SDValue Lo, Hi;
11393 if (DAG.getDataLayout().isLittleEndian()) {
11394 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
11395 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11396 LD->getAAInfo());
11397
11398 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11399 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
11400 LD->getPointerInfo().getWithOffset(IncrementSize),
11401 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11402 LD->getAAInfo());
11403 } else {
11404 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
11405 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11406 LD->getAAInfo());
11407
11408 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11409 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
11410 LD->getPointerInfo().getWithOffset(IncrementSize),
11411 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11412 LD->getAAInfo());
11413 }
11414
11415 // aggregate the two parts: Result = (Hi << NumBits) | Lo.
11416 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
11417 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
11418 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
11419
// Merge the two load chains so both halves are ordered for consumers.
11420 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
11421 Hi.getValue(1));
11422
11423 return std::make_pair(Result, TF);
11424}
11425
// expandUnalignedStore: mirror of expandUnalignedLoad for misaligned,
// unindexed stores. FP/vector values are either bitcast to a same-width
// integer and stored, scalarized, or bounced through an aligned stack slot;
// scalar integers are split into two half-width truncating stores.
// Returns the output chain of the expanded sequence.
// NOTE(review): the viewer dropped lines 11426 (signature head) and 11479
// (presumably `SmallVector<SDValue, 8> Stores;`) — confirm against source.
11427 SelectionDAG &DAG) const {
11428 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
11429 "unaligned indexed stores not implemented!");
11430 SDValue Chain = ST->getChain();
11431 SDValue Ptr = ST->getBasePtr();
11432 SDValue Val = ST->getValue();
11433 EVT VT = Val.getValueType();
11434 Align Alignment = ST->getBaseAlign();
11435 auto &MF = DAG.getMachineFunction();
11436 EVT StoreMemVT = ST->getMemoryVT();
11437
11438 SDLoc dl(ST);
11439 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
11440 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
11441 if (isTypeLegal(intVT)) {
11442 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
11443 StoreMemVT.isVector()) {
11444 // Scalarize the store and let the individual components be handled.
11445 SDValue Result = scalarizeVectorStore(ST, DAG);
11446 return Result;
11447 }
11448 // Expand to a bitconvert of the value to the integer type of the
11449 // same size, then a (misaligned) int store.
11450 // FIXME: Does not handle truncating floating point stores!
11451 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
11452 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
11453 Alignment, ST->getMemOperand()->getFlags());
11454 return Result;
11455 }
11456 // Do a (aligned) store to a stack slot, then copy from the stack slot
11457 // to the final destination using (unaligned) integer loads and stores.
11458 MVT RegVT = getRegisterType(
11459 *DAG.getContext(),
11460 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
11461 EVT PtrVT = Ptr.getValueType();
11462 unsigned StoredBytes = StoreMemVT.getStoreSize();
11463 unsigned RegBytes = RegVT.getSizeInBits() / 8;
// Ceiling division: register-width chunks needed to cover the value.
11464 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
11465
11466 // Make sure the stack slot is also aligned for the register type.
11467 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
11468 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11469
11470 // Perform the original store, only redirected to the stack slot.
11471 SDValue Store = DAG.getTruncStore(
11472 Chain, dl, Val, StackPtr,
11473 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
11474
11475 EVT StackPtrVT = StackPtr.getValueType();
11476
11477 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
11478 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
11480 unsigned Offset = 0;
11481
11482 // Do all but one copies using the full register width.
11483 for (unsigned i = 1; i < NumRegs; i++) {
11484 // Load one integer register's worth from the stack slot.
11485 SDValue Load = DAG.getLoad(
11486 RegVT, dl, Store, StackPtr,
11487 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
11488 // Store it to the final location. Remember the store.
11489 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
11490 ST->getPointerInfo().getWithOffset(Offset),
11491 ST->getBaseAlign(),
11492 ST->getMemOperand()->getFlags()));
11493 // Increment the pointers.
11494 Offset += RegBytes;
11495 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
11496 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
11497 }
11498
11499 // The last store may be partial. Do a truncating store. On big-endian
11500 // machines this requires an extending load from the stack slot to ensure
11501 // that the bits are in the right place.
11502 EVT LoadMemVT =
11503 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
11504
11505 // Load from the stack slot.
11506 SDValue Load = DAG.getExtLoad(
11507 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
11508 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
11509
11510 Stores.push_back(DAG.getTruncStore(
11511 Load.getValue(1), dl, Load, Ptr,
11512 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
11513 ST->getBaseAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()));
11514 // The order of the stores doesn't matter - say it with a TokenFactor.
11515 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
11516 return Result;
11517 }
11518
// Scalar-integer path: split into two half-width truncating stores.
11519 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
11520 "Unaligned store of unknown type.");
11521 // Get the half-size VT
11522 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
11523 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
11524 unsigned IncrementSize = NumBits / 8;
11525
11526 // Divide the stored value in two parts.
11527 SDValue ShiftAmount =
11528 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
11529 SDValue Lo = Val;
11530 // If Val is a constant, replace the upper bits with 0. The SRL will constant
11531 // fold and not use the upper bits. A smaller constant may be easier to
11532 // materialize.
11533 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
11534 Lo = DAG.getNode(
11535 ISD::AND, dl, VT, Lo,
11536 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
11537 VT));
11538 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
11539
11540 // Store the two parts; endianness picks which half goes at the low address.
11541 SDValue Store1, Store2;
11542 Store1 = DAG.getTruncStore(Chain, dl,
11543 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
11544 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
11545 ST->getMemOperand()->getFlags());
11546
11547 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11548 Store2 = DAG.getTruncStore(
11549 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
11550 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
11551 ST->getMemOperand()->getFlags(), ST->getAAInfo());
11552
11553 SDValue Result =
11554 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
11555 return Result;
11556}
11557
// IncrementMemoryAddress: compute the address of the chunk following a
// masked load/store. For normal memory the increment is simply the store
// size of DataVT; for compressed memory (compress/expand style accesses)
// only active lanes occupy memory, so the increment is
// popcount(Mask) * element-size-in-bytes.
// NOTE(review): the viewer dropped lines 11559 (signature head with the
// Addr/Mask parameters) and 11563 (presumably `SDValue Increment;`).
11558SDValue
11560 const SDLoc &DL, EVT DataVT,
11561 SelectionDAG &DAG,
11562 bool IsCompressedMemory) const {
11564 EVT AddrVT = Addr.getValueType();
11565 EVT MaskVT = Mask.getValueType();
11566 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
11567 "Incompatible types of Data and Mask");
11568 if (IsCompressedMemory) {
11569 // Incrementing the pointer according to number of '1's in the mask.
11570 if (DataVT.isScalableVector()) {
// Scalable vectors: count set lanes via zext-to-i32 + VECREDUCE_ADD,
// since a flat bitcast-to-integer of the mask is not possible here.
11571 EVT MaskExtVT = MaskVT.changeElementType(*DAG.getContext(), MVT::i32);
11572 SDValue MaskExt = DAG.getNode(ISD::ZERO_EXTEND, DL, MaskExtVT, Mask);
11573 Increment = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, MaskExt);
11574 } else {
// Fixed-width vectors: bitcast the mask to an integer and CTPOP it.
11575 EVT MaskIntVT =
11576 EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
11577 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
11578 if (MaskIntVT.getSizeInBits() < 32) {
11579 MaskInIntReg =
11580 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
11581 MaskIntVT = MVT::i32;
11582 }
11583 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
11584 }
11585 // Scale is an element size in bytes.
11586 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
11587 AddrVT);
11588 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
11589 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
11590 } else
11591 Increment = DAG.getTypeSize(DL, AddrVT, DataVT.getStoreSize());
11592
11593 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
11594}
11595
// clampDynamicVectorIndex: clamp a dynamic index so that accessing a
// subvector of SubEC elements starting at Idx stays within VecVT.
// NOTE(review): line 11596 (the static function signature taking
// SelectionDAG &DAG and SDValue Idx) was dropped by the viewer.
11597 EVT VecVT, const SDLoc &dl,
11598 ElementCount SubEC) {
11599 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
11600 "Cannot index a scalable vector within a fixed-width vector");
11601
11602 unsigned NElts = VecVT.getVectorMinNumElements();
11603 unsigned NumSubElts = SubEC.getKnownMinValue();
11604 EVT IdxVT = Idx.getValueType();
11605
11606 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
11607 // If this is a constant index and we know the value plus the number of the
11608 // elements in the subvector minus one is less than the minimum number of
11609 // elements then it's safe to return Idx.
11610 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
11611 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
11612 return Idx;
// Runtime clamp: Idx = umin(Idx, vscale*NElts - NumSubElts). USUBSAT
// guards against underflow when the subvector may exceed the minimum size.
11613 SDValue VS =
11614 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
11615 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
11616 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
11617 DAG.getConstant(NumSubElts, dl, IdxVT));
11618 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
11619 }
// Single-element access into a power-of-two-sized vector: a mask suffices.
11620 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
11621 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
11622 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
11623 DAG.getConstant(Imm, dl, IdxVT));
11624 }
// General fixed-width case: clamp to the last valid start position.
11625 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
11626 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
11627 DAG.getConstant(MaxIndex, dl, IdxVT));
11628}
11629
// getVectorElementPointer: compute the in-memory address of a single,
// dynamically indexed vector element.
// NOTE(review): lines 11631/11634/11636 were dropped by the viewer; from
// the visible arguments this presumably forwards to getVectorSubVecPointer
// with a one-element subvector type — confirm against the real source.
11630SDValue
11632 EVT VecVT, SDValue Index,
11633 const SDNodeFlags PtrArithFlags) const {
11635 DAG, VecPtr, VecVT,
11637 Index, PtrArithFlags);
11638}
11639
11640SDValue
11642 EVT VecVT, EVT SubVecVT, SDValue Index,
11643 const SDNodeFlags PtrArithFlags) const {
11644 SDLoc dl(Index);
11645 // Make sure the index type is big enough to compute in.
11646 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
11647
11648 EVT EltVT = VecVT.getVectorElementType();
11649
11650 // Calculate the element offset and add it to the pointer.
11651 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
11652 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
11653 "Converting bits to bytes lost precision");
11654 assert(SubVecVT.getVectorElementType() == EltVT &&
11655 "Sub-vector must be a vector with matching element type");
11656 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
11657 SubVecVT.getVectorElementCount());
11658
11659 EVT IdxVT = Index.getValueType();
11660 if (SubVecVT.isScalableVector())
11661 Index =
11662 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
11663 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
11664
11665 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
11666 DAG.getConstant(EltSize, dl, IdxVT));
11667 return DAG.getMemBasePlusOffset(VecPtr, Index, dl, PtrArithFlags);
11668}
11669
11670//===----------------------------------------------------------------------===//
11671// Implementation of Emulated TLS Model
11672//===----------------------------------------------------------------------===//
11673
// LowerToTLSEmulatedModel: lower a TLS global address under the emulated
// TLS model by emitting a call to __emutls_get_address on the synthesized
// "__emutls_v.<name>" control variable. Returns the call's result value.
// NOTE(review): lines 11674 (signature head), 11684, 11695 (presumably the
// TargetLowering::CallLoweringInfo declaration) and 11702 (MFI declaration)
// were dropped by the viewer.
11675 SelectionDAG &DAG) const {
11676 // Access to address of TLS variable xyz is lowered to a function call:
11677 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
11678 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11679 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
11680 SDLoc dl(GA);
11681
11682 ArgListTy Args;
11683 const GlobalValue *GV =
// Look up the emulated-TLS control variable the frontend/IR pass created.
11685 SmallString<32> NameString("__emutls_v.");
11686 NameString += GV->getName();
11687 StringRef EmuTlsVarName(NameString);
11688 const GlobalVariable *EmuTlsVar =
11689 GV->getParent()->getNamedGlobal(EmuTlsVarName);
11690 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
11691 Args.emplace_back(DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT), VoidPtrType);
11692
11693 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
11694
11696 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
11697 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
11698 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
11699
11700 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
11701 // At least for X86 targets, maybe good for other targets too?
11703 MFI.setAdjustsStack(true); // Is this only for X86 target?
11704 MFI.setHasCalls(true);
11705
11706 assert((GA->getOffset() == 0) &&
11707 "Emulated TLS must have zero offset in GlobalAddressSDNode");
11708 return CallResult.first;
11709}
11710
// lowerCmpEqZeroToCtlzSrl: fold (setcc x, 0, eq) into (ctlz x) >> log2(bw)
// on targets where CTLZ is fast — ctlz(x) has its top bit set exactly when
// x == 0. Returns an empty SDValue when the pattern does not apply.
// NOTE(review): line 11711 (signature head with the SDValue Op parameter)
// was dropped by the viewer.
11712 SelectionDAG &DAG) const {
11713 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
11714 if (!isCtlzFast())
11715 return SDValue();
11716 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11717 SDLoc dl(Op);
11718 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
11719 EVT VT = Op.getOperand(0).getValueType();
11720 SDValue Zext = Op.getOperand(0);
// Work in at least i32 so Log2b below indexes a well-defined bit.
11721 if (VT.bitsLT(MVT::i32)) {
11722 VT = MVT::i32;
11723 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
11724 }
11725 unsigned Log2b = Log2_32(VT.getSizeInBits());
11726 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
11727 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
11728 DAG.getConstant(Log2b, dl, MVT::i32));
11729 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
11730 }
11731 return SDValue();
11732}
11733
// expandIntMINMAX: expand SMIN/SMAX/UMIN/UMAX. Tries cheap rewrites first
// (signedness flip when sign bits are zero, sub/usubsat identities), then
// falls back to SETCC + SELECT, preferring a condition code for which a
// SETCC node already exists so it can be reused.
// NOTE(review): the viewer dropped line 11734 (signature) and several
// guard/condition lines (11750, 11760, 11768, 11775) — the `if` conditions
// around those spots are incomplete in this dump; confirm against source.
11735 SDValue Op0 = Node->getOperand(0);
11736 SDValue Op1 = Node->getOperand(1);
11737 EVT VT = Op0.getValueType();
11738 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11739 unsigned Opcode = Node->getOpcode();
11740 SDLoc DL(Node);
11741
11742 // If both sign bits are zero, flip UMIN/UMAX <-> SMIN/SMAX if legal.
11743 unsigned AltOpcode = ISD::getOppositeSignednessMinMaxOpcode(Opcode);
11744 if (isOperationLegal(AltOpcode, VT) && DAG.SignBitIsZero(Op0) &&
11745 DAG.SignBitIsZero(Op1))
11746 return DAG.getNode(AltOpcode, DL, VT, Op0, Op1);
11747
11748 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
11749 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
// Freeze Op0 because it is used twice below (in SUB and in SETCC).
11751 Op0 = DAG.getFreeze(Op0);
11752 SDValue Zero = DAG.getConstant(0, DL, VT);
11753 return DAG.getNode(ISD::SUB, DL, VT, Op0,
11754 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
11755 }
11756
11757 // umin(x,y) -> sub(x,usubsat(x,y))
11758 // TODO: Missing freeze(Op0)?
11759 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
11761 return DAG.getNode(ISD::SUB, DL, VT, Op0,
11762 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
11763 }
11764
11765 // umax(x,y) -> add(x,usubsat(y,x))
11766 // TODO: Missing freeze(Op0)?
11767 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
11769 return DAG.getNode(ISD::ADD, DL, VT, Op0,
11770 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
11771 }
11772
11773 // FIXME: Should really try to split the vector in case it's legal on a
11774 // subvector.
11776 return DAG.UnrollVectorOp(Node);
11777
11778 // Attempt to find an existing SETCC node that we can reuse.
11779 // TODO: Do we need a generic doesSETCCNodeExist?
11780 // TODO: Missing freeze(Op0)/freeze(Op1)?
11781 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
11782 ISD::CondCode PrefCommuteCC,
11783 ISD::CondCode AltCommuteCC) {
11784 SDVTList BoolVTList = DAG.getVTList(BoolVT);
// Non-commuted forms first: select(Op0 CC Op1, Op0, Op1).
11785 for (ISD::CondCode CC : {PrefCC, AltCC}) {
11786 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
11787 {Op0, Op1, DAG.getCondCode(CC)})) {
11788 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
11789 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
11790 }
11791 }
// Commuted forms: same SETCC, operands of the select swapped.
11792 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
11793 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
11794 {Op0, Op1, DAG.getCondCode(CC)})) {
11795 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
11796 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
11797 }
11798 }
// No reusable SETCC found — build the preferred form from scratch.
11799 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
11800 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
11801 };
11802
11803 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
11804 // -> Y = (A < B) ? B : A
11805 // -> Y = (A >= B) ? A : B
11806 // -> Y = (A <= B) ? B : A
11807 switch (Opcode) {
11808 case ISD::SMAX:
11809 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
11810 case ISD::SMIN:
11811 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
11812 case ISD::UMAX:
11813 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
11814 case ISD::UMIN:
11815 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
11816 }
11817
11818 llvm_unreachable("How did we get here?");
11819}
11820
// expandAddSubSat: expand [SU]ADDSAT/[SU]SUBSAT. Tries umax/umin-based
// identities first, then lowers via the matching overflow op ([SU]ADDO /
// [SU]SUBO) and selects the saturation constant on overflow. For the
// signed cases, known operand signs narrow the saturation direction.
// NOTE(review): the viewer dropped line 11821 (signature) and guard lines
// 11840, 11879, 11891, 11901 — several `if` conditions here are incomplete
// in this dump; confirm against the real source.
11822 unsigned Opcode = Node->getOpcode();
11823 SDValue LHS = Node->getOperand(0);
11824 SDValue RHS = Node->getOperand(1);
11825 EVT VT = LHS.getValueType();
11826 SDLoc dl(Node);
11827
11828 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
11829 assert(VT.isInteger() && "Expected operands to be integers");
11830
11831 // usub.sat(a, b) -> umax(a, b) - b
11832 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
11833 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
11834 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
11835 }
11836
11837 // usub.sat(a, 1) -> sub(a, zext(a != 0))
11838 // Prefer this on targets without legal/cost-effective overflow-carry nodes.
11839 if (Opcode == ISD::USUBSAT && isOneOrOneSplat(RHS) &&
// Freeze LHS: it is used both in the compare and in the subtraction.
11841 LHS = DAG.getFreeze(LHS);
11842 SDValue Zero = DAG.getConstant(0, dl, VT);
11843 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11844 SDValue IsNonZero = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETNE);
11845 SDValue Subtrahend = DAG.getBoolExtOrTrunc(IsNonZero, dl, VT, BoolVT);
// Mask to bit 0 so only 0 or 1 is subtracted regardless of bool contents.
11846 Subtrahend =
11847 DAG.getNode(ISD::AND, dl, VT, Subtrahend, DAG.getConstant(1, dl, VT));
11848 return DAG.getNode(ISD::SUB, dl, VT, LHS, Subtrahend);
11849 }
11850
11851 // uadd.sat(a, b) -> umin(a, ~b) + b
11852 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
11853 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
11854 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
11855 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
11856 }
11857
// Map the saturating opcode to its overflow-reporting counterpart.
11858 unsigned OverflowOp;
11859 switch (Opcode) {
11860 case ISD::SADDSAT:
11861 OverflowOp = ISD::SADDO;
11862 break;
11863 case ISD::UADDSAT:
11864 OverflowOp = ISD::UADDO;
11865 break;
11866 case ISD::SSUBSAT:
11867 OverflowOp = ISD::SSUBO;
11868 break;
11869 case ISD::USUBSAT:
11870 OverflowOp = ISD::USUBO;
11871 break;
11872 default:
11873 llvm_unreachable("Expected method to receive signed or unsigned saturation "
11874 "addition or subtraction node.");
11875 }
11876
11877 // FIXME: Should really try to split the vector in case it's legal on a
11878 // subvector.
11880 return DAG.UnrollVectorOp(Node);
11881
11882 unsigned BitWidth = LHS.getScalarValueSizeInBits();
11883 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11884 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11885 SDValue SumDiff = Result.getValue(0);
11886 SDValue Overflow = Result.getValue(1);
11887 SDValue Zero = DAG.getConstant(0, dl, VT);
11888 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
11889
11890 if (Opcode == ISD::UADDSAT) {
// Boolean-contents-dependent fast path (guard on dropped line 11891):
11892 // (LHS + RHS) | OverflowMask
11893 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
11894 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
11895 }
11896 // Overflow ? 0xffff.... : (LHS + RHS)
11897 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
11898 }
11899
11900 if (Opcode == ISD::USUBSAT) {
// Boolean-contents-dependent fast path (guard on dropped line 11901):
11902 // (LHS - RHS) & ~OverflowMask
11903 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
11904 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
11905 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
11906 }
11907 // Overflow ? 0 : (LHS - RHS)
11908 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
11909 }
11910
11911 assert((Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) &&
11912 "Expected signed saturating add/sub opcode");
11913
11914 const APInt MinVal = APInt::getSignedMinValue(BitWidth);
11915 const APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
11916
11917 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
11918 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
11919
11920 // If either of the operand signs are known, then they are guaranteed to
11921 // only saturate in one direction. If non-negative they will saturate
11922 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
11923 //
11924 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
11925 // sign of 'y' has to be flipped.
11926
11927 bool LHSIsNonNegative = KnownLHS.isNonNegative();
11928 bool RHSIsNonNegative =
11929 Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative() : KnownRHS.isNegative();
11930 if (LHSIsNonNegative || RHSIsNonNegative) {
11931 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11932 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
11933 }
11934
11935 bool LHSIsNegative = KnownLHS.isNegative();
11936 bool RHSIsNegative =
11937 Opcode == ISD::SADDSAT ? KnownRHS.isNegative() : KnownRHS.isNonNegative();
11938 if (LHSIsNegative || RHSIsNegative) {
11939 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11940 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
11941 }
11942
11943 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
// The sign of the wrapped result picks the right saturation constant.
11944 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11945 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
11946 DAG.getConstant(BitWidth - 1, dl, VT));
11947 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
11948 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
11949}
11950
// expandCMP: expand UCMP/SCMP three-way comparison to -1/0/+1 in ResVT,
// either as two selects or as sub(isGT, isLT) on the extended booleans.
// NOTE(review): the viewer dropped line 11951 (signature) and condition
// lines 11971/11973/11981 — the `if` guards here are incomplete in this
// dump; confirm against the real source.
11952 unsigned Opcode = Node->getOpcode();
11953 SDValue LHS = Node->getOperand(0);
11954 SDValue RHS = Node->getOperand(1);
11955 EVT VT = LHS.getValueType();
11956 EVT ResVT = Node->getValueType(0);
11957 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11958 SDLoc dl(Node);
11959
// Signedness of the comparison follows the opcode.
11960 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
11961 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
11962 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
11963 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
11964
11965 // We can't perform arithmetic on i1 values. Extending them would
11966 // probably result in worse codegen, so let's just use two selects instead.
11967 // Some targets are also just better off using selects rather than subtraction
11968 // because one of the conditions can be merged with one of the selects.
11969 // And finally, if we don't know the contents of high bits of a boolean value
11970 // we can't perform any arithmetic either.
11972 BoolVT.getScalarSizeInBits() == 1 ||
11974 SDValue SelectZeroOrOne =
11975 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
11976 DAG.getConstant(0, dl, ResVT));
11977 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
11978 SelectZeroOrOne);
11979 }
11980
// Arithmetic path (guard on dropped line 11981, presumably depending on
// boolean contents): result = sext_or_trunc(IsGT - IsLT).
11982 std::swap(IsGT, IsLT);
11983 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
11984 ResVT);
11985}
11986
// expandShlSat: expand SSHLSAT/USHLSAT (saturating left shift). Performs
// the shift, shifts back, and if the round-trip does not reproduce LHS the
// shift overflowed, so the appropriate saturation constant is selected.
// NOTE(review): lines 11987 (signature) and 12000 (the vector-unroll guard
// condition) were dropped by the viewer.
11988 unsigned Opcode = Node->getOpcode();
11989 bool IsSigned = Opcode == ISD::SSHLSAT;
11990 SDValue LHS = Node->getOperand(0);
11991 SDValue RHS = Node->getOperand(1);
11992 EVT VT = LHS.getValueType();
11993 SDLoc dl(Node);
11994
11995 assert((Node->getOpcode() == ISD::SSHLSAT ||
11996 Node->getOpcode() == ISD::USHLSAT) &&
11997 "Expected a SHLSAT opcode");
11998 assert(VT.isInteger() && "Expected operands to be integers");
11999
12001 return DAG.UnrollVectorOp(Node);
12002
12003 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
12004
12005 unsigned BW = VT.getScalarSizeInBits();
12006 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12007 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
// Round-trip: shift back with SRA (signed) / SRL (unsigned).
12008 SDValue Orig =
12009 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
12010
12011 SDValue SatVal;
12012 if (IsSigned) {
// Signed saturation direction follows the sign of LHS.
12013 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
12014 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
12015 SDValue Cond =
12016 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
12017 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
12018 } else {
12019 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
12020 }
12021 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
12022 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
12023}
12024
// forceExpandMultiply: brute-force double-width multiply (Knuth Algorithm M
// / Hacker's Delight): split each operand into half-width digits, form the
// partial products, and carry-propagate into Lo/Hi results. With Signed
// set, arithmetic right shifts propagate sign bits into the Hi half; when
// HiLHS/HiRHS are provided, their cross products are folded into Hi.
// NOTE(review): line 12025 (signature head: DAG, dl parameters) was dropped
// by the viewer. The assert string's "RiRHS" is a typo for "HiRHS" in the
// upstream source (a runtime string, left untouched here).
12026 bool Signed, SDValue &Lo, SDValue &Hi,
12027 SDValue LHS, SDValue RHS,
12028 SDValue HiLHS, SDValue HiRHS) const {
12029 EVT VT = LHS.getValueType();
12030 assert(RHS.getValueType() == VT && "Mismatching operand types");
12031
12032 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
12033 assert((!Signed || !HiLHS) &&
12034 "Signed flag should only be set when HiLHS and RiRHS are null");
12035
12036 // We'll expand the multiplication by brute force because we have no other
12037 // options. This is a trivially-generalized version of the code from
12038 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
12039 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
12040 // sign bits while calculating the Hi half.
12041 unsigned Bits = VT.getSizeInBits();
12042 unsigned HalfBits = Bits / 2;
// LL/RL are the low half-digits; LH/RH (below) are the high half-digits.
12043 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
12044 SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
12045 SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
12046
// T = LL*RL: the lowest partial product; TL/TH are its half-digits.
12047 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
12048 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
12049
12050 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
12051 // This is always an unsigned shift.
12052 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
12053
12054 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
12055 SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
12056 SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
12057
// U and V accumulate the middle partial products plus carries.
12058 SDValue U =
12059 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
12060 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
12061 SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
12062
12063 SDValue V =
12064 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
12065 SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
12066
12067 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
12068 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
12069
12070 Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
12071 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
12072
12073 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
12074 // the products to Hi.
12075 if (HiLHS) {
12076 SDValue RHLL = DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS);
12077 SDValue RLLH = DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS);
12078 Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
12079 DAG.getNode(ISD::ADD, dl, VT, RHLL, RLLH));
12080 }
12081}
12082
// forceExpandWideMUL: produce the full double-width product of LHS*RHS in
// Lo/Hi. Prefers a __mul* libcall of the doubled width when the target
// provides one (sign-/zero-extending the operands to build the high input
// halves); otherwise falls back to forceExpandMultiply.
// NOTE(review): the viewer dropped lines 12083 (signature head), 12123
// (presumably the MakeLibCallOptions declaration), 12126 (the endianness
// `if`) and 12137 (the assert head) — confirm against the real source.
12084 bool Signed, const SDValue LHS,
12085 const SDValue RHS, SDValue &Lo,
12086 SDValue &Hi) const {
12087 EVT VT = LHS.getValueType();
12088 assert(RHS.getValueType() == VT && "Mismatching operand types");
12089 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
12090 // We can fall back to a libcall with an illegal type for the MUL if we
12091 // have a libcall big enough.
12092 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
12093 if (WideVT == MVT::i16)
12094 LC = RTLIB::MUL_I16;
12095 else if (WideVT == MVT::i32)
12096 LC = RTLIB::MUL_I32;
12097 else if (WideVT == MVT::i64)
12098 LC = RTLIB::MUL_I64;
12099 else if (WideVT == MVT::i128)
12100 LC = RTLIB::MUL_I128;
12101
12102 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
12103 if (LibcallImpl == RTLIB::Unsupported) {
// No suitable libcall: expand arithmetically instead.
12104 forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
12105 return;
12106 }
12107
12108 SDValue HiLHS, HiRHS;
12109 if (Signed) {
12110 // The high part is obtained by SRA'ing all but one of the bits of low
12111 // part.
12112 unsigned LoSize = VT.getFixedSizeInBits();
12113 SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
12114 HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
12115 HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
12116 } else {
// Unsigned: the extended high halves are simply zero.
12117 HiLHS = DAG.getConstant(0, dl, VT);
12118 HiRHS = DAG.getConstant(0, dl, VT);
12119 }
12120
12121 // Attempt a libcall.
12122 SDValue Ret;
12124 CallOptions.setIsSigned(Signed);
12125 CallOptions.setIsPostTypeLegalization(true);
12127 // Halves of WideVT are packed into registers in different order
12128 // depending on platform endianness. This is usually handled by
12129 // the C calling convention, but we can't defer to it in
12130 // the legalizer.
12131 SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
12132 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
12133 } else {
12134 SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
12135 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
12136 }
12138 "Ret value is a collection of constituent nodes holding result.");
// Unpack the wide result back into Lo/Hi in endianness order.
12139 if (DAG.getDataLayout().isLittleEndian()) {
12140 // Same as above.
12141 Lo = Ret.getOperand(0);
12142 Hi = Ret.getOperand(1);
12143 } else {
12144 Lo = Ret.getOperand(1);
12145 Hi = Ret.getOperand(0);
12146 }
12147}
12148
12149SDValue
12151 assert((Node->getOpcode() == ISD::SMULFIX ||
12152 Node->getOpcode() == ISD::UMULFIX ||
12153 Node->getOpcode() == ISD::SMULFIXSAT ||
12154 Node->getOpcode() == ISD::UMULFIXSAT) &&
12155 "Expected a fixed point multiplication opcode");
12156
12157 SDLoc dl(Node);
12158 SDValue LHS = Node->getOperand(0);
12159 SDValue RHS = Node->getOperand(1);
12160 EVT VT = LHS.getValueType();
12161 unsigned Scale = Node->getConstantOperandVal(2);
12162 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
12163 Node->getOpcode() == ISD::UMULFIXSAT);
12164 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
12165 Node->getOpcode() == ISD::SMULFIXSAT);
12166 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12167 unsigned VTSize = VT.getScalarSizeInBits();
12168
12169 if (!Scale) {
12170 // [us]mul.fix(a, b, 0) -> mul(a, b)
12171 if (!Saturating) {
12173 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12174 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
12175 SDValue Result =
12176 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
12177 SDValue Product = Result.getValue(0);
12178 SDValue Overflow = Result.getValue(1);
12179 SDValue Zero = DAG.getConstant(0, dl, VT);
12180
12181 APInt MinVal = APInt::getSignedMinValue(VTSize);
12182 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
12183 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
12184 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
12185 // Xor the inputs, if resulting sign bit is 0 the product will be
12186 // positive, else negative.
12187 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
12188 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
12189 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
12190 return DAG.getSelect(dl, VT, Overflow, Result, Product);
12191 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
12192 SDValue Result =
12193 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
12194 SDValue Product = Result.getValue(0);
12195 SDValue Overflow = Result.getValue(1);
12196
12197 APInt MaxVal = APInt::getMaxValue(VTSize);
12198 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
12199 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
12200 }
12201 }
12202
12203 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
12204 "Expected scale to be less than the number of bits if signed or at "
12205 "most the number of bits if unsigned.");
12206 assert(LHS.getValueType() == RHS.getValueType() &&
12207 "Expected both operands to be the same type");
12208
12209 // Get the upper and lower bits of the result.
12210 SDValue Lo, Hi;
12211 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
12212 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
12213 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
12214 if (isOperationLegalOrCustom(LoHiOp, VT)) {
12215 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
12216 Lo = Result.getValue(0);
12217 Hi = Result.getValue(1);
12218 } else if (isOperationLegalOrCustom(HiOp, VT)) {
12219 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12220 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
12221 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
12222 // Try for a multiplication using a wider type.
12223 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12224 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
12225 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
12226 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
12227 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
12228 SDValue Shifted =
12229 DAG.getNode(ISD::SRA, dl, WideVT, Res,
12230 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
12231 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
12232 } else if (VT.isVector()) {
12233 return SDValue();
12234 } else {
12235 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
12236 }
12237
12238 if (Scale == VTSize)
12239 // Result is just the top half since we'd be shifting by the width of the
12240 // operand. Overflow impossible so this works for both UMULFIX and
12241 // UMULFIXSAT.
12242 return Hi;
12243
12244 // The result will need to be shifted right by the scale since both operands
12245 // are scaled. The result is given to us in 2 halves, so we only want part of
12246 // both in the result.
12247 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
12248 DAG.getShiftAmountConstant(Scale, VT, dl));
12249 if (!Saturating)
12250 return Result;
12251
12252 if (!Signed) {
12253 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
12254 // widened multiplication) aren't all zeroes.
12255
12256 // Saturate to max if ((Hi >> Scale) != 0),
12257 // which is the same as if (Hi > ((1 << Scale) - 1))
12258 APInt MaxVal = APInt::getMaxValue(VTSize);
12259 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
12260 dl, VT);
12261 Result = DAG.getSelectCC(dl, Hi, LowMask,
12262 DAG.getConstant(MaxVal, dl, VT), Result,
12263 ISD::SETUGT);
12264
12265 return Result;
12266 }
12267
12268 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
12269 // widened multiplication) aren't all ones or all zeroes.
12270
12271 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
12272 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
12273
12274 if (Scale == 0) {
12275 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
12276 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
12277 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
12278 // Saturated to SatMin if wide product is negative, and SatMax if wide
12279 // product is positive ...
12280 SDValue Zero = DAG.getConstant(0, dl, VT);
12281 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
12282 ISD::SETLT);
12283 // ... but only if we overflowed.
12284 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
12285 }
12286
12287 // We handled Scale==0 above so all the bits to examine is in Hi.
12288
12289 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
12290 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
12291 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
12292 dl, VT);
12293 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
12294 // Saturate to min if (Hi >> (Scale - 1)) < -1),
12295 // which is the same as if (HI < (-1 << (Scale - 1))
12296 SDValue HighMask =
12297 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
12298 dl, VT);
12299 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
12300 return Result;
12301}
12302
12303SDValue
12305 SDValue LHS, SDValue RHS,
12306 unsigned Scale, SelectionDAG &DAG) const {
12307 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
12308 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
12309 "Expected a fixed point division opcode");
12310
12311 EVT VT = LHS.getValueType();
12312 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
12313 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
12314 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12315
12316 // If there is enough room in the type to upscale the LHS or downscale the
12317 // RHS before the division, we can perform it in this type without having to
12318 // resize. For signed operations, the LHS headroom is the number of
12319 // redundant sign bits, and for unsigned ones it is the number of zeroes.
12320 // The headroom for the RHS is the number of trailing zeroes.
12321 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
12323 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
12324
12325 // For signed saturating operations, we need to be able to detect true integer
12326 // division overflow; that is, when you have MIN / -EPS. However, this
12327 // is undefined behavior and if we emit divisions that could take such
12328 // values it may cause undesired behavior (arithmetic exceptions on x86, for
12329 // example).
12330 // Avoid this by requiring an extra bit so that we never get this case.
12331 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
12332 // signed saturating division, we need to emit a whopping 32-bit division.
12333 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
12334 return SDValue();
12335
12336 unsigned LHSShift = std::min(LHSLead, Scale);
12337 unsigned RHSShift = Scale - LHSShift;
12338
12339 // At this point, we know that if we shift the LHS up by LHSShift and the
12340 // RHS down by RHSShift, we can emit a regular division with a final scaling
12341 // factor of Scale.
12342
12343 if (LHSShift)
12344 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
12345 DAG.getShiftAmountConstant(LHSShift, VT, dl));
12346 if (RHSShift)
12347 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
12348 DAG.getShiftAmountConstant(RHSShift, VT, dl));
12349
12350 SDValue Quot;
12351 if (Signed) {
12352 // For signed operations, if the resulting quotient is negative and the
12353 // remainder is nonzero, subtract 1 from the quotient to round towards
12354 // negative infinity.
12355 SDValue Rem;
12356 // FIXME: Ideally we would always produce an SDIVREM here, but if the
12357 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
12358 // we couldn't just form a libcall, but the type legalizer doesn't do it.
12359 if (isTypeLegal(VT) &&
12361 Quot = DAG.getNode(ISD::SDIVREM, dl,
12362 DAG.getVTList(VT, VT),
12363 LHS, RHS);
12364 Rem = Quot.getValue(1);
12365 Quot = Quot.getValue(0);
12366 } else {
12367 Quot = DAG.getNode(ISD::SDIV, dl, VT,
12368 LHS, RHS);
12369 Rem = DAG.getNode(ISD::SREM, dl, VT,
12370 LHS, RHS);
12371 }
12372 SDValue Zero = DAG.getConstant(0, dl, VT);
12373 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
12374 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
12375 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
12376 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
12377 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
12378 DAG.getConstant(1, dl, VT));
12379 Quot = DAG.getSelect(dl, VT,
12380 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
12381 Sub1, Quot);
12382 } else
12383 Quot = DAG.getNode(ISD::UDIV, dl, VT,
12384 LHS, RHS);
12385
12386 return Quot;
12387}
12388
12390 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
12391 SDLoc dl(Node);
12392 SDValue LHS = Node->getOperand(0);
12393 SDValue RHS = Node->getOperand(1);
12394 bool IsAdd = Node->getOpcode() == ISD::UADDO;
12395
12396 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
12397 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
12398 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
12399 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
12400 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
12401 { LHS, RHS, CarryIn });
12402 Result = SDValue(NodeCarry.getNode(), 0);
12403 Overflow = SDValue(NodeCarry.getNode(), 1);
12404 return;
12405 }
12406
12407 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
12408 LHS.getValueType(), LHS, RHS);
12409
12410 EVT ResultType = Node->getValueType(1);
12411 EVT SetCCType = getSetCCResultType(
12412 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
12413 SDValue SetCC;
12414 if (IsAdd && isOneConstant(RHS)) {
12415 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
12416 // the live range of X. We assume comparing with 0 is cheap.
12417 // The general case (X + C) < C is not necessarily beneficial. Although we
12418 // reduce the live range of X, we may introduce the materialization of
12419 // constant C.
12420 SetCC =
12421 DAG.getSetCC(dl, SetCCType, Result,
12422 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
12423 } else if (IsAdd && isAllOnesConstant(RHS)) {
12424 // Special case: uaddo X, -1 overflows if X != 0.
12425 SetCC =
12426 DAG.getSetCC(dl, SetCCType, LHS,
12427 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
12428 } else {
12429 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
12430 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
12431 }
12432 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
12433}
12434
12436 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
12437 SDLoc dl(Node);
12438 SDValue LHS = Node->getOperand(0);
12439 SDValue RHS = Node->getOperand(1);
12440 bool IsAdd = Node->getOpcode() == ISD::SADDO;
12441
12442 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
12443 LHS.getValueType(), LHS, RHS);
12444
12445 EVT ResultType = Node->getValueType(1);
12446 EVT OType = getSetCCResultType(
12447 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
12448
12449 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
12450 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
12451 if (isOperationLegal(OpcSat, LHS.getValueType())) {
12452 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
12453 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
12454 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
12455 return;
12456 }
12457
12458 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
12459
12460 if (IsAdd) {
12461 // For an addition, the result should be less than one of the operands (LHS)
12462 // if and only if the other operand (RHS) is negative, otherwise there will
12463 // be overflow.
12464 SDValue ResultLowerThanLHS =
12465 DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
12466 SDValue RHSNegative = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETLT);
12467 Overflow = DAG.getBoolExtOrTrunc(
12468 DAG.getNode(ISD::XOR, dl, OType, RHSNegative, ResultLowerThanLHS), dl,
12469 ResultType, ResultType);
12470 } else {
12471 // For subtraction, overflow occurs when the signed comparison of operands
12472 // doesn't match the sign of the result.
12473 SDValue LHSLessThanRHS = DAG.getSetCC(dl, OType, LHS, RHS, ISD::SETLT);
12474 SDValue ResultNegative = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETLT);
12475 Overflow = DAG.getBoolExtOrTrunc(
12476 DAG.getNode(ISD::XOR, dl, OType, LHSLessThanRHS, ResultNegative), dl,
12477 ResultType, ResultType);
12478 }
12479}
12480
12482 SDValue &Overflow, SelectionDAG &DAG) const {
12483 SDLoc dl(Node);
12484 EVT VT = Node->getValueType(0);
12485 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12486 SDValue LHS = Node->getOperand(0);
12487 SDValue RHS = Node->getOperand(1);
12488 bool isSigned = Node->getOpcode() == ISD::SMULO;
12489
12490 // For power-of-two multiplications we can use a simpler shift expansion.
12491 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
12492 const APInt &C = RHSC->getAPIntValue();
12493 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
12494 if (C.isPowerOf2()) {
12495 // smulo(x, signed_min) is same as umulo(x, signed_min).
12496 bool UseArithShift = isSigned && !C.isMinSignedValue();
12497 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
12498 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
12499 Overflow = DAG.getSetCC(dl, SetCCVT,
12500 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
12501 dl, VT, Result, ShiftAmt),
12502 LHS, ISD::SETNE);
12503 return true;
12504 }
12505 }
12506
12507 SDValue BottomHalf;
12508 SDValue TopHalf;
12509 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
12510
12511 static const unsigned Ops[2][3] =
12514 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
12515 BottomHalf = DAG.getNode(Ops[isSigned][0], dl, DAG.getVTList(VT, VT), LHS,
12516 RHS);
12517 TopHalf = BottomHalf.getValue(1);
12518 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
12519 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12520 TopHalf = DAG.getNode(Ops[isSigned][1], dl, VT, LHS, RHS);
12521 } else if (isTypeLegal(WideVT)) {
12522 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
12523 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
12524 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
12525 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
12526 SDValue ShiftAmt =
12527 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
12528 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
12529 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
12530 } else {
12531 if (VT.isVector())
12532 return false;
12533
12534 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
12535 }
12536
12537 Result = BottomHalf;
12538 if (isSigned) {
12539 SDValue ShiftAmt = DAG.getShiftAmountConstant(
12540 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
12541 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
12542 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
12543 } else {
12544 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
12545 DAG.getConstant(0, dl, VT), ISD::SETNE);
12546 }
12547
12548 // Truncate the result if SetCC returns a larger type than needed.
12549 EVT RType = Node->getValueType(1);
12550 if (RType.bitsLT(Overflow.getValueType()))
12551 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
12552
12553 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
12554 "Unexpected result type for S/UMULO legalization");
12555 return true;
12556}
12557
12559 SDLoc dl(Node);
12560 ISD::NodeType BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
12561 SDValue Op = Node->getOperand(0);
12562 SDNodeFlags Flags = Node->getFlags();
12563 EVT VT = Op.getValueType();
12564
12565 // Try to use a shuffle reduction for power of two vectors.
12566 if (VT.isPow2VectorType()) {
12567 // See if the reduction opcode is safe to use with widened types.
12568 bool WidenSrc = false;
12569 switch (Node->getOpcode()) {
12572 case ISD::VECREDUCE_ADD:
12573 case ISD::VECREDUCE_MUL:
12574 case ISD::VECREDUCE_AND:
12575 case ISD::VECREDUCE_OR:
12576 case ISD::VECREDUCE_XOR:
12581 WidenSrc = VT.isFixedLengthVector();
12582 break;
12583 }
12584
12586 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
12587 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT)) {
12588 if (WidenSrc && Op.getOpcode() != ISD::BUILD_VECTOR) {
12589 // Attempt to widen the source vectors to a legal op.
12590 EVT WideVT = getTypeToTransformTo(*DAG.getContext(), HalfVT);
12591 if (WideVT.isVector() &&
12592 WideVT.getScalarType() == HalfVT.getScalarType() &&
12593 WideVT.getVectorNumElements() >= HalfVT.getVectorNumElements() &&
12594 isOperationLegalOrCustom(BaseOpcode, WideVT)) {
12595 SDValue Lo, Hi;
12596 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
12597 Lo = DAG.getInsertSubvector(dl, DAG.getPOISON(WideVT), Lo, 0);
12598 Hi = DAG.getInsertSubvector(dl, DAG.getPOISON(WideVT), Hi, 0);
12599 Op = DAG.getNode(BaseOpcode, dl, WideVT, Lo, Hi, Flags);
12600 Op = DAG.getExtractSubvector(dl, HalfVT, Op, 0);
12601 VT = HalfVT;
12602 continue;
12603 }
12604 }
12605 break;
12606 }
12607
12608 SDValue Lo, Hi;
12609 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
12610 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Flags);
12611 VT = HalfVT;
12612
12613 // Stop if splitting is enough to make the reduction legal.
12614 if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
12615 return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
12616 Flags);
12617 }
12618 }
12619
12620 if (VT.isScalableVector())
12622 "Expanding reductions for scalable vectors is undefined.");
12623
12624 EVT EltVT = VT.getVectorElementType();
12625 unsigned NumElts = VT.getVectorNumElements();
12626
12628 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
12629
12630 SDValue Res = Ops[0];
12631 for (unsigned i = 1; i < NumElts; i++)
12632 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
12633
12634 // Result type may be wider than element type.
12635 if (EltVT != Node->getValueType(0))
12636 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
12637 return Res;
12638}
12639
12641 SDLoc dl(Node);
12642 SDValue AccOp = Node->getOperand(0);
12643 SDValue VecOp = Node->getOperand(1);
12644 SDNodeFlags Flags = Node->getFlags();
12645
12646 EVT VT = VecOp.getValueType();
12647 EVT EltVT = VT.getVectorElementType();
12648
12649 if (VT.isScalableVector())
12651 "Expanding reductions for scalable vectors is undefined.");
12652
12653 unsigned NumElts = VT.getVectorNumElements();
12654
12656 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
12657
12658 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
12659
12660 SDValue Res = AccOp;
12661 for (unsigned i = 0; i < NumElts; i++)
12662 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
12663
12664 return Res;
12665}
12666
12668 SelectionDAG &DAG) const {
12669 EVT VT = Node->getValueType(0);
12670 SDLoc dl(Node);
12671 bool isSigned = Node->getOpcode() == ISD::SREM;
12672 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
12673 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
12674 SDValue Dividend = Node->getOperand(0);
12675 SDValue Divisor = Node->getOperand(1);
12676 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
12677 SDVTList VTs = DAG.getVTList(VT, VT);
12678 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
12679 return true;
12680 }
12681 if (isOperationLegalOrCustom(DivOpc, VT)) {
12682 // X % Y -> X-X/Y*Y
12683 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
12684 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
12685 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
12686 return true;
12687 }
12688 return false;
12689}
12690
12692 SelectionDAG &DAG) const {
12693 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
12694 SDLoc dl(SDValue(Node, 0));
12695 SDValue Src = Node->getOperand(0);
12696
12697 // DstVT is the result type, while SatVT is the size to which we saturate
12698 EVT SrcVT = Src.getValueType();
12699 EVT DstVT = Node->getValueType(0);
12700
12701 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
12702 unsigned SatWidth = SatVT.getScalarSizeInBits();
12703 unsigned DstWidth = DstVT.getScalarSizeInBits();
12704 assert(SatWidth <= DstWidth &&
12705 "Expected saturation width smaller than result width");
12706
12707 // Determine minimum and maximum integer values and their corresponding
12708 // floating-point values.
12709 APInt MinInt, MaxInt;
12710 if (IsSigned) {
12711 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
12712 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
12713 } else {
12714 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
12715 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
12716 }
12717
12718 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
12719 // libcall emission cannot handle this. Large result types will fail.
12720 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
12721 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
12722 SrcVT = Src.getValueType();
12723 }
12724
12725 const fltSemantics &Sem = SrcVT.getFltSemantics();
12726 APFloat MinFloat(Sem);
12727 APFloat MaxFloat(Sem);
12728
12729 APFloat::opStatus MinStatus =
12730 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
12731 APFloat::opStatus MaxStatus =
12732 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
12733 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
12734 !(MaxStatus & APFloat::opStatus::opInexact);
12735
12736 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
12737 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
12738
12739 // If the integer bounds are exactly representable as floats and min/max are
12740 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
12741 // of comparisons and selects.
12742 auto EmitMinMax = [&](unsigned MinOpcode, unsigned MaxOpcode,
12743 bool MayPropagateNaN) {
12744 bool MinMaxLegal = isOperationLegalOrCustom(MinOpcode, SrcVT) &&
12745 isOperationLegalOrCustom(MaxOpcode, SrcVT);
12746 if (!MinMaxLegal)
12747 return SDValue();
12748
12749 SDValue Clamped = Src;
12750
12751 // Clamp Src by MinFloat from below. If !MayPropagateNaN and Src is NaN
12752 // then the result is MinFloat.
12753 Clamped = DAG.getNode(MaxOpcode, dl, SrcVT, Clamped, MinFloatNode);
12754 // Clamp by MaxFloat from above. If !MayPropagateNaN then NaN cannot occur.
12755 Clamped = DAG.getNode(MinOpcode, dl, SrcVT, Clamped, MaxFloatNode);
12756 // Convert clamped value to integer.
12757 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
12758 dl, DstVT, Clamped);
12759
12760 // If !MayPropagateNan and the conversion is unsigned case we're done,
12761 // because we mapped NaN to MinFloat, which will cast to zero.
12762 if (!MayPropagateNaN && !IsSigned)
12763 return FpToInt;
12764
12765 // Otherwise, select 0 if Src is NaN.
12766 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
12767 EVT SetCCVT =
12768 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
12769 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
12770 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
12771 };
12772 if (AreExactFloatBounds) {
12773 if (SDValue Res = EmitMinMax(ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM,
12774 /*MayPropagateNaN=*/false))
12775 return Res;
12776 // These may propagate NaN for sNaN operands.
12777 if (SDValue Res =
12778 EmitMinMax(ISD::FMINNUM, ISD::FMAXNUM, /*MayPropagateNaN=*/true))
12779 return Res;
12780 // These always propagate NaN.
12781 if (SDValue Res =
12782 EmitMinMax(ISD::FMINIMUM, ISD::FMAXIMUM, /*MayPropagateNaN=*/true))
12783 return Res;
12784 }
12785
12786 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
12787 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
12788
12789 // Result of direct conversion. The assumption here is that the operation is
12790 // non-trapping and it's fine to apply it to an out-of-range value if we
12791 // select it away later.
12792 SDValue FpToInt =
12793 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
12794
12795 SDValue Select = FpToInt;
12796
12797 EVT SetCCVT =
12798 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
12799
12800 // If Src ULT MinFloat, select MinInt. In particular, this also selects
12801 // MinInt if Src is NaN.
12802 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
12803 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
12804 // If Src OGT MaxFloat, select MaxInt.
12805 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
12806 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
12807
12808 // In the unsigned case we are done, because we mapped NaN to MinInt, which
12809 // is already zero.
12810 if (!IsSigned)
12811 return Select;
12812
12813 // Otherwise, select 0 if Src is NaN.
12814 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
12815 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
12816 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
12817}
12818
12820 const SDLoc &dl,
12821 SelectionDAG &DAG) const {
12822 EVT OperandVT = Op.getValueType();
12823 if (OperandVT.getScalarType() == ResultVT.getScalarType())
12824 return Op;
12825 EVT ResultIntVT = ResultVT.changeTypeToInteger();
12826 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
12827 // can induce double-rounding which may alter the results. We can
12828 // correct for this using a trick explained in: Boldo, Sylvie, and
12829 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
12830 // World Congress. 2005.
12831 SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
12832 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);
12833
12834 // We can keep the narrow value as-is if narrowing was exact (no
12835 // rounding error), the wide value was NaN (the narrow value is also
12836 // NaN and should be preserved) or if we rounded to the odd value.
12837 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
12838 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
12839 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
12840 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
12841 EVT ResultIntVTCCVT = getSetCCResultType(
12842 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
12843 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
12844 // The result is already odd so we don't need to do anything.
12845 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
12846
12847 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12848 Op.getValueType());
12849 // We keep results which are exact, odd or NaN.
12850 SDValue KeepNarrow =
12851 DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
12852 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
12853 // We morally performed a round-down if AbsNarrow is smaller than
12854 // AbsWide.
12855 SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
12856 SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
12857 SDValue NarrowIsRd =
12858 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
12859 // If the narrow value is odd or exact, pick it.
12860 // Otherwise, narrow is even and corresponds to either the rounded-up
12861 // or rounded-down value. If narrow is the rounded-down value, we want
12862 // the rounded-up value as it will be odd.
12863 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
12864 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
12865 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
12866 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
12867}
12868
12870 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
12871 SDValue Op = Node->getOperand(0);
12872 EVT VT = Node->getValueType(0);
12873 SDLoc dl(Node);
12874 if (VT.getScalarType() == MVT::bf16) {
12875 if (Node->getConstantOperandVal(1) == 1) {
12876 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
12877 }
12878 EVT OperandVT = Op.getValueType();
12879 SDValue IsNaN = DAG.getSetCC(
12880 dl,
12881 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
12882 Op, Op, ISD::SETUO);
12883
12884 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
12885 // can induce double-rounding which may alter the results. We can
12886 // correct for this using a trick explained in: Boldo, Sylvie, and
12887 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
12888 // World Congress. 2005.
12889 EVT F32 = VT.changeElementType(*DAG.getContext(), MVT::f32);
12890 EVT I32 = F32.changeTypeToInteger();
12891 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
12892 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
12893
12894 // Conversions should set NaN's quiet bit. This also prevents NaNs from
12895 // turning into infinities.
12896 SDValue NaN =
12897 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
12898
12899 // Factor in the contribution of the low 16 bits.
12900 SDValue One = DAG.getConstant(1, dl, I32);
12901 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
12902 DAG.getShiftAmountConstant(16, I32, dl));
12903 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
12904 SDValue RoundingBias =
12905 DAG.getNode(ISD::ADD, dl, I32, Lsb, DAG.getConstant(0x7fff, dl, I32));
12906 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
12907
12908 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
12909 // 0x80000000.
12910 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
12911
12912 // Now that we have rounded, shift the bits into position.
12913 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
12914 DAG.getShiftAmountConstant(16, I32, dl));
12915 EVT I16 = I32.changeElementType(*DAG.getContext(), MVT::i16);
12916 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
12917 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
12918 }
12919 return SDValue();
12920}
12921
12923 SelectionDAG &DAG) const {
12924 assert((Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT ||
12925 Node->getOpcode() == ISD::VECTOR_SPLICE_RIGHT) &&
12926 "Unexpected opcode!");
12927 assert((Node->getValueType(0).isScalableVector() ||
12928 !isa<ConstantSDNode>(Node->getOperand(2))) &&
12929 "Fixed length vector types with constant offsets expected to use "
12930 "SHUFFLE_VECTOR!");
12931
12932 EVT VT = Node->getValueType(0);
12933 SDValue V1 = Node->getOperand(0);
12934 SDValue V2 = Node->getOperand(1);
12935 SDValue Offset = Node->getOperand(2);
12936 SDLoc DL(Node);
12937
12938 // Expand through memory thusly:
12939 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
12940 // Store V1, Ptr
12941 // Store V2, Ptr + sizeof(V1)
12942 // if (VECTOR_SPLICE_LEFT)
12943 // Ptr = Ptr + (Offset * sizeof(VT.Elt))
12944 // else
12945 // Ptr = Ptr + sizeof(V1) - (Offset * size(VT.Elt))
12946 // Res = Load Ptr
12947
12948 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
12949
12951 VT.getVectorElementCount() * 2);
12952 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12953 EVT PtrVT = StackPtr.getValueType();
12954 auto &MF = DAG.getMachineFunction();
12955 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12956 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12957
12958 // Store the lo part of CONCAT_VECTORS(V1, V2)
12959 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
12960 // Store the hi part of CONCAT_VECTORS(V1, V2)
12961 SDValue VTBytes = DAG.getTypeSize(DL, PtrVT, VT.getStoreSize());
12962 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, VTBytes);
12963 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
12964
12965 // NOTE: TrailingBytes must be clamped so as not to read outside of V1:V2.
12966 SDValue EltByteSize =
12967 DAG.getTypeSize(DL, PtrVT, VT.getVectorElementType().getStoreSize());
12968 Offset = DAG.getZExtOrTrunc(Offset, DL, PtrVT);
12969 SDValue TrailingBytes = DAG.getNode(ISD::MUL, DL, PtrVT, Offset, EltByteSize);
12970
12971 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VTBytes);
12972
12973 if (Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT)
12974 StackPtr = DAG.getMemBasePlusOffset(StackPtr, TrailingBytes, DL);
12975 else
12976 StackPtr = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
12977
12978 // Load the spliced result
12979 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
12981}
12982
12984 SelectionDAG &DAG) const {
12985 SDLoc DL(Node);
12986 SDValue Vec = Node->getOperand(0);
12987 SDValue Mask = Node->getOperand(1);
12988 SDValue Passthru = Node->getOperand(2);
12989
12990 EVT VecVT = Vec.getValueType();
12991 EVT ScalarVT = VecVT.getScalarType();
12992 EVT MaskVT = Mask.getValueType();
12993 EVT MaskScalarVT = MaskVT.getScalarType();
12994
12995 // Needs to be handled by targets that have scalable vector types.
12996 if (VecVT.isScalableVector())
12997 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
12998
12999 SDValue StackPtr = DAG.CreateStackTemporary(
13000 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
13001 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
13002 MachinePointerInfo PtrInfo =
13004
13005 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
13006 SDValue Chain = DAG.getEntryNode();
13007 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
13008
13009 bool HasPassthru = !Passthru.isUndef();
13010
13011 // If we have a passthru vector, store it on the stack, overwrite the matching
13012 // positions and then re-write the last element that was potentially
13013 // overwritten even though mask[i] = false.
13014 if (HasPassthru)
13015 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
13016
13017 SDValue LastWriteVal;
13018 APInt PassthruSplatVal;
13019 bool IsSplatPassthru =
13020 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
13021
13022 if (IsSplatPassthru) {
13023 // As we do not know which position we wrote to last, we cannot simply
13024 // access that index from the passthru vector. So we first check if passthru
13025 // is a splat vector, to use any element ...
13026 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
13027 } else if (HasPassthru) {
13028 // ... if it is not a splat vector, we need to get the passthru value at
13029 // position = popcount(mask) and re-load it from the stack before it is
13030 // overwritten in the loop below.
13031 EVT PopcountVT = ScalarVT.changeTypeToInteger();
13032 SDValue Popcount = DAG.getNode(
13034 MaskVT.changeVectorElementType(*DAG.getContext(), MVT::i1), Mask);
13035 Popcount = DAG.getNode(
13037 MaskVT.changeVectorElementType(*DAG.getContext(), PopcountVT),
13038 Popcount);
13039 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
13040 SDValue LastElmtPtr =
13041 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
13042 LastWriteVal = DAG.getLoad(
13043 ScalarVT, DL, Chain, LastElmtPtr,
13045 Chain = LastWriteVal.getValue(1);
13046 }
13047
13048 unsigned NumElms = VecVT.getVectorNumElements();
13049 for (unsigned I = 0; I < NumElms; I++) {
13050 SDValue ValI = DAG.getExtractVectorElt(DL, ScalarVT, Vec, I);
13051 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
13052 Chain = DAG.getStore(
13053 Chain, DL, ValI, OutPtr,
13055
13056 // Get the mask value and add it to the current output position. This
13057 // either increments by 1 if MaskI is true or adds 0 otherwise.
13058 // Freeze in case we have poison/undef mask entries.
13059 SDValue MaskI = DAG.getExtractVectorElt(DL, MaskScalarVT, Mask, I);
13060 MaskI = DAG.getFreeze(MaskI);
13061 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
13062 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
13063 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
13064
13065 if (HasPassthru && I == NumElms - 1) {
13066 SDValue EndOfVector =
13067 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
13068 SDValue AllLanesSelected =
13069 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
13070 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
13071 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
13072
13073 // Re-write the last ValI if all lanes were selected. Otherwise,
13074 // overwrite the last write it with the passthru value.
13075 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
13076 LastWriteVal, SDNodeFlags::Unpredictable);
13077 Chain = DAG.getStore(
13078 Chain, DL, LastWriteVal, OutPtr,
13080 }
13081 }
13082
13083 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
13084}
13085
13087 SDLoc DL(Node);
13088 EVT VT = Node->getValueType(0);
13089
13090 bool ZeroIsPoison = Node->getOpcode() == ISD::CTTZ_ELTS_ZERO_POISON;
13091 auto [Mask, StepVec] =
13092 getLegalMaskAndStepVector(Node->getOperand(0), ZeroIsPoison, DL, DAG);
13093 EVT StepVecVT = StepVec.getValueType();
13094 EVT StepVT = StepVecVT.getVectorElementType();
13095
13096 // Promote the scalar result type early to avoid redundant zexts.
13098 StepVT = getTypeToTransformTo(*DAG.getContext(), StepVT);
13099
13100 SDValue VL =
13101 DAG.getElementCount(DL, StepVT, StepVecVT.getVectorElementCount());
13102 SDValue SplatVL = DAG.getSplat(StepVecVT, DL, VL);
13103 StepVec = DAG.getNode(ISD::SUB, DL, StepVecVT, SplatVL, StepVec);
13104 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
13105 SDValue Select = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
13107 StepVecVT.getVectorElementType(), Select);
13108 SDValue Sub = DAG.getNode(ISD::SUB, DL, StepVT, VL,
13109 DAG.getZExtOrTrunc(Max, DL, StepVT));
13110
13111 return DAG.getZExtOrTrunc(Sub, DL, VT);
13112}
13113
13115 SelectionDAG &DAG) const {
13116 SDLoc DL(N);
13117 SDValue Acc = N->getOperand(0);
13118 SDValue MulLHS = N->getOperand(1);
13119 SDValue MulRHS = N->getOperand(2);
13120 EVT AccVT = Acc.getValueType();
13121 EVT MulOpVT = MulLHS.getValueType();
13122
13123 EVT ExtMulOpVT =
13125 MulOpVT.getVectorElementCount());
13126
13127 unsigned ExtOpcLHS, ExtOpcRHS;
13128 switch (N->getOpcode()) {
13129 default:
13130 llvm_unreachable("Unexpected opcode");
13132 ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
13133 break;
13135 ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
13136 break;
13138 ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
13139 break;
13140 }
13141
13142 if (ExtMulOpVT != MulOpVT) {
13143 MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
13144 MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
13145 }
13146 SDValue Input = MulLHS;
13147 if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
13148 if (!llvm::isOneOrOneSplatFP(MulRHS))
13149 Input = DAG.getNode(ISD::FMUL, DL, ExtMulOpVT, MulLHS, MulRHS);
13150 } else if (!llvm::isOneOrOneSplat(MulRHS)) {
13151 Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
13152 }
13153
13154 unsigned Stride = AccVT.getVectorMinNumElements();
13155 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
13156
13157 // Collect all of the subvectors
13158 std::deque<SDValue> Subvectors = {Acc};
13159 for (unsigned I = 0; I < ScaleFactor; I++)
13160 Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
13161
13162 unsigned FlatNode =
13163 N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;
13164
13165 // Flatten the subvector tree
13166 while (Subvectors.size() > 1) {
13167 Subvectors.push_back(
13168 DAG.getNode(FlatNode, DL, AccVT, {Subvectors[0], Subvectors[1]}));
13169 Subvectors.pop_front();
13170 Subvectors.pop_front();
13171 }
13172
13173 assert(Subvectors.size() == 1 &&
13174 "There should only be one subvector after tree flattening");
13175
13176 return Subvectors[0];
13177}
13178
13179/// Given a store node \p StoreNode, return true if it is safe to fold that node
13180/// into \p FPNode, which expands to a library call with output pointers.
13182 SDNode *FPNode) {
13184 SmallVector<const SDNode *, 8> DeferredNodes;
13186
13187 // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
13188 for (SDValue Op : StoreNode->ops())
13189 if (Op.getNode() != FPNode)
13190 Worklist.push_back(Op.getNode());
13191
13193 while (!Worklist.empty()) {
13194 const SDNode *Node = Worklist.pop_back_val();
13195 auto [_, Inserted] = Visited.insert(Node);
13196 if (!Inserted)
13197 continue;
13198
13199 if (MaxSteps > 0 && Visited.size() >= MaxSteps)
13200 return false;
13201
13202 // Reached the FPNode (would result in a cycle).
13203 // OR Reached CALLSEQ_START (would result in nested call sequences).
13204 if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
13205 return false;
13206
13207 if (Node->getOpcode() == ISD::CALLSEQ_END) {
13208 // Defer looking into call sequences (so we can check we're outside one).
13209 // We still need to look through these for the predecessor check.
13210 DeferredNodes.push_back(Node);
13211 continue;
13212 }
13213
13214 for (SDValue Op : Node->ops())
13215 Worklist.push_back(Op.getNode());
13216 }
13217
13218 // True if we're outside a call sequence and don't have the FPNode as a
13219 // predecessor. No cycles or nested call sequences possible.
13220 return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes,
13221 MaxSteps);
13222}
13223
13225 SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
13227 std::optional<unsigned> CallRetResNo) const {
13228 if (LC == RTLIB::UNKNOWN_LIBCALL)
13229 return false;
13230
13231 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
13232 if (LibcallImpl == RTLIB::Unsupported)
13233 return false;
13234
13235 LLVMContext &Ctx = *DAG.getContext();
13236 EVT VT = Node->getValueType(0);
13237 unsigned NumResults = Node->getNumValues();
13238
13239 // Find users of the node that store the results (and share input chains). The
13240 // destination pointers can be used instead of creating stack allocations.
13241 SDValue StoresInChain;
13242 SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
13243 for (SDNode *User : Node->users()) {
13245 continue;
13246 auto *ST = cast<StoreSDNode>(User);
13247 SDValue StoreValue = ST->getValue();
13248 unsigned ResNo = StoreValue.getResNo();
13249 // Ensure the store corresponds to an output pointer.
13250 if (CallRetResNo == ResNo)
13251 continue;
13252 // Ensure the store to the default address space and not atomic or volatile.
13253 if (!ST->isSimple() || ST->getAddressSpace() != 0)
13254 continue;
13255 // Ensure all store chains are the same (so they don't alias).
13256 if (StoresInChain && ST->getChain() != StoresInChain)
13257 continue;
13258 // Ensure the store is properly aligned.
13259 Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx);
13260 if (ST->getAlign() <
13261 DAG.getDataLayout().getABITypeAlign(StoreType->getScalarType()))
13262 continue;
13263 // Avoid:
13264 // 1. Creating cyclic dependencies.
13265 // 2. Expanding the node to a call within a call sequence.
13267 continue;
13268 ResultStores[ResNo] = ST;
13269 StoresInChain = ST->getChain();
13270 }
13271
13272 ArgListTy Args;
13273
13274 // Pass the arguments.
13275 for (const SDValue &Op : Node->op_values()) {
13276 EVT ArgVT = Op.getValueType();
13277 Type *ArgTy = ArgVT.getTypeForEVT(Ctx);
13278 Args.emplace_back(Op, ArgTy);
13279 }
13280
13281 // Pass the output pointers.
13282 SmallVector<SDValue, 2> ResultPtrs(NumResults);
13284 for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) {
13285 if (ResNo == CallRetResNo)
13286 continue;
13287 EVT ResVT = Node->getValueType(ResNo);
13288 SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(ResVT);
13289 ResultPtrs[ResNo] = ResultPtr;
13290 Args.emplace_back(ResultPtr, PointerTy);
13291 }
13292
13293 SDLoc DL(Node);
13294
13296 // Pass the vector mask (if required).
13297 EVT MaskVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
13298 SDValue Mask = DAG.getBoolConstant(true, DL, MaskVT, VT);
13299 Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
13300 }
13301
13302 Type *RetType = CallRetResNo.has_value()
13303 ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
13304 : Type::getVoidTy(Ctx);
13305 SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
13306 SDValue Callee =
13307 DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
13309 CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
13310 getLibcallImplCallingConv(LibcallImpl), RetType, Callee, std::move(Args));
13311
13312 auto [Call, CallChain] = LowerCallTo(CLI);
13313
13314 for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
13315 if (ResNo == CallRetResNo) {
13316 Results.push_back(Call);
13317 continue;
13318 }
13319 MachinePointerInfo PtrInfo;
13320 SDValue LoadResult = DAG.getLoad(Node->getValueType(ResNo), DL, CallChain,
13321 ResultPtr, PtrInfo);
13322 SDValue OutChain = LoadResult.getValue(1);
13323
13324 if (StoreSDNode *ST = ResultStores[ResNo]) {
13325 // Replace store with the library call.
13326 DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
13327 PtrInfo = ST->getPointerInfo();
13328 } else {
13330 DAG.getMachineFunction(),
13331 cast<FrameIndexSDNode>(ResultPtr)->getIndex());
13332 }
13333
13334 Results.push_back(LoadResult);
13335 }
13336
13337 return true;
13338}
13339
13341 SDValue &LHS, SDValue &RHS,
13342 SDValue &CC, SDValue Mask,
13343 SDValue EVL, bool &NeedInvert,
13344 const SDLoc &dl, SDValue &Chain,
13345 bool IsSignaling) const {
13346 MVT OpVT = LHS.getSimpleValueType();
13347 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
13348 NeedInvert = false;
13349 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
13350 bool IsNonVP = !EVL;
13351 switch (getCondCodeAction(CCCode, OpVT)) {
13352 default:
13353 llvm_unreachable("Unknown condition code action!");
13355 // Nothing to do.
13356 break;
13359 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
13360 std::swap(LHS, RHS);
13361 CC = DAG.getCondCode(InvCC);
13362 return true;
13363 }
13364 // Swapping operands didn't work. Try inverting the condition.
13365 bool NeedSwap = false;
13366 InvCC = getSetCCInverse(CCCode, OpVT);
13367 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
13368 // If inverting the condition is not enough, try swapping operands
13369 // on top of it.
13370 InvCC = ISD::getSetCCSwappedOperands(InvCC);
13371 NeedSwap = true;
13372 }
13373 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
13374 CC = DAG.getCondCode(InvCC);
13375 NeedInvert = true;
13376 if (NeedSwap)
13377 std::swap(LHS, RHS);
13378 return true;
13379 }
13380
13381 // Special case: expand i1 comparisons using logical operations.
13382 if (OpVT == MVT::i1) {
13383 SDValue Ret;
13384 switch (CCCode) {
13385 default:
13386 llvm_unreachable("Unknown integer setcc!");
13387 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
13388 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
13389 MVT::i1);
13390 break;
13391 case ISD::SETNE: // X != Y --> (X ^ Y)
13392 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
13393 break;
13394 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
13395 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
13396 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
13397 DAG.getNOT(dl, LHS, MVT::i1));
13398 break;
13399 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
13400 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
13401 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
13402 DAG.getNOT(dl, RHS, MVT::i1));
13403 break;
13404 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
13405 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
13406 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
13407 DAG.getNOT(dl, LHS, MVT::i1));
13408 break;
13409 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
13410 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
13411 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
13412 DAG.getNOT(dl, RHS, MVT::i1));
13413 break;
13414 }
13415
13416 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
13417 RHS = SDValue();
13418 CC = SDValue();
13419 return true;
13420 }
13421
13423 unsigned Opc = 0;
13424 switch (CCCode) {
13425 default:
13426 llvm_unreachable("Don't know how to expand this condition!");
13427 case ISD::SETUO:
13428 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
13429 CC1 = ISD::SETUNE;
13430 CC2 = ISD::SETUNE;
13431 Opc = ISD::OR;
13432 break;
13433 }
13435 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
13436 NeedInvert = true;
13437 [[fallthrough]];
13438 case ISD::SETO:
13440 "If SETO is expanded, SETOEQ must be legal!");
13441 CC1 = ISD::SETOEQ;
13442 CC2 = ISD::SETOEQ;
13443 Opc = ISD::AND;
13444 break;
13445 case ISD::SETONE:
13446 case ISD::SETUEQ:
13447 // If the SETUO or SETO CC isn't legal, we might be able to use
13448 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
13449 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
13450 // the operands.
13451 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
13452 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
13453 isCondCodeLegal(ISD::SETOLT, OpVT))) {
13454 CC1 = ISD::SETOGT;
13455 CC2 = ISD::SETOLT;
13456 Opc = ISD::OR;
13457 NeedInvert = ((unsigned)CCCode & 0x8U);
13458 break;
13459 }
13460 [[fallthrough]];
13461 case ISD::SETOEQ:
13462 case ISD::SETOGT:
13463 case ISD::SETOGE:
13464 case ISD::SETOLT:
13465 case ISD::SETOLE:
13466 case ISD::SETUNE:
13467 case ISD::SETUGT:
13468 case ISD::SETUGE:
13469 case ISD::SETULT:
13470 case ISD::SETULE:
13471 // If we are floating point, assign and break, otherwise fall through.
13472 if (!OpVT.isInteger()) {
13473 // We can use the 4th bit to tell if we are the unordered
13474 // or ordered version of the opcode.
13475 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
13476 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
13477 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
13478 break;
13479 }
13480 // Fallthrough if we are unsigned integer.
13481 [[fallthrough]];
13482 case ISD::SETLE:
13483 case ISD::SETGT:
13484 case ISD::SETGE:
13485 case ISD::SETLT:
13486 case ISD::SETNE:
13487 case ISD::SETEQ:
13488 // If all combinations of inverting the condition and swapping operands
13489 // didn't work then we have no means to expand the condition.
13490 llvm_unreachable("Don't know how to expand this condition!");
13491 }
13492
13493 SDValue SetCC1, SetCC2;
13494 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
13495 // If we aren't the ordered or unorder operation,
13496 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
13497 if (IsNonVP) {
13498 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
13499 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
13500 } else {
13501 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
13502 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
13503 }
13504 } else {
13505 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
13506 if (IsNonVP) {
13507 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
13508 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
13509 } else {
13510 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
13511 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
13512 }
13513 }
13514 if (Chain)
13515 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
13516 SetCC2.getValue(1));
13517 if (IsNonVP)
13518 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
13519 else {
13520 // Transform the binary opcode to the VP equivalent.
13521 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
13522 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
13523 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
13524 }
13525 RHS = SDValue();
13526 CC = SDValue();
13527 return true;
13528 }
13529 }
13530 return false;
13531}
13532
13534 SelectionDAG &DAG) const {
13535 EVT VT = Node->getValueType(0);
13536 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
13537 // split into two equal parts.
13538 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
13539 return SDValue();
13540
13541 // Restrict expansion to cases where both parts can be concatenated.
13542 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
13543 if (LoVT != HiVT || !isTypeLegal(LoVT))
13544 return SDValue();
13545
13546 SDLoc DL(Node);
13547 unsigned Opcode = Node->getOpcode();
13548
13549 // Don't expand if the result is likely to be unrolled anyway.
13550 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
13551 return SDValue();
13552
13553 SmallVector<SDValue, 4> LoOps, HiOps;
13554 for (const SDValue &V : Node->op_values()) {
13555 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
13556 LoOps.push_back(Lo);
13557 HiOps.push_back(Hi);
13558 }
13559
13560 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps, Node->getFlags());
13561 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps, Node->getFlags());
13562 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
13563}
13564
13566 const SDLoc &DL,
13567 EVT InVecVT, SDValue EltNo,
13568 LoadSDNode *OriginalLoad,
13569 SelectionDAG &DAG) const {
13570 assert(OriginalLoad->isSimple());
13571
13572 EVT VecEltVT = InVecVT.getVectorElementType();
13573
13574 // If the vector element type is not a multiple of a byte then we are unable
13575 // to correctly compute an address to load only the extracted element as a
13576 // scalar.
13577 if (!VecEltVT.isByteSized())
13578 return SDValue();
13579
13580 ISD::LoadExtType ExtTy =
13581 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
13582 if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
13583 return SDValue();
13584
13585 std::optional<unsigned> ByteOffset;
13586 Align Alignment = OriginalLoad->getAlign();
13588 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
13589 int Elt = ConstEltNo->getZExtValue();
13590 ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
13591 MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
13592 Alignment = commonAlignment(Alignment, *ByteOffset);
13593 } else {
13594 // Discard the pointer info except the address space because the memory
13595 // operand can't represent this new access since the offset is variable.
13596 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
13597 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
13598 }
13599
13600 if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
13601 return SDValue();
13602
13603 unsigned IsFast = 0;
13604 if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
13605 OriginalLoad->getAddressSpace(), Alignment,
13606 OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
13607 !IsFast)
13608 return SDValue();
13609
13610 // The original DAG loaded the entire vector from memory, so arithmetic
13611 // within it must be inbounds.
13613 DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
13614
13615 // We are replacing a vector load with a scalar load. The new load must have
13616 // identical memory op ordering to the original.
13617 SDValue Load;
13618 if (ResultVT.bitsGT(VecEltVT)) {
13619 // If the result type of vextract is wider than the load, then issue an
13620 // extending load instead.
13621 ISD::LoadExtType ExtType =
13622 isLoadLegal(ResultVT, VecEltVT, Alignment,
13623 OriginalLoad->getAddressSpace(), ISD::ZEXTLOAD, false)
13625 : ISD::EXTLOAD;
13626 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
13627 NewPtr, MPI, VecEltVT, Alignment,
13628 OriginalLoad->getMemOperand()->getFlags(),
13629 OriginalLoad->getAAInfo());
13630 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
13631 } else {
13632 // The result type is narrower or the same width as the vector element
13633 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
13634 Alignment, OriginalLoad->getMemOperand()->getFlags(),
13635 OriginalLoad->getAAInfo());
13636 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
13637 if (ResultVT.bitsLT(VecEltVT))
13638 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
13639 else
13640 Load = DAG.getBitcast(ResultVT, Load);
13641 }
13642
13643 return Load;
13644}
13645
13646// Set type id for call site info and metadata 'call_target'.
13647// We are filtering for:
13648// a) The call-graph-section use case that wants to know about indirect
13649// calls, or
13650// b) We want to annotate indirect calls.
13652 const CallBase *CB, MachineFunction &MF,
13653 MachineFunction::CallSiteInfo &CSInfo) const {
13654 if (CB && CB->isIndirectCall() &&
13657 CSInfo = MachineFunction::CallSiteInfo(*CB);
13658}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool isSigned(unsigned Opcode)
#define _
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
#define T
#define T1
uint64_t High
#define P(N)
Function const char * Passes
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static std::pair< SDValue, SDValue > getLegalMaskAndStepVector(SDValue Mask, bool ZeroIsPoison, SDLoc DL, SelectionDAG &DAG)
Returns a type-legalized version of Mask as the first item in the pair.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static bool canNarrowCLMULToLegal(const TargetLowering &TLI, LLVMContext &Ctx, EVT VT, unsigned HalveDepth=0, unsigned TotalDepth=0)
Check if CLMUL on VT can eventually reach a type with legal CLMUL through a chain of halving decompos...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if if this FPClassTest can be performed with an ordered fcmp to 0,...
static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, SDNode *FPNode)
Given a store node StoreNode, return true if it is safe to fold that node into FPNode,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static LLVM_ABI const llvm::fltSemantics & EnumToSemantics(Semantics S)
Definition APFloat.cpp:98
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:222
static LLVM_ABI unsigned getSizeInBits(const fltSemantics &Sem)
Returns the size of the floating point number (in bits) in the given semantics.
Definition APFloat.cpp:278
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:214
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:255
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1406
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.h:1217
APInt bitcastToAPInt() const
Definition APFloat.h:1430
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1197
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1157
void changeSign()
Definition APFloat.h:1356
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1168
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1616
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1810
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1429
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:424
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1414
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
void setSignBit()
Set the sign bit to 1.
Definition APInt.h:1363
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1256
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1419
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:841
void negate()
Negate this APInt in place.
Definition APInt.h:1491
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1554
unsigned countLeadingZeros() const
Definition APInt.h:1629
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:398
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
Definition APInt.h:1458
unsigned logBase2() const
Definition APInt.h:1784
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:834
void setAllBits()
Set every bit to 1.
Definition APInt.h:1342
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1317
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:406
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1157
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1028
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1390
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:880
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
void clearBits(unsigned LoBit, unsigned HiBit)
Clear the bits from LoBit (inclusive) to HiBit (exclusive) to 0.
Definition APInt.h:1440
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1411
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:483
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition APInt.h:1465
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition APInt.h:1366
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:865
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
This class represents a range of values.
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:217
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
std::vector< std::string > ConstraintCodeVector
Definition InlineAsm.h:104
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Flags getFlags() const
Return the raw flags of the source value,.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MCRegister getLiveInPhysReg(Register VReg) const
getLiveInPhysReg - If VReg is a live-in virtual register, return the corresponding live-in physical r...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition Module.h:447
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
iterator end() const
Definition ArrayRef.h:339
iterator begin() const
Definition ArrayRef.h:338
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC)
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI bool isKnownNeverLogicalZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Test whether the given floating point SDValue (or all elements of it, if it is a vector) is known to ...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl, SDNodeFlags Flags={})
Constant fold a setcc to true or false.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
LLVM_ABI std::optional< unsigned > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI bool shouldOptForSize() const
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS)
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, UndefPoisonKind Kind=UndefPoisonKind::UndefOrPoison, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, Kind can be used to track poison ...
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, bool OrZero=false, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:56
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
iterator end() const
Definition StringRef.h:116
Class to represent struct types.
LLVM_ABI void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with custom lowering.
unsigned getBitWidthForCttzElements(EVT RetVT, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool preferSelectsOverBooleanArithmetic(EVT VT) const
Should we prefer selects to doing arithmetic on boolean types.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is a legal icmp immediate, that is the target has icmp instructions which can compare a register against the immediate without having to materialize the immediate into a register.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the value to ToTy in the result register.
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
EVT getLegalTypeToTransformTo(LLVMContext &Context, EVT VT) const
Perform getTypeToTransformTo repeatedly until a legal type is obtained.
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const
Get the CallingConv that should be used for the specified libcall implementation.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether the given truncation down into KeptBits would be truncating or not.
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in types wider than i1.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
ISD::CondCode getSoftFloatCmpLibcallPredicate(RTLIB::LibcallImpl Call) const
Get the comparison predicate that's to be used to test the result of the comparison libcall against z...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
TargetLoweringBase(const TargetMachine &TM, const TargetSubtargetInfo &STI)
NOTE: The TargetMachine owns TLOF.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X &amp; (C l>>/&lt;&lt; Y)) ==/!= 0, return true if it should be transformed into: ((X &lt;&lt;/l>> Y) &amp; C) ==/!= 0.
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
virtual EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset, memcpy, and memmove lowering.
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
bool isLoadLegal(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return true if the specified load with extension is legal on this target.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
bool expandMultipleResultFPLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl< SDValue > &Results, std::optional< unsigned > CallRetResNo={}) const
Expands a node with multiple results to an FP or vector libcall.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_POISON nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
SmallVector< ConstraintPair > ConstraintGroup
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the multiplication.
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandFCANONICALIZE(SDNode *Node, SelectionDAG &DAG) const
Expand FCANONICALIZE to FMUL with 1.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_POISON nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_POISON nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandCLMUL(SDNode *N, SelectionDAG &DAG) const
Expand carryless multiply.
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue expandCttzElts(SDNode *Node, SelectionDAG &DAG) const
Expand a CTTZ_ELTS or CTTZ_ELTS_ZERO_POISON by calculating (VL - i) for each active lane (i),...
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual SDValue unwrapAddress(SDValue N) const
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, UndefPoisonKind Kind, unsigned Depth) const
Return true if this function can prove that Op is never poison and, Kind can be used to track poison ...
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_POISON nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product twice the width of LHS and RHS.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to use the type for the given node type.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
SDValue expandLoopDependenceMask(SDNode *N, SelectionDAG &DAG) const
Expand LOOP_DEPENDENCE_MASK nodes.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
~TargetLowering() override
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using an n/2-bit algorithm.
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
virtual bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const
For most targets, an LLVM type must be broken down into multiple smaller types.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, UndefPoisonKind Kind, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_POISON nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandCONVERT_FROM_ARBITRARY_FP(SDNode *Node, SelectionDAG &DAG) const
Expand CONVERT_FROM_ARBITRARY_FP using bit manipulation.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode, SDNodeFlags Flags={}) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual void computeKnownFPClassForTargetNode(const SDValue Op, KnownFPClass &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine floating-point class information for a target node.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis, Register R, KnownFPClass &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false,.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
SDValue expandCTLS(SDNode *N, SelectionDAG &DAG) const
Expand CTLS (count leading sign bits) nodes.
void setTypeIdForCallsiteInfo(const CallBase *CB, MachineFunction &MF, MachineFunction::CallSiteInfo &CSInfo) const
Primary interface to the complete machine description for the target machine.
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
unsigned EmitCallSiteInfo
The flag enables call site info production.
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:785
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition Type.h:313
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:286
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:328
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:110
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:713
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
Definition APInt.cpp:3061
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ LOOP_DEPENDENCE_RAW_MASK
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:538
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:394
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:522
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:400
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ VECTOR_FIND_LAST_ACTIVE
Finds the index of the last active mask element Operands: Mask.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:910
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:528
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ CLMUL
Carry-less multiplication operations.
Definition ISDOpcodes.h:774
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:407
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ CTLZ_ZERO_POISON
Definition ISDOpcodes.h:788
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ PARTIAL_REDUCE_FMLA
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BRIND
BRIND - Indirect branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ GET_ACTIVE_LANE_MASK
GET_ACTIVE_LANE_MASK - this corrosponds to the llvm.get.active.lane.mask intrinsic.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:386
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:356
@ VECTOR_SPLICE_LEFT
VECTOR_SPLICE_LEFT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1, VEC2) left by OFFSET elements an...
Definition ISDOpcodes.h:653
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:899
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:413
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ VECTOR_SPLICE_RIGHT
VECTOR_SPLICE_RIGHT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1,VEC2) right by OFFSET elements a...
Definition ISDOpcodes.h:657
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ CTTZ_ZERO_POISON
Bit counting operators with a poisoned result for zero inputs.
Definition ISDOpcodes.h:787
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:921
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:945
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ CTTZ_ELTS_ZERO_POISON
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI NodeType getOppositeSignednessMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns the corresponding opcode with the opposi...
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
NUses_match< 1, Value_match > m_OneUse()
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:557
void stable_sort(R &&Range)
Definition STLExtras.h:2115
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1565
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
LLVM_ABI bool isOneOrOneSplatFP(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant floating-point value, or a splatted vector of a constant float...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
void * PointerTy
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1547
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:173
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1776
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
@ AfterLegalizeTypes
Definition DAGCombine.h:17
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
fltNonfiniteBehavior
Definition APFloat.h:952
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isZeroOrZeroSplat(SDValue N, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
UndefPoisonKind
Enumeration to track whether we are interested in Undef, Poison, or both.
Definition UndefPoison.h:20
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1666
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:129
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:308
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:479
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:251
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:367
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:438
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:486
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:420
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
bool isScalableVT() const
Return true if the type is a scalable type.
Definition ValueTypes.h:195
bool isFixedLengthVector() const
Definition ValueTypes.h:189
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT widenIntegerElementType(LLVMContext &Context) const
Return a VT for an integer element type with doubled bit width.
Definition ValueTypes.h:452
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:182
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:121
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:316
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:469
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:315
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:190
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition KnownBits.h:269
static LLVM_ABI KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
bool isZero() const
Returns true if value is all zero.
Definition KnownBits.h:78
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:64
void setAllConflict()
Make all bits known to be both zero and one.
Definition KnownBits.h:97
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:165
KnownBits byteSwap() const
Definition KnownBits.h:553
static LLVM_ABI std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:303
KnownBits reverseBits() const
Definition KnownBits.h:557
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition KnownBits.h:247
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
static LLVM_ABI KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:176
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:335
bool isSignUnknown() const
Returns true if we don't know the sign bit.
Definition KnownBits.h:67
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:325
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:184
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
static LLVM_ABI KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static LLVM_ABI std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
static LLVM_ABI std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static LLVM_ABI KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition KnownBits.cpp:61
static LLVM_ABI std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
static LLVM_ABI std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything about the extended bits.
Definition KnownBits.h:171
static LLVM_ABI std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static LLVM_ABI std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:300
static LLVM_ABI std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
static LLVM_ABI KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static bool hasVectorMaskArgument(RTLIB::LibcallImpl Impl)
Returns true if the function has a vector mask argument, which is assumed to be the last argument.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
void setNoSignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequence of multiplies, adds and shifts.
This contains information for each constraint that we are lowering.
std::string ConstraintCode
This contains the actual string for the code, like "m".
LLVM_ABI unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
LLVM_ABI bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
MakeLibCallOptions & setIsSigned(bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetLowering to its clients that want to combine.
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true, bool AllowWidenOptimization=false)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a sequence of multiplies, adds and shifts.
fltNonfiniteBehavior nonFiniteBehavior
Definition APFloat.h:1013
fltNanEncoding nanEncoding
Definition APFloat.h:1015