LLVM 23.0.0git
TargetLowering.cpp
Go to the documentation of this file.
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
27#include "llvm/IR/DataLayout.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/MC/MCAsmInfo.h"
32#include "llvm/MC/MCExpr.h"
38#include <cctype>
39#include <deque>
40using namespace llvm;
41using namespace llvm::SDPatternMatch;
42
43/// NOTE: The TargetMachine owns TLOF.
47
48// Define the virtual destructor out-of-line for build efficiency.
50
// This implementation ignores Opcode and unconditionally returns nullptr.
51const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
52 return nullptr;
53}
54
58
59/// Check whether a given call node is in tail position within its function. If
60/// so, it sets Chain to the input chain of the tail call.
// NOTE(review): the opening line of this definition's declarator and the
// statement that introduces `F` are absent from this listing. `F` is
// presumably the function being compiled (it is only queried for attributes
// below) -- confirm against the complete file.
62 SDValue &Chain) const {
64
65 // First, check if tail calls have been disabled in this function.
66 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
67 return false;
68
69 // Conservatively require the attributes of the call to match those of
70 // the return. Ignore following attributes because they don't affect the
71 // call sequence.
72 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
73 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
74 Attribute::DereferenceableOrNull, Attribute::NoAlias,
75 Attribute::NonNull, Attribute::NoUndef,
76 Attribute::Range, Attribute::NoFPClass})
77 CallerAttrs.removeAttribute(Attr);
78
// Any return attribute that survives the filtering above rejects the tail
// call.
79 if (CallerAttrs.hasAttributes())
80 return false;
81
82 // It's not safe to eliminate the sign / zero extension of the return value.
// NOTE(review): this point is reached only when hasAttributes() returned
// false just above; whether contains() can still be true in that state
// depends on AttrBuilder -- confirm whether this check is still needed.
83 if (CallerAttrs.contains(Attribute::ZExt) ||
84 CallerAttrs.contains(Attribute::SExt))
85 return false;
86
87 // Check if the only use is a function return node.
// The final answer is whatever isUsedByReturnOnly(Node, Chain) reports.
88 return isUsedByReturnOnly(Node, Chain);
89}
90
// NOTE(review): the first line of this definition's declarator is absent from
// this listing; `MRI`, used below, is presumably declared there -- confirm.
// Visible behavior: returns true only if every register-assigned argument
// whose register is not reported as clobbered by CallerPreservedMask is fed
// by a CopyFromReg (optionally wrapped in one AssertZext) of a register whose
// live-in physical register is that same register.
92 const uint32_t *CallerPreservedMask,
93 const SmallVectorImpl<CCValAssign> &ArgLocs,
94 const SmallVectorImpl<SDValue> &OutVals) const {
// ArgLocs and OutVals are indexed in parallel by I.
95 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
96 const CCValAssign &ArgLoc = ArgLocs[I];
// Arguments that are not passed in a register need no check here.
97 if (!ArgLoc.isRegLoc())
98 continue;
99 MCRegister Reg = ArgLoc.getLocReg();
100 // Only look at callee saved registers.
101 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
102 continue;
103 // Check that we pass the value used for the caller.
104 // (We look for a CopyFromReg reading a virtual register that is used
105 // for the function live-in value of register Reg)
106 SDValue Value = OutVals[I];
// Look through a single AssertZext wrapper before testing for CopyFromReg.
107 if (Value->getOpcode() == ISD::AssertZext)
108 Value = Value.getOperand(0);
109 if (Value->getOpcode() != ISD::CopyFromReg)
110 return false;
// Operand 1 of the CopyFromReg is the register being read.
111 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
112 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
113 return false;
114 }
// No mismatch found (also the result when there was nothing to check).
115 return true;
116}
117
118/// Set CallLoweringInfo attribute flags based on a call instruction
119/// and called function attributes.
// NOTE(review): the opening declarator line and the start of the assertion
// whose message is "multiple ABI attributes?" are absent from this listing.
121 unsigned ArgIdx) {
// Each flag below records whether argument ArgIdx of Call carries the
// corresponding parameter attribute.
122 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
123 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
124 IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
125 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
126 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
127 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
128 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
129 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
130 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
131 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
132 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
133 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
134 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
// Alignment starts as the parameter's stack alignment; the byval path below
// may replace an unset value.
135 Alignment = Call->getParamStackAlign(ArgIdx);
136 IndirectType = nullptr;
138 "multiple ABI attributes?");
139 if (IsByVal) {
140 IndirectType = Call->getParamByValType(ArgIdx);
// Fall back to the parameter alignment when no stack alignment was set.
141 if (!Alignment)
142 Alignment = Call->getParamAlign(ArgIdx);
143 }
// The remaining checks are independent ifs, so the last one that matches
// determines IndirectType.
144 if (IsPreallocated)
145 IndirectType = Call->getParamPreallocatedType(ArgIdx);
146 if (IsInAlloca)
147 IndirectType = Call->getParamInAllocaType(ArgIdx);
148 if (IsSRet)
149 IndirectType = Call->getParamStructRetType(ArgIdx);
150}
151
152/// Generate a libcall taking the given operands as arguments and returning a
153/// result of type RetVT.
// NOTE(review): several lines of this definition are absent from this listing
// (part of the parameter list, the declarations of `Args` and `CLI`, one
// condition continuation, and parts of the CLI builder chain). The comments
// below describe only what is visible.
154std::pair<SDValue, SDValue>
155TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl,
157 MakeLibCallOptions CallOptions, const SDLoc &dl,
158 SDValue InChain) const {
// An unsupported implementation is reported as a fatal internal error.
159 if (LibcallImpl == RTLIB::Unsupported)
160 reportFatalInternalError("unsupported library call operation");
161
// Without a caller-supplied chain, start from the DAG entry node.
162 if (!InChain)
163 InChain = DAG.getEntryNode();
164
166 Args.reserve(Ops.size());
167
// Build one argument entry per operand.
168 ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
169 for (unsigned i = 0; i < Ops.size(); ++i) {
170 SDValue NewOp = Ops[i];
// Use the per-operand override when one exists and is non-null; otherwise
// derive the IR type from the operand's value type.
171 Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
172 ? OpsTypeOverrides[i]
173 : NewOp.getValueType().getTypeForEVT(*DAG.getContext());
174 TargetLowering::ArgListEntry Entry(NewOp, Ty);
// When softening, record the operand's pre-softening type as OrigTy.
175 if (CallOptions.IsSoften)
176 Entry.OrigTy =
177 CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext());
178
// Exactly one of IsSExt / IsZExt is set here; the softening branch below
// may clear both.
179 Entry.IsSExt =
180 shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
181 Entry.IsZExt = !Entry.IsSExt;
182
183 if (CallOptions.IsSoften &&
185 Entry.IsSExt = Entry.IsZExt = false;
186 }
187 Args.push_back(Entry);
188 }
189
// The callee is an external symbol of pointer type for this data layout.
190 SDValue Callee =
191 DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
192
193 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
194 Type *OrigRetTy = RetTy;
// Result extension mirrors the per-argument logic above.
196 bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
197 bool zeroExtend = !signExtend;
198
199 if (CallOptions.IsSoften) {
200 OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext());
202 signExtend = zeroExtend = false;
203 }
204
205 CLI.setDebugLoc(dl)
206 .setChain(InChain)
207 .setLibCallee(getLibcallImplCallingConv(LibcallImpl), RetTy, OrigRetTy,
208 Callee, std::move(Args))
209 .setNoReturn(CallOptions.DoesNotReturn)
212 .setSExtResult(signExtend)
213 .setZExtResult(zeroExtend);
// The returned pair is whatever LowerCallTo produces for CLI.
214 return LowerCallTo(CLI);
215}
216
// NOTE(review): the opening declarator line and several statement lines of
// this definition are absent from this listing (presumably including the step
// that narrows VT inside the alignment loop, and parts of two conditions).
// The comments below describe only the visible logic.
// Visible behavior: appends to MemOps one value type per memory operation
// needed to cover Op.size() bytes, and returns false as soon as more than
// Limit operations would be required; returns true otherwise.
// NOTE(review): LargestVT is not referenced in the visible lines.
218 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
219 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
220 const AttributeList &FuncAttributes, EVT *LargestVT) const {
221 EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);
222
// MVT::Other from getOptimalMemOpType is handled by picking an integer type
// here.
223 if (VT == MVT::Other) {
224 // Use the largest integer type whose alignment constraints are satisfied.
225 VT = MVT::LAST_INTEGER_VALUETYPE;
226 if (Op.isFixedDstAlign()) {
// The source alignment only matters when the operation is a memcpy whose
// source is not flagged by isMemcpyStrSrc().
227 bool LoadsFromSrc = Op.isMemcpy() && !Op.isMemcpyStrSrc();
228 while (VT != MVT::i8) {
229 unsigned VTSize = VT.getSizeInBits() / 8;
230 bool DstOk =
231 Op.getDstAlign() >= VTSize ||
232 allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign());
233 bool SrcOk =
234 !LoadsFromSrc || Op.getSrcAlign() >= VTSize ||
235 allowsMisalignedMemoryAccesses(VT, SrcAS, Op.getSrcAlign());
236 if (DstOk && SrcOk)
237 break;
239 }
240 }
241 assert(VT.isInteger());
242
243 // Find the largest legal integer type.
244 MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
245 while (!isTypeLegal(LVT))
246 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
247 assert(LVT.isInteger());
248
249 // If the type we've chosen is larger than the largest legal integer type
250 // then use the largest legal type.
251 if (VT.bitsGT(LVT))
252 VT = LVT;
253 }
254
// Emit operations until the whole size is covered.
255 unsigned NumMemOps = 0;
256 uint64_t Size = Op.size();
257 while (Size) {
258 unsigned VTSize = VT.getSizeInBits() / 8;
// While the current type is wider than what remains, pick a narrower type
// (or keep the type and overlap, see below).
259 while (VTSize > Size) {
260 // For now, only use non-vector load / store's for the left-over pieces.
261 EVT NewVT = VT;
262 unsigned NewVTSize;
263
264 bool Found = false;
265 if (VT.isVector() || VT.isFloatingPoint()) {
266 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
269 Found = true;
270 else if (NewVT == MVT::i64 &&
272 isSafeMemOpType(MVT::f64)) {
273 // i64 is usually not legal on 32-bit targets, but f64 may be.
274 NewVT = MVT::f64;
275 Found = true;
276 }
277 }
278
// Otherwise step down through the simple value types until one is accepted
// by isSafeMemOpType, stopping unconditionally at i8.
279 if (!Found) {
280 do {
281 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
282 if (NewVT == MVT::i8)
283 break;
284 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
285 }
286 NewVTSize = NewVT.getSizeInBits() / 8;
287
288 // If the new VT cannot cover all of the remaining bits, then consider
289 // issuing a (or a pair of) unaligned and overlapping load / store.
290 unsigned Fast;
291 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
293 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
295 Fast)
// Keep the wide type; treating its size as Size makes this the last
// operation.
296 VTSize = Size;
297 else {
298 VT = NewVT;
299 VTSize = NewVTSize;
300 }
301 }
302
// Give up once the operation budget is exceeded.
303 if (++NumMemOps > Limit)
304 return false;
305
306 MemOps.push_back(VT);
307 Size -= VTSize;
308 }
309
310 return true;
311}
312
313/// Soften the operands of a comparison. This code is shared among BR_CC,
314/// SELECT_CC, and SETCC handlers.
// NOTE(review): the opening declarator line is absent from this listing.
// This overload forwards to the softenSetCCOperands overload that takes a
// chain, passing a default-constructed SDValue; the local chain is dropped
// when this function returns.
316 SDValue &NewLHS, SDValue &NewRHS,
317 ISD::CondCode &CCCode,
318 const SDLoc &dl, const SDValue OldLHS,
319 const SDValue OldRHS) const {
320 SDValue Chain;
321 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
322 OldRHS, Chain);
323}
324
// NOTE(review): the opening declarator line and several statement lines
// (the declarations of `RetVT` and `CallOptions`, the calls inside the two
// "Unsupported" branches, and the `if` that opens the AssertZext brace) are
// absent from this listing. The comments below describe only what is visible.
// Visible behavior: picks one or two comparison libcalls for CCCode and VT,
// emits them, and rewrites NewLHS / NewRHS / CCCode (and Chain) so the caller
// can compare the libcall result instead of the floating-point operands.
326 SDValue &NewLHS, SDValue &NewRHS,
327 ISD::CondCode &CCCode,
328 const SDLoc &dl, const SDValue OldLHS,
329 const SDValue OldRHS,
330 SDValue &Chain,
331 bool IsSignaling) const {
332 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
333 // not supporting it. We can update this code when libgcc provides such
334 // functions.
335
336 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
337 && "Unsupported setcc type!");
338
339 // Expand into one or more soft-fp libcall(s).
// LC1 is always set by the switch; LC2 is only set for SETONE / SETUEQ.
// ShouldInvertCC marks predicates implemented via the inverse comparison.
340 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
341 bool ShouldInvertCC = false;
342 switch (CCCode) {
343 case ISD::SETEQ:
344 case ISD::SETOEQ:
345 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
346 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
347 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
348 break;
349 case ISD::SETNE:
350 case ISD::SETUNE:
351 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
352 (VT == MVT::f64) ? RTLIB::UNE_F64 :
353 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
354 break;
355 case ISD::SETGE:
356 case ISD::SETOGE:
357 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
358 (VT == MVT::f64) ? RTLIB::OGE_F64 :
359 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
360 break;
361 case ISD::SETLT:
362 case ISD::SETOLT:
363 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
364 (VT == MVT::f64) ? RTLIB::OLT_F64 :
365 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
366 break;
367 case ISD::SETLE:
368 case ISD::SETOLE:
369 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
370 (VT == MVT::f64) ? RTLIB::OLE_F64 :
371 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
372 break;
373 case ISD::SETGT:
374 case ISD::SETOGT:
375 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
376 (VT == MVT::f64) ? RTLIB::OGT_F64 :
377 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
378 break;
// SETO shares the UO libcall with SETUO, with the condition inverted.
379 case ISD::SETO:
380 ShouldInvertCC = true;
381 [[fallthrough]];
382 case ISD::SETUO:
383 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
384 (VT == MVT::f64) ? RTLIB::UO_F64 :
385 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
386 break;
387 case ISD::SETONE:
388 // SETONE = O && UNE
389 ShouldInvertCC = true;
390 [[fallthrough]];
// SETUEQ (and, inverted, SETONE) needs two libcalls: UO and OEQ.
391 case ISD::SETUEQ:
392 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
393 (VT == MVT::f64) ? RTLIB::UO_F64 :
394 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
395 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
396 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
397 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
398 break;
399 default:
400 // Invert CC for unordered comparisons
// Mapping used below: ULT -> OGE, ULE -> OGT, UGT -> OLE, UGE -> OLT.
401 ShouldInvertCC = true;
402 switch (CCCode) {
403 case ISD::SETULT:
404 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
405 (VT == MVT::f64) ? RTLIB::OGE_F64 :
406 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
407 break;
408 case ISD::SETULE:
409 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
410 (VT == MVT::f64) ? RTLIB::OGT_F64 :
411 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
412 break;
413 case ISD::SETUGT:
414 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
415 (VT == MVT::f64) ? RTLIB::OLE_F64 :
416 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
417 break;
418 case ISD::SETUGE:
419 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
420 (VT == MVT::f64) ? RTLIB::OLT_F64 :
421 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
422 break;
423 default: llvm_unreachable("Do not know how to soften this setcc!");
424 }
425 }
426
427 // Use the target specific return value for comparison lib calls.
429 SDValue Ops[2] = {NewLHS, NewRHS};
431 EVT OpsVT[2] = { OldLHS.getValueType(),
432 OldRHS.getValueType() };
433 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
// First libcall: its result becomes the new LHS, compared against zero.
434 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
435 NewLHS = Call.first;
436 NewRHS = DAG.getConstant(0, dl, RetVT);
437
438 RTLIB::LibcallImpl LC1Impl = getLibcallImpl(LC1);
439 if (LC1Impl == RTLIB::Unsupported) {
441 "no libcall available to soften floating-point compare");
442 }
443
// The predicate to apply to the libcall result comes from the libcall
// implementation, inverted when requested by the switch above.
444 CCCode = getSoftFloatCmpLibcallPredicate(LC1Impl);
445 if (ShouldInvertCC) {
446 assert(RetVT.isInteger());
447 CCCode = getSetCCInverse(CCCode, RetVT);
448 }
449
450 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
451 // Update Chain.
452 Chain = Call.second;
453 } else {
// Two-libcall case: build a setcc for each result and combine them.
454 RTLIB::LibcallImpl LC2Impl = getLibcallImpl(LC2);
455 if (LC2Impl == RTLIB::Unsupported) {
457 "no libcall available to soften floating-point compare");
458 }
459
460 assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
461 "unordered call should be simple boolean");
462
463 EVT SetCCVT =
464 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
466 NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
467 DAG.getValueType(MVT::i1));
468 }
469
// Tmp tests the first libcall's result; the second libcall reuses the same
// operands, options and incoming chain.
470 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
471 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
472 CCCode = getSoftFloatCmpLibcallPredicate(LC2Impl);
473 if (ShouldInvertCC)
474 CCCode = getSetCCInverse(CCCode, RetVT);
475 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
// Only when a chain was supplied, merge the two libcalls' output chains.
476 if (Chain)
477 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
478 Call2.second);
// Inverted predicates are combined with AND, the others with OR.
479 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
480 Tmp.getValueType(), Tmp, NewLHS);
// NewLHS now holds the combined result; NewRHS is reset to a null SDValue.
481 NewRHS = SDValue();
482 }
483}
484
485/// Return the entry encoding for a jump table in the current function. The
486/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
// NOTE(review): every code line of this definition except the closing brace
// (the declarator, the non-PIC test and both return statements) is absent
// from this listing; only the original comments survive.
488 // In non-pic modes, just use the address of a block.
491
492 // Otherwise, use a label difference.
494}
495
// NOTE(review): the opening declarator line is absent from this listing;
// `Table` is presumably a parameter declared there -- confirm.
// This implementation returns Table unchanged and does not use DAG.
497 SelectionDAG &DAG) const {
498 return Table;
499}
500
501/// This returns the relocation base for the given PIC jumptable, the same as
502/// getPICJumpTableRelocBase, but as an MCExpr.
// NOTE(review): the declarator line following the return type is absent from
// this listing; `MF` is presumably a parameter declared there -- confirm.
503const MCExpr *
505 unsigned JTI,MCContext &Ctx) const{
506 // The normal PIC reloc base is the label at the start of the jump table.
// The result is a symbol reference to the symbol MF->getJTISymbol returns
// for JTI.
507 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
508}
509
// NOTE(review): the opening declarator line and the `if` that opens the brace
// closed below are absent from this listing; `Value` and `dl` are presumably
// parameters declared there -- confirm.
511 SDValue Addr, int JTI,
512 SelectionDAG &DAG) const {
// The chain starts out as the incoming Value.
513 SDValue Chain = Value;
514 // Jump table debug info is only needed if CodeView is enabled.
516 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
517 }
// Emit a BRIND to Addr on the (possibly updated) chain.
518 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
519}
520
// NOTE(review): the declarator line following the return type and the
// condition guarding the second `return false` are absent from this listing;
// `GA` is presumably a parameter declared on the missing line -- confirm.
521bool
523 const TargetMachine &TM = getTargetMachine();
524 const GlobalValue *GV = GA->getGlobal();
525
526 // If the address is not even local to this DSO we will have to load it from
527 // a got and then add the offset.
528 if (!TM.shouldAssumeDSOLocal(GV))
529 return false;
530
531 // If the code is position independent we will have to add a base register.
533 return false;
534
535 // Otherwise we can do it.
536 return true;
537}
538
539//===----------------------------------------------------------------------===//
540// Optimization Methods
541//===----------------------------------------------------------------------===//
542
543/// If the specified instruction has a constant integer operand and there are
544/// bits set in that constant that are not demanded, then clear those bits and
545/// return true.
// NOTE(review): the opening declarator line is absent from this listing;
// `Op` is presumably a parameter declared there -- confirm.
547 const APInt &DemandedBits,
548 const APInt &DemandedElts,
549 TargetLoweringOpt &TLO) const {
550 SDLoc DL(Op);
551 unsigned Opcode = Op.getOpcode();
552
553 // Early-out if we've ended up calling an undemanded node, leave this to
554 // constant folding.
555 if (DemandedBits.isZero() || DemandedElts.isZero())
556 return false;
557
558 // Do target-specific constant optimization.
// When the target hook reports it handled the node, the result depends on
// whether TLO.New holds a node.
559 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
560 return TLO.New.getNode();
561
562 // FIXME: ISD::SELECT, ISD::SELECT_CC
563 switch (Opcode) {
564 default:
565 break;
566 case ISD::XOR:
567 case ISD::AND:
568 case ISD::OR: {
// Only a non-opaque constant in operand 1 is considered.
569 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
570 if (!Op1C || Op1C->isOpaque())
571 return false;
572
573 // If this is a 'not' op, don't touch it because that's a canonical form.
574 const APInt &C = Op1C->getAPIntValue();
575 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
576 return false;
577
// If the constant has bits outside DemandedBits, rebuild the node with the
// constant masked to the demanded bits, keeping the original flags.
578 if (!C.isSubsetOf(DemandedBits)) {
579 EVT VT = Op.getValueType();
580 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
581 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
582 Op->getFlags());
583 return TLO.CombineTo(Op, NewOp);
584 }
585
586 break;
587 }
588 }
589
590 return false;
591}
592
// NOTE(review): the opening declarator line and the first arm of the
// conditional expression below are absent from this listing.
// This overload computes a DemandedElts mask from Op's value type and
// forwards to the ShrinkDemandedConstant overload that takes one; for
// non-vector types the mask is the one-bit value 1.
594 const APInt &DemandedBits,
595 TargetLoweringOpt &TLO) const {
596 EVT VT = Op.getValueType();
597 APInt DemandedElts = VT.isVector()
599 : APInt(1, 1);
600 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
601}
602
603/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
604/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
605/// but it could be generalized for targets with other types of implicit
606/// widening casts.
// NOTE(review): the opening declarator line and the second arm of the
// `Flags` initializer are absent from this listing; `Op` and `BitWidth` are
// presumably parameters declared on the missing line -- confirm.
608 const APInt &DemandedBits,
609 TargetLoweringOpt &TLO) const {
610 assert(Op.getNumOperands() == 2 &&
611 "ShrinkDemandedOp only supports binary operators!");
612 assert(Op.getNode()->getNumValues() == 1 &&
613 "ShrinkDemandedOp only supports nodes with one result!");
614
615 EVT VT = Op.getValueType();
616 SelectionDAG &DAG = TLO.DAG;
617 SDLoc dl(Op);
618
619 // Early return, as this function cannot handle vector types.
620 if (VT.isVector())
621 return false;
622
623 assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
624 Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
625 "ShrinkDemandedOp only supports operands that have the same size!");
626
627 // Don't do this if the node has another user, which may require the
628 // full value.
629 if (!Op.getNode()->hasOneUse())
630 return false;
631
632 // Search for the smallest integer type with free casts to and from
633 // Op's type. For expedience, just check power-of-2 integer types.
// Candidates start at the demanded width rounded up to a power of two and
// stay strictly below BitWidth.
634 unsigned DemandedSize = DemandedBits.getActiveBits();
635 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
636 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
637 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
638 if (isTruncateFree(Op, SmallVT) && isZExtFree(SmallVT, VT)) {
639 // We found a type with free casts.
640
641 // If the operation has the 'disjoint' flag, then the
642 // operands on the new node are also disjoint.
643 SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
645 unsigned Opcode = Op.getOpcode();
646 if (Opcode == ISD::PTRADD) {
647 // It isn't a ptradd anymore if it doesn't operate on the entire
648 // pointer.
649 Opcode = ISD::ADD;
650 }
// Rebuild the operation on truncated operands, then any-extend the narrow
// result back to VT and replace Op with it.
651 SDValue X = DAG.getNode(
652 Opcode, dl, SmallVT,
653 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
654 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
655 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
656 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
657 return TLO.CombineTo(Op, Z);
658 }
659 }
// No suitable narrower type was found.
660 return false;
661}
662
// NOTE(review): the opening declarator line and one statement inside the
// `if (Simplified)` body are absent from this listing.
// Visible behavior: runs SimplifyDemandedBits with a TargetLoweringOpt built
// from DCI's legalization state, re-queues Op's node on success, and returns
// whether anything was simplified.
664 DAGCombinerInfo &DCI) const {
665 SelectionDAG &DAG = DCI.DAG;
666 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
667 !DCI.isBeforeLegalizeOps());
668 KnownBits Known;
669
670 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
671 if (Simplified) {
672 DCI.AddToWorklist(Op.getNode());
674 }
675 return Simplified;
676}
677
// NOTE(review): the opening declarator line and one statement inside the
// `if (Simplified)` body are absent from this listing.
// Same shape as the DCI-based variant without DemandedElts, except that the
// caller's DemandedElts mask is forwarded to SimplifyDemandedBits.
679 const APInt &DemandedElts,
680 DAGCombinerInfo &DCI) const {
681 SelectionDAG &DAG = DCI.DAG;
682 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
683 !DCI.isBeforeLegalizeOps());
684 KnownBits Known;
685
686 bool Simplified =
687 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
688 if (Simplified) {
689 DCI.AddToWorklist(Op.getNode());
691 }
692 return Simplified;
693}
694
// NOTE(review): the opening declarator line, one parameter line and the first
// arm of the conditional expression below are absent from this listing.
// This overload derives a DemandedElts mask from Op's value type and forwards
// to the SimplifyDemandedBits overload that takes one.
696 KnownBits &Known,
698 unsigned Depth,
699 bool AssumeSingleUse) const {
700 EVT VT = Op.getValueType();
701
702 // Since the number of lanes in a scalable vector is unknown at compile time,
703 // we track one bit which is implicitly broadcast to all lanes. This means
704 // that all lanes in a scalable vector are considered demanded.
705 APInt DemandedElts = VT.isFixedLengthVector()
707 : APInt(1, 1);
708 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
709 AssumeSingleUse);
710}
711
712// TODO: Under what circumstances can we create nodes? Constant folding?
// NOTE(review): many lines of this definition are absent from this listing:
// the opening declarator line, the depth-limit condition, the `if` headers of
// the recursive calls, parts of the SETCC conditions, and several `case`
// labels (the blocks further down that open with a comment or a statement
// instead of a label). The comments below describe only what is visible.
// Visible behavior: returns an existing value that can stand in for Op given
// the demanded bits/elements, or a null SDValue when no such value is found.
714 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
715 SelectionDAG &DAG, unsigned Depth) const {
716 EVT VT = Op.getValueType();
717
718 // Limit search depth.
720 return SDValue();
721
722 // Ignore UNDEFs.
723 if (Op.isUndef())
724 return SDValue();
725
726 // Not demanding any bits/elts from Op.
727 if (DemandedBits == 0 || DemandedElts == 0)
728 return DAG.getUNDEF(VT);
729
730 bool IsLE = DAG.getDataLayout().isLittleEndian();
731 unsigned NumElts = DemandedElts.getBitWidth();
732 unsigned BitWidth = DemandedBits.getBitWidth();
733 KnownBits LHSKnown, RHSKnown;
734 switch (Op.getOpcode()) {
735 case ISD::BITCAST: {
736 if (VT.isScalableVector())
737 return SDValue();
738
// Look through the whole bitcast chain; if that lands on the destination
// type, the source itself can be used.
739 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
740 EVT SrcVT = Src.getValueType();
741 EVT DstVT = Op.getValueType();
742 if (SrcVT == DstVT)
743 return Src;
744
// Same element width: the demanded masks carry over unchanged.
745 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
746 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
747 if (NumSrcEltBits == NumDstEltBits)
749 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
750 return DAG.getBitcast(DstVT, V);
751
// Each destination element is made of Scale narrower source elements:
// OR together the demanded bits of every source-sized slice.
752 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
753 unsigned Scale = NumDstEltBits / NumSrcEltBits;
754 unsigned NumSrcElts = SrcVT.getVectorNumElements();
755 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
756 for (unsigned i = 0; i != Scale; ++i) {
757 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
758 unsigned BitOffset = EltOffset * NumSrcEltBits;
759 DemandedSrcBits |= DemandedBits.extractBits(NumSrcEltBits, BitOffset);
760 }
761 // Recursive calls below may turn not demanded elements into poison, so we
762 // need to demand all smaller source elements that maps to a demanded
763 // destination element.
764 APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
765
767 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
768 return DAG.getBitcast(DstVT, V);
769 }
770
771 // TODO - bigendian once we have test coverage.
// Each source element holds Scale narrower destination elements: place the
// demanded bits at each demanded destination element's offset and mark the
// source element that contains it.
772 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
773 unsigned Scale = NumSrcEltBits / NumDstEltBits;
774 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
775 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
776 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
777 for (unsigned i = 0; i != NumElts; ++i)
778 if (DemandedElts[i]) {
779 unsigned Offset = (i % Scale) * NumDstEltBits;
780 DemandedSrcBits.insertBits(DemandedBits, Offset);
781 DemandedSrcElts.setBit(i / Scale);
782 }
783
785 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
786 return DAG.getBitcast(DstVT, V);
787 }
788
789 break;
790 }
791 case ISD::AND: {
792 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
793 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
794
795 // If all of the demanded bits are known 1 on one side, return the other.
796 // These bits cannot contribute to the result of the 'and' in this
797 // context.
798 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
799 return Op.getOperand(0);
800 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
801 return Op.getOperand(1);
802 break;
803 }
804 case ISD::OR: {
805 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
806 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
807
808 // If all of the demanded bits are known zero on one side, return the
809 // other. These bits cannot contribute to the result of the 'or' in this
810 // context.
811 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
812 return Op.getOperand(0);
813 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
814 return Op.getOperand(1);
815 break;
816 }
817 case ISD::XOR: {
818 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
819 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
820
821 // If all of the demanded bits are known zero on one side, return the
822 // other.
823 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
824 return Op.getOperand(0);
825 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
826 return Op.getOperand(1);
827 break;
828 }
829 case ISD::ADD:
830 case ISD::MUL:
831 case ISD::SMIN:
832 case ISD::SMAX:
833 case ISD::UMIN:
834 case ISD::UMAX: {
// If DAG.isIdentityElement accepts one operand for this opcode, the other
// operand is returned.
835 if (DAG.isIdentityElement(Op.getOpcode(), Op->getFlags(), Op.getOperand(1),
836 DemandedElts, 1, Depth + 1))
837 return Op.getOperand(0);
838
839 if (DAG.isIdentityElement(Op.getOpcode(), Op->getFlags(), Op.getOperand(0),
840 DemandedElts, 0, Depth + 1))
841 return Op.getOperand(1);
842 break;
843 }
844 case ISD::SHL: {
845 // If we are only demanding sign bits then we can use the shift source
846 // directly.
847 if (std::optional<unsigned> MaxSA =
848 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
849 SDValue Op0 = Op.getOperand(0);
850 unsigned ShAmt = *MaxSA;
851 unsigned NumSignBits =
852 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
// UpperDemandedBits is the width from the lowest demanded bit to the top.
853 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
854 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
855 return Op0;
856 }
857 break;
858 }
859 case ISD::SRL: {
860 // If we are only demanding sign bits then we can use the shift source
861 // directly.
862 if (std::optional<unsigned> MaxSA =
863 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
864 SDValue Op0 = Op.getOperand(0);
865 unsigned ShAmt = *MaxSA;
866 // Must already be signbits in DemandedBits bounds, and can't demand any
867 // shifted in zeroes.
868 if (DemandedBits.countl_zero() >= ShAmt) {
869 unsigned NumSignBits =
870 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
871 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
872 return Op0;
873 }
874 }
875 break;
876 }
877 case ISD::SETCC: {
878 SDValue Op0 = Op.getOperand(0);
879 SDValue Op1 = Op.getOperand(1);
880 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
881 // If (1) we only need the sign-bit, (2) the setcc operands are the same
882 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
883 // -1, we may be able to bypass the setcc.
884 if (DemandedBits.isSignMask() &&
888 // If we're testing X < 0, then this compare isn't needed - just use X!
889 // FIXME: We're limiting to integer types here, but this should also work
890 // if we don't care about FP signed-zero. The use of SETLT with FP means
891 // that we don't care about NaNs.
892 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
894 return Op0;
895 }
896 break;
897 }
// NOTE(review): the `case` label opening this block is absent from this
// listing; the body reads a VT operand and talks about "sextinreg", so it is
// presumably the sign-extend-in-register case -- confirm.
899 // If none of the extended bits are demanded, eliminate the sextinreg.
900 SDValue Op0 = Op.getOperand(0);
901 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
902 unsigned ExBits = ExVT.getScalarSizeInBits();
903 if (DemandedBits.getActiveBits() <= ExBits &&
905 return Op0;
906 // If the input is already sign extended, just drop the extension.
907 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
908 if (NumSignBits >= (BitWidth - ExBits + 1))
909 return Op0;
910 break;
911 }
// NOTE(review): the `case` label(s) opening this block are absent from this
// listing.
915 if (VT.isScalableVector())
916 return SDValue();
917
918 // If we only want the lowest element and none of extended bits, then we can
919 // return the bitcasted source vector.
920 SDValue Src = Op.getOperand(0);
921 EVT SrcVT = Src.getValueType();
922 EVT DstVT = Op.getValueType();
923 if (IsLE && DemandedElts == 1 &&
924 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
925 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
926 return DAG.getBitcast(DstVT, Src);
927 }
928 break;
929 }
// NOTE(review): the `case` label opening this block is absent from this
// listing; the body handles insertion of a single element at operand 2's
// index.
931 if (VT.isScalableVector())
932 return SDValue();
933
934 // If we don't demand the inserted element, return the base vector.
935 SDValue Vec = Op.getOperand(0);
936 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
937 EVT VecVT = Vec.getValueType();
// Only a constant, in-range index is considered.
938 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
939 !DemandedElts[CIdx->getZExtValue()])
940 return Vec;
941 break;
942 }
// NOTE(review): the `case` label opening this block is absent from this
// listing; the body handles insertion of subvector Sub into Vec at Idx.
944 if (VT.isScalableVector())
945 return SDValue();
946
947 SDValue Vec = Op.getOperand(0);
948 SDValue Sub = Op.getOperand(1);
949 uint64_t Idx = Op.getConstantOperandVal(2);
950 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
// Demanded elements that fall inside the inserted range.
951 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
952 // If we don't demand the inserted subvector, return the base vector.
953 if (DemandedSubElts == 0)
954 return Vec;
955 break;
956 }
957 case ISD::VECTOR_SHUFFLE: {
959 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
960
961 // If all the demanded elts are from one operand and are inline,
962 // then we can use the operand directly.
963 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
964 for (unsigned i = 0; i != NumElts; ++i) {
965 int M = ShuffleMask[i];
// Negative mask entries and undemanded lanes place no constraint.
966 if (M < 0 || !DemandedElts[i])
967 continue;
968 AllUndef = false;
// Lane i is identity for the LHS when M == i, and for the RHS when
// M == i + NumElts.
969 IdentityLHS &= (M == (int)i);
970 IdentityRHS &= ((M - NumElts) == i);
971 }
972
973 if (AllUndef)
974 return DAG.getUNDEF(Op.getValueType());
975 if (IdentityLHS)
976 return Op.getOperand(0);
977 if (IdentityRHS)
978 return Op.getOperand(1);
979 break;
980 }
981 default:
982 // TODO: Probably okay to remove after audit; here to reduce change size
983 // in initial enablement patch for scalable vectors
984 if (VT.isScalableVector())
985 return SDValue();
986
// Opcodes at or above BUILTIN_OP_END are handed to a hook whose call line is
// absent from this listing.
987 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
989 Op, DemandedBits, DemandedElts, DAG, Depth))
990 return V;
991 break;
992 }
// Nothing simpler was found.
993 return SDValue();
994}
995
// NOTE(review): the opening declarator lines and the first arm of the
// conditional expression below are absent from this listing.
// This overload derives a DemandedElts mask from Op's value type and forwards
// to the SimplifyMultipleUseDemandedBits overload that takes one.
998 unsigned Depth) const {
999 EVT VT = Op.getValueType();
1000 // Since the number of lanes in a scalable vector is unknown at compile time,
1001 // we track one bit which is implicitly broadcast to all lanes. This means
1002 // that all lanes in a scalable vector are considered demanded.
1003 APInt DemandedElts = VT.isFixedLengthVector()
1005 : APInt(1, 1);
1006 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1007 Depth);
1008}
1009
// Element-only variant: demands every bit of each scalar element of Op and
// forwards to SimplifyMultipleUseDemandedBits with the caller-supplied
// DemandedElts, returning whatever the forwarded call returns.
// NOTE(review): the line carrying the return type and function name (listing
// line 1010) is absent from this extracted listing - confirm the name against
// the real file.
1011 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
1012 unsigned Depth) const {
// All-ones mask as wide as one scalar element of Op, i.e. all bits demanded.
1013 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
1014 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1015 Depth);
1016}
1017
1018 // Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1),
1019 // or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
// Op must be an SRL or SRA node (asserted below). Returns an empty SDValue
// when the pattern does not match or the narrowed AVG node is not acceptable;
// otherwise returns the AVG result extended or truncated back to Op's type.
// NOTE(review): the first lines of the signature (listing lines 1020-1021)
// are absent from this extracted listing - confirm against the real file.
1022 const TargetLowering &TLI,
1023 const APInt &DemandedBits,
1024 const APInt &DemandedElts, unsigned Depth) {
1025 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
1026 "SRL or SRA node is required here!");
1027 // Is the right shift using an immediate value of 1?
1028 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
1029 if (!N1C || !N1C->isOne())
1030 return SDValue();
1031
1032 // We are looking for an avgfloor
1033 // add(ext, ext)
1034 // or one of these as an avgceil
1035 // add(add(ext, ext), 1)
1036 // add(add(ext, 1), ext)
1037 // add(ext, add(ext, 1))
1038 SDValue Add = Op.getOperand(0);
1039 if (Add.getOpcode() != ISD::ADD)
1040 return SDValue();
1041
1042 SDValue ExtOpA = Add.getOperand(0);
1043 SDValue ExtOpB = Add.getOperand(1);
// Add2 stays null for the avgfloor form; for avgceil it holds the inner add.
1044 SDValue Add2;
// Op1/Op2 are the operands of the inner add A, Op3 is the other operand of
// the outer add. If Op2 or Op3 is a constant (or constant splat) one, rebind
// ExtOpA/ExtOpB to the two remaining values and record A in Add2. Op1 is
// never tested for being the constant.
1045 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
1046 ConstantSDNode *ConstOp;
1047 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
1048 ConstOp->isOne()) {
1049 ExtOpA = Op1;
1050 ExtOpB = Op3;
1051 Add2 = A;
1052 return true;
1053 }
1054 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
1055 ConstOp->isOne()) {
1056 ExtOpA = Op1;
1057 ExtOpB = Op2;
1058 Add2 = A;
1059 return true;
1060 }
1061 return false;
1062 };
// Try the inner add on either side of the outer add.
1063 bool IsCeil =
1064 (ExtOpA.getOpcode() == ISD::ADD &&
1065 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
1066 (ExtOpB.getOpcode() == ISD::ADD &&
1067 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
1068
1069 // If the shift is signed (sra):
1070 // - Needs >= 2 sign bit for both operands.
1071 // - Needs >= 2 zero bits.
1072 // If the shift is unsigned (srl):
1073 // - Needs >= 1 zero bit for both operands.
1074 // - Needs 1 demanded bit zero and >= 2 sign bits.
1075 SelectionDAG &DAG = TLO.DAG;
1076 unsigned ShiftOpc = Op.getOpcode();
1077 bool IsSigned = false;
// Count of leading bits known to be sign/zero bits; assigned in the switch
// below on every path that does not return early.
1078 unsigned KnownBits;
1079 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1080 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
// NumSigned is one less than the smaller sign-bit count, so the tests
// "NumSigned >= 1" below correspond to ">= 2 sign bits" in the comment above.
1081 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1082 unsigned NumZeroA =
1083 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1084 unsigned NumZeroB =
1085 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1086 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1087
// Choose between the unsigned and signed AVG form. The unsigned form is taken
// when there are enough known leading zeros and more of them than NumSigned.
1088 switch (ShiftOpc) {
1089 default:
1090 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1091 case ISD::SRA: {
1092 if (NumZero >= 2 && NumSigned < NumZero) {
1093 IsSigned = false;
1094 KnownBits = NumZero;
1095 break;
1096 }
1097 if (NumSigned >= 1) {
1098 IsSigned = true;
1099 KnownBits = NumSigned;
1100 break;
1101 }
1102 return SDValue();
1103 }
1104 case ISD::SRL: {
1105 if (NumZero >= 1 && NumSigned < NumZero) {
1106 IsSigned = false;
1107 KnownBits = NumZero;
1108 break;
1109 }
// For a logical shift the signed form is only used when the sign bit of the
// result is not demanded.
1110 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1111 IsSigned = true;
1112 KnownBits = NumSigned;
1113 break;
1114 }
1115 return SDValue();
1116 }
1117 }
1118
1119 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1120 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1121
1122 // Find the smallest power-2 type that is legal for this vector size and
1123 // operation, given the original type size and the number of known sign/zero
1124 // bits.
1125 EVT VT = Op.getValueType();
// Candidate width is the scalar width minus the known bits, but never below 8,
// rounded up to a power of two by bit_ceil.
1126 unsigned MinWidth =
1127 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1128 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
// NOTE(review): the condition guarding the return below (listing line 1129)
// is absent from this extracted listing - confirm against the real file.
1130 return SDValue();
1131 if (VT.isVector())
1132 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1133 if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
1134 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1135 // larger type size to do the transform.
1136 if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
1137 return SDValue();
// Fall back to the original type only if the outer add, and the inner add
// when one was matched, are known not to overflow.
1138 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1139 Add.getOperand(1)) &&
1140 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1141 Add2.getOperand(1))))
1142 NVT = VT;
1143 else
1144 return SDValue();
1145 }
1146
1147 // Don't create an AVGFLOOR node with a scalar constant unless it's legal as
1148 // this is likely to stop other folds (reassociation, value tracking etc.)
1149 if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
1150 (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
1151 return SDValue();
1152
// Build the AVG node in NVT from the converted operands, then convert the
// result back to the original type VT.
1153 SDLoc DL(Op);
1154 SDValue ResultAVG =
1155 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1156 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1157 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1158}
1159
1160/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1161/// result of Op are ever used downstream. If we can use this information to
1162/// simplify Op, create a new simplified DAG node and return true, returning the
1163/// original and new nodes in Old and New. Otherwise, analyze the expression and
1164/// return a mask of Known bits for the expression (used to simplify the
1165/// caller). The Known bits may only be accurate for those bits in the
1166/// OriginalDemandedBits and OriginalDemandedElts.
1168 SDValue Op, const APInt &OriginalDemandedBits,
1169 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1170 unsigned Depth, bool AssumeSingleUse) const {
1171 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1172 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1173 "Mask size mismatches value type size!");
1174
1175 // Don't know anything.
1176 Known = KnownBits(BitWidth);
1177
1178 EVT VT = Op.getValueType();
1179 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1180 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1181 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1182 "Unexpected vector size");
1183
1184 APInt DemandedBits = OriginalDemandedBits;
1185 APInt DemandedElts = OriginalDemandedElts;
1186 SDLoc dl(Op);
1187
1188 // Undef operand.
1189 if (Op.isUndef())
1190 return false;
1191
1192 // We can't simplify target constants.
1193 if (Op.getOpcode() == ISD::TargetConstant)
1194 return false;
1195
1196 if (Op.getOpcode() == ISD::Constant) {
1197 // We know all of the bits for a constant!
1198 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1199 return false;
1200 }
1201
1202 if (Op.getOpcode() == ISD::ConstantFP) {
1203 // We know all of the bits for a floating point constant!
1205 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1206 return false;
1207 }
1208
1209 // Other users may use these bits.
1210 bool HasMultiUse = false;
1211 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1213 // Limit search depth.
1214 return false;
1215 }
1216 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1218 DemandedElts = APInt::getAllOnes(NumElts);
1219 HasMultiUse = true;
1220 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1221 // Not demanding any bits/elts from Op.
1222 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1223 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1224 // Limit search depth.
1225 return false;
1226 }
1227
1228 KnownBits Known2;
1229 switch (Op.getOpcode()) {
1230 case ISD::SCALAR_TO_VECTOR: {
1231 if (VT.isScalableVector())
1232 return false;
1233 if (!DemandedElts[0])
1234 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1235
1236 KnownBits SrcKnown;
1237 SDValue Src = Op.getOperand(0);
1238 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1239 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1240 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1241 return true;
1242
1243 // Upper elements are undef, so only get the knownbits if we just demand
1244 // the bottom element.
1245 if (DemandedElts == 1)
1246 Known = SrcKnown.anyextOrTrunc(BitWidth);
1247 break;
1248 }
1249 case ISD::BUILD_VECTOR:
1250 // Collect the known bits that are shared by every demanded element.
1251 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1252 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1253 return false; // Don't fall through, will infinitely loop.
1254 case ISD::SPLAT_VECTOR: {
1255 SDValue Scl = Op.getOperand(0);
1256 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1257 KnownBits KnownScl;
1258 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1259 return true;
1260
1261 // Implicitly truncate the bits to match the official semantics of
1262 // SPLAT_VECTOR.
1263 Known = KnownScl.trunc(BitWidth);
1264 break;
1265 }
1266 case ISD::FREEZE: {
1267 SDValue N0 = Op.getOperand(0);
1269 N0, DemandedElts, UndefPoisonKind::UndefOrPoison, Depth + 1))
1270 return TLO.CombineTo(Op, N0);
1271 break;
1272 }
1273 case ISD::LOAD: {
1274 auto *LD = cast<LoadSDNode>(Op);
1275 if (getTargetConstantFromLoad(LD)) {
1276 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1277 return false; // Don't fall through, will infinitely loop.
1278 }
1279 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1280 // If this is a ZEXTLoad and we are looking at the loaded value.
1281 EVT MemVT = LD->getMemoryVT();
1282 unsigned MemBits = MemVT.getScalarSizeInBits();
1283 Known.Zero.setBitsFrom(MemBits);
1284 return false; // Don't fall through, will infinitely loop.
1285 }
1286 break;
1287 }
1289 if (VT.isScalableVector())
1290 return false;
1291 SDValue Vec = Op.getOperand(0);
1292 SDValue Scl = Op.getOperand(1);
1293 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1294 EVT VecVT = Vec.getValueType();
1295
1296 // If index isn't constant, assume we need all vector elements AND the
1297 // inserted element.
1298 APInt DemandedVecElts(DemandedElts);
1299 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1300 unsigned Idx = CIdx->getZExtValue();
1301 DemandedVecElts.clearBit(Idx);
1302
1303 // Inserted element is not required.
1304 if (!DemandedElts[Idx])
1305 return TLO.CombineTo(Op, Vec);
1306 }
1307
1308 KnownBits KnownScl;
1309 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1310 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1311 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1312 return true;
1313
1314 Known = KnownScl.anyextOrTrunc(BitWidth);
1315
1316 KnownBits KnownVec;
1317 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1318 Depth + 1))
1319 return true;
1320
1321 if (!!DemandedVecElts)
1322 Known = Known.intersectWith(KnownVec);
1323
1324 return false;
1325 }
1326 case ISD::INSERT_SUBVECTOR: {
1327 if (VT.isScalableVector())
1328 return false;
1329 // Demand any elements from the subvector and the remainder from the src it's
1330 // inserted into.
1331 SDValue Src = Op.getOperand(0);
1332 SDValue Sub = Op.getOperand(1);
1333 uint64_t Idx = Op.getConstantOperandVal(2);
1334 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1335 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1336 APInt DemandedSrcElts = DemandedElts;
1337 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
1338
1339 KnownBits KnownSub, KnownSrc;
1340 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1341 Depth + 1))
1342 return true;
1343 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1344 Depth + 1))
1345 return true;
1346
1347 Known.setAllConflict();
1348 if (!!DemandedSubElts)
1349 Known = Known.intersectWith(KnownSub);
1350 if (!!DemandedSrcElts)
1351 Known = Known.intersectWith(KnownSrc);
1352
1353 // Attempt to avoid multi-use src if we don't need anything from it.
1354 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1355 !DemandedSrcElts.isAllOnes()) {
1357 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1359 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1360 if (NewSub || NewSrc) {
1361 NewSub = NewSub ? NewSub : Sub;
1362 NewSrc = NewSrc ? NewSrc : Src;
1363 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1364 Op.getOperand(2));
1365 return TLO.CombineTo(Op, NewOp);
1366 }
1367 }
1368 break;
1369 }
1371 if (VT.isScalableVector())
1372 return false;
1373 // Offset the demanded elts by the subvector index.
1374 SDValue Src = Op.getOperand(0);
1375 if (Src.getValueType().isScalableVector())
1376 break;
1377 uint64_t Idx = Op.getConstantOperandVal(1);
1378 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1379 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1380
1381 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1382 Depth + 1))
1383 return true;
1384
1385 // Attempt to avoid multi-use src if we don't need anything from it.
1386 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1388 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1389 if (DemandedSrc) {
1390 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1391 Op.getOperand(1));
1392 return TLO.CombineTo(Op, NewOp);
1393 }
1394 }
1395 break;
1396 }
1397 case ISD::CONCAT_VECTORS: {
1398 if (VT.isScalableVector())
1399 return false;
1400 Known.setAllConflict();
1401 EVT SubVT = Op.getOperand(0).getValueType();
1402 unsigned NumSubVecs = Op.getNumOperands();
1403 unsigned NumSubElts = SubVT.getVectorNumElements();
1404 for (unsigned i = 0; i != NumSubVecs; ++i) {
1405 APInt DemandedSubElts =
1406 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1407 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1408 Known2, TLO, Depth + 1))
1409 return true;
1410 // Known bits are shared by every demanded subvector element.
1411 if (!!DemandedSubElts)
1412 Known = Known.intersectWith(Known2);
1413 }
1414 break;
1415 }
1416 case ISD::VECTOR_SHUFFLE: {
1417 assert(!VT.isScalableVector());
1418 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1419
1420 // Collect demanded elements from shuffle operands.
1421 APInt DemandedLHS, DemandedRHS;
1422 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1423 DemandedRHS))
1424 break;
1425
1426 if (!!DemandedLHS || !!DemandedRHS) {
1427 SDValue Op0 = Op.getOperand(0);
1428 SDValue Op1 = Op.getOperand(1);
1429
1430 Known.setAllConflict();
1431 if (!!DemandedLHS) {
1432 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1433 Depth + 1))
1434 return true;
1435 Known = Known.intersectWith(Known2);
1436 }
1437 if (!!DemandedRHS) {
1438 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1439 Depth + 1))
1440 return true;
1441 Known = Known.intersectWith(Known2);
1442 }
1443
1444 // Attempt to avoid multi-use ops if we don't need anything from them.
1446 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1448 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1449 if (DemandedOp0 || DemandedOp1) {
1450 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1451 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1452 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1453 return TLO.CombineTo(Op, NewOp);
1454 }
1455 }
1456 break;
1457 }
1458 case ISD::AND: {
1459 SDValue Op0 = Op.getOperand(0);
1460 SDValue Op1 = Op.getOperand(1);
1461
1462 // If the RHS is a constant, check to see if the LHS would be zero without
1463 // using the bits from the RHS. Below, we use knowledge about the RHS to
1464 // simplify the LHS, here we're using information from the LHS to simplify
1465 // the RHS.
1466 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1467 // Do not increment Depth here; that can cause an infinite loop.
1468 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1469 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1470 if ((LHSKnown.Zero & DemandedBits) ==
1471 (~RHSC->getAPIntValue() & DemandedBits))
1472 return TLO.CombineTo(Op, Op0);
1473
1474 // If any of the set bits in the RHS are known zero on the LHS, shrink
1475 // the constant.
1476 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1477 DemandedElts, TLO))
1478 return true;
1479
1480 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1481 // constant, but if this 'and' is only clearing bits that were just set by
1482 // the xor, then this 'and' can be eliminated by shrinking the mask of
1483 // the xor. For example, for a 32-bit X:
1484 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1485 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1486 LHSKnown.One == ~RHSC->getAPIntValue()) {
1487 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1488 return TLO.CombineTo(Op, Xor);
1489 }
1490 }
1491
1492 // (X +/- Y) & Y --> ~X & Y when Y is a power of 2 (or zero).
1493 SDValue X, Y;
1494 if (sd_match(Op,
1495 m_And(m_Value(Y),
1497 m_Sub(m_Value(X), m_Deferred(Y)))))) &&
1498 TLO.DAG.isKnownToBeAPowerOfTwo(Y, DemandedElts, /*OrZero=*/true)) {
1499 return TLO.CombineTo(
1500 Op, TLO.DAG.getNode(ISD::AND, dl, VT, TLO.DAG.getNOT(dl, X, VT), Y));
1501 }
1502
1503 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1504 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1505 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1506 (Op0.getOperand(0).isUndef() ||
1508 Op0->hasOneUse()) {
1509 unsigned NumSubElts =
1511 unsigned SubIdx = Op0.getConstantOperandVal(2);
1512 APInt DemandedSub =
1513 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1514 KnownBits KnownSubMask =
1515 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1516 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1517 SDValue NewAnd =
1518 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1519 SDValue NewInsert =
1520 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1521 Op0.getOperand(1), Op0.getOperand(2));
1522 return TLO.CombineTo(Op, NewInsert);
1523 }
1524 }
1525
1526 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1527 Depth + 1))
1528 return true;
1529 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1530 Known2, TLO, Depth + 1))
1531 return true;
1532
1533 // If all of the demanded bits are known one on one side, return the other.
1534 // These bits cannot contribute to the result of the 'and'.
1535 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1536 return TLO.CombineTo(Op, Op0);
1537 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1538 return TLO.CombineTo(Op, Op1);
1539 // If all of the demanded bits in the inputs are known zeros, return zero.
1540 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1541 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1542 // If the RHS is a constant, see if we can simplify it.
1543 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1544 TLO))
1545 return true;
1546 // If the operation can be done in a smaller type, do so.
1548 return true;
1549
1550 // Attempt to avoid multi-use ops if we don't need anything from them.
1551 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1553 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1555 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1556 if (DemandedOp0 || DemandedOp1) {
1557 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1558 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1559 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1560 return TLO.CombineTo(Op, NewOp);
1561 }
1562 }
1563
1564 Known &= Known2;
1565 break;
1566 }
1567 case ISD::OR: {
1568 SDValue Op0 = Op.getOperand(0);
1569 SDValue Op1 = Op.getOperand(1);
1570 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1571 Depth + 1)) {
1572 Op->dropFlags(SDNodeFlags::Disjoint);
1573 return true;
1574 }
1575
1576 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1577 Known2, TLO, Depth + 1)) {
1578 Op->dropFlags(SDNodeFlags::Disjoint);
1579 return true;
1580 }
1581
1582 // If all of the demanded bits are known zero on one side, return the other.
1583 // These bits cannot contribute to the result of the 'or'.
1584 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1585 return TLO.CombineTo(Op, Op0);
1586 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1587 return TLO.CombineTo(Op, Op1);
1588 // If the RHS is a constant, see if we can simplify it.
1589 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1590 return true;
1591 // If the operation can be done in a smaller type, do so.
1593 return true;
1594
1595 // Attempt to avoid multi-use ops if we don't need anything from them.
1596 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1598 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1600 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1601 if (DemandedOp0 || DemandedOp1) {
1602 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1603 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1604 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1605 return TLO.CombineTo(Op, NewOp);
1606 }
1607 }
1608
1609 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1610 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1611 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1612 Op0->hasOneUse() && Op1->hasOneUse()) {
1613 // Attempt to match all commutations - m_c_Or would've been useful!
1614 for (int I = 0; I != 2; ++I) {
1615 SDValue X = Op.getOperand(I).getOperand(0);
1616 SDValue C1 = Op.getOperand(I).getOperand(1);
1617 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1618 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1619 if (Alt.getOpcode() == ISD::OR) {
1620 for (int J = 0; J != 2; ++J) {
1621 if (X == Alt.getOperand(J)) {
1622 SDValue Y = Alt.getOperand(1 - J);
1623 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1624 {C1, C2})) {
1625 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1626 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1627 return TLO.CombineTo(
1628 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1629 }
1630 }
1631 }
1632 }
1633 }
1634 }
1635
1636 Known |= Known2;
1637 break;
1638 }
1639 case ISD::XOR: {
1640 SDValue Op0 = Op.getOperand(0);
1641 SDValue Op1 = Op.getOperand(1);
1642
1643 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1644 Depth + 1))
1645 return true;
1646 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1647 Depth + 1))
1648 return true;
1649
1650 // If all of the demanded bits are known zero on one side, return the other.
1651 // These bits cannot contribute to the result of the 'xor'.
1652 if (DemandedBits.isSubsetOf(Known.Zero))
1653 return TLO.CombineTo(Op, Op0);
1654 if (DemandedBits.isSubsetOf(Known2.Zero))
1655 return TLO.CombineTo(Op, Op1);
1656 // If the operation can be done in a smaller type, do so.
1658 return true;
1659
1660 // If all of the unknown bits are known to be zero on one side or the other
1661 // turn this into an *inclusive* or.
1662 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1663 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1664 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1665
1666 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1667 if (C) {
1668 // If one side is a constant, and all of the set bits in the constant are
1669 // also known set on the other side, turn this into an AND, as we know
1670 // the bits will be cleared.
1671 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1672 // NB: it is okay if more bits are known than are requested
1673 if (C->getAPIntValue() == Known2.One) {
1674 SDValue ANDC =
1675 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1676 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1677 }
1678
1679 // If the RHS is a constant, see if we can change it. Don't alter a -1
1680 // constant because that's a 'not' op, and that is better for combining
1681 // and codegen.
1682 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1683 // We're flipping all demanded bits. Flip the undemanded bits too.
1684 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1685 return TLO.CombineTo(Op, New);
1686 }
1687
1688 unsigned Op0Opcode = Op0.getOpcode();
1689 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1690 if (ConstantSDNode *ShiftC =
1691 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1692 // Don't crash on an oversized shift. We can not guarantee that a
1693 // bogus shift has been simplified to undef.
1694 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1695 uint64_t ShiftAmt = ShiftC->getZExtValue();
1697 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1698 : Ones.lshr(ShiftAmt);
1699 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1701 // If the xor constant is a demanded mask, do a 'not' before the
1702 // shift:
1703 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1704 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1705 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1706 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1707 Op0.getOperand(1)));
1708 }
1709 }
1710 }
1711 }
1712 }
1713
1714 // If we can't turn this into a 'not', try to shrink the constant.
1715 if (!C || !C->isAllOnes())
1716 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1717 return true;
1718
1719 // Attempt to avoid multi-use ops if we don't need anything from them.
1720 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1722 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1724 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1725 if (DemandedOp0 || DemandedOp1) {
1726 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1727 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1728 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1729 return TLO.CombineTo(Op, NewOp);
1730 }
1731 }
1732
1733 Known ^= Known2;
1734 break;
1735 }
1736 case ISD::SELECT:
1737 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1738 Known, TLO, Depth + 1))
1739 return true;
1740 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1741 Known2, TLO, Depth + 1))
1742 return true;
1743
1744 // If the operands are constants, see if we can simplify them.
1745 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1746 return true;
1747
1748 // Only known if known in both the LHS and RHS.
1749 Known = Known.intersectWith(Known2);
1750 break;
1751 case ISD::VSELECT:
1752 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1753 Known, TLO, Depth + 1))
1754 return true;
1755 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1756 Known2, TLO, Depth + 1))
1757 return true;
1758
1759 // Only known if known in both the LHS and RHS.
1760 Known = Known.intersectWith(Known2);
1761 break;
1762 case ISD::SELECT_CC:
1763 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1764 Known, TLO, Depth + 1))
1765 return true;
1766 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1767 Known2, TLO, Depth + 1))
1768 return true;
1769
1770 // If the operands are constants, see if we can simplify them.
1771 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1772 return true;
1773
1774 // Only known if known in both the LHS and RHS.
1775 Known = Known.intersectWith(Known2);
1776 break;
1777 case ISD::SETCC: {
1778 SDValue Op0 = Op.getOperand(0);
1779 SDValue Op1 = Op.getOperand(1);
1780 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1781 // If we're testing X < 0, X >= 0, X <= -1 or X > -1
1782 // (X is of integer type) then we only need the sign mask of the previous
1783 // result
1784 if (Op1.getValueType().isInteger() &&
1785 (((CC == ISD::SETLT || CC == ISD::SETGE) && isNullOrNullSplat(Op1)) ||
1786 ((CC == ISD::SETLE || CC == ISD::SETGT) &&
1787 isAllOnesOrAllOnesSplat(Op1)))) {
1788 KnownBits KnownOp0;
1791 DemandedElts, KnownOp0, TLO, Depth + 1))
1792 return true;
1793 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1794 // width as the setcc result, and (3) the result of a setcc conforms to 0
1795 // or -1, we may be able to bypass the setcc.
1796 if (DemandedBits.isSignMask() &&
1800 // If we remove a >= 0 or > -1 (for integers), we need to introduce a
1801 // NOT Operation
1802 if (CC == ISD::SETGE || CC == ISD::SETGT) {
1803 SDLoc DL(Op);
1804 EVT VT = Op0.getValueType();
1805 SDValue NotOp0 = TLO.DAG.getNOT(DL, Op0, VT);
1806 return TLO.CombineTo(Op, NotOp0);
1807 }
1808 return TLO.CombineTo(Op, Op0);
1809 }
1810 }
1811 if (getBooleanContents(Op0.getValueType()) ==
1813 BitWidth > 1)
1814 Known.Zero.setBitsFrom(1);
1815 break;
1816 }
1817 case ISD::SHL: {
1818 SDValue Op0 = Op.getOperand(0);
1819 SDValue Op1 = Op.getOperand(1);
1820 EVT ShiftVT = Op1.getValueType();
1821
1822 if (std::optional<unsigned> KnownSA =
1823 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1824 unsigned ShAmt = *KnownSA;
1825 if (ShAmt == 0)
1826 return TLO.CombineTo(Op, Op0);
1827
1828 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1829 // single shift. We can do this if the bottom bits (which are shifted
1830 // out) are never demanded.
1831 // TODO - support non-uniform vector amounts.
1832 if (Op0.getOpcode() == ISD::SRL) {
1833 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1834 if (std::optional<unsigned> InnerSA =
1835 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1836 unsigned C1 = *InnerSA;
1837 unsigned Opc = ISD::SHL;
1838 int Diff = ShAmt - C1;
1839 if (Diff < 0) {
1840 Diff = -Diff;
1841 Opc = ISD::SRL;
1842 }
1843 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1844 return TLO.CombineTo(
1845 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1846 }
1847 }
1848 }
1849
1850 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1851 // are not demanded. This will likely allow the anyext to be folded away.
1852 // TODO - support non-uniform vector amounts.
1853 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1854 SDValue InnerOp = Op0.getOperand(0);
1855 EVT InnerVT = InnerOp.getValueType();
1856 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1857 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1858 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1859 SDValue NarrowShl = TLO.DAG.getNode(
1860 ISD::SHL, dl, InnerVT, InnerOp,
1861 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1862 return TLO.CombineTo(
1863 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1864 }
1865
1866 // Repeat the SHL optimization above in cases where an extension
1867 // intervenes: (shl (anyext (shr x, c1)), c2) to
1868 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1869 // aren't demanded (as above) and that the shifted upper c1 bits of
1870 // x aren't demanded.
1871 // TODO - support non-uniform vector amounts.
1872 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1873 InnerOp.hasOneUse()) {
1874 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1875 InnerOp, DemandedElts, Depth + 2)) {
1876 unsigned InnerShAmt = *SA2;
1877 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1878 DemandedBits.getActiveBits() <=
1879 (InnerBits - InnerShAmt + ShAmt) &&
1880 DemandedBits.countr_zero() >= ShAmt) {
1881 SDValue NewSA =
1882 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1883 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1884 InnerOp.getOperand(0));
1885 return TLO.CombineTo(
1886 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1887 }
1888 }
1889 }
1890 }
1891
1892 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1893 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1894 Depth + 1)) {
1895 // Disable the nsw and nuw flags. We can no longer guarantee that we
1896 // won't wrap after simplification.
1897 Op->dropFlags(SDNodeFlags::NoWrap);
1898 return true;
1899 }
1900 Known <<= ShAmt;
1901 // Low bits known zero.
1902 Known.Zero.setLowBits(ShAmt);
1903
1904 // Attempt to avoid multi-use ops if we don't need anything from them.
1905 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1907 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1908 if (DemandedOp0) {
1909 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1910 return TLO.CombineTo(Op, NewOp);
1911 }
1912 }
1913
1914 // TODO: Can we merge this fold with the one below?
1915 // Try shrinking the operation as long as the shift amount will still be
1916 // in range.
1917 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1918 Op.getNode()->hasOneUse()) {
1919 // Search for the smallest integer type with free casts to and from
1920 // Op's type. For expedience, just check power-of-2 integer types.
1921 unsigned DemandedSize = DemandedBits.getActiveBits();
1922 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1923 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1924 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1925 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1926 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1927 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1928 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1929 assert(DemandedSize <= SmallVTBits &&
1930 "Narrowed below demanded bits?");
1931 // We found a type with free casts.
1932 SDValue NarrowShl = TLO.DAG.getNode(
1933 ISD::SHL, dl, SmallVT,
1934 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1935 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1936 return TLO.CombineTo(
1937 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1938 }
1939 }
1940 }
1941
1942 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1943 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1944 // Only do this if we demand the upper half so the knownbits are correct.
1945 unsigned HalfWidth = BitWidth / 2;
1946 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1947 DemandedBits.countLeadingOnes() >= HalfWidth) {
1948 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1949 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1950 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1951 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1952 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1953 // If we're demanding the upper bits at all, we must ensure
1954 // that the upper bits of the shift result are known to be zero,
1955 // which is equivalent to the narrow shift being NUW.
1956 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1957 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1958 SDNodeFlags Flags;
1959 Flags.setNoSignedWrap(IsNSW);
1960 Flags.setNoUnsignedWrap(IsNUW);
1961 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1962 SDValue NewShiftAmt =
1963 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1964 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1965 NewShiftAmt, Flags);
1966 SDValue NewExt =
1967 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1968 return TLO.CombineTo(Op, NewExt);
1969 }
1970 }
1971 }
1972 } else {
1973 // This is a variable shift, so we can't shift the demand mask by a known
1974 // amount. But if we are not demanding high bits, then we are not
1975 // demanding those bits from the pre-shifted operand either.
1976 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1977 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1978 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1979 Depth + 1)) {
1980 // Disable the nsw and nuw flags. We can no longer guarantee that we
1981 // won't wrap after simplification.
1982 Op->dropFlags(SDNodeFlags::NoWrap);
1983 return true;
1984 }
1985 Known.resetAll();
1986 }
1987 }
1988
1989 // If we are only demanding sign bits then we can use the shift source
1990 // directly.
1991 if (std::optional<unsigned> MaxSA =
1992 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1993 unsigned ShAmt = *MaxSA;
1994 unsigned NumSignBits =
1995 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1996 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1997 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1998 return TLO.CombineTo(Op, Op0);
1999 }
2000 break;
2001 }
2002 case ISD::SRL: {
2003 SDValue Op0 = Op.getOperand(0);
2004 SDValue Op1 = Op.getOperand(1);
2005 EVT ShiftVT = Op1.getValueType();
2006
2007 if (std::optional<unsigned> KnownSA =
2008 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2009 unsigned ShAmt = *KnownSA;
2010 if (ShAmt == 0)
2011 return TLO.CombineTo(Op, Op0);
2012
2013 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
2014 // single shift. We can do this if the top bits (which are shifted out)
2015 // are never demanded.
2016 // TODO - support non-uniform vector amounts.
2017 if (Op0.getOpcode() == ISD::SHL) {
2018 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2019 if (std::optional<unsigned> InnerSA =
2020 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2021 unsigned C1 = *InnerSA;
2022 unsigned Opc = ISD::SRL;
2023 int Diff = ShAmt - C1;
2024 if (Diff < 0) {
2025 Diff = -Diff;
2026 Opc = ISD::SHL;
2027 }
2028 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
2029 return TLO.CombineTo(
2030 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
2031 }
2032 }
2033 }
2034
2035 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
2036 // single sra. We can do this if the top bits are never demanded.
2037 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
2038 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2039 if (std::optional<unsigned> InnerSA =
2040 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2041 unsigned C1 = *InnerSA;
2042 // Clamp the combined shift amount if it exceeds the bit width.
2043 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
2044 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
2045 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
2046 Op0.getOperand(0), NewSA));
2047 }
2048 }
2049 }
2050
2051 APInt InDemandedMask = (DemandedBits << ShAmt);
2052
2053 // If the shift is exact, then it does demand the low bits (and knows that
2054 // they are zero).
2055 if (Op->getFlags().hasExact())
2056 InDemandedMask.setLowBits(ShAmt);
2057
2058 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2059 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2060 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2062 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2063 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2064 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2065 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2066 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2067 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2068 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2069 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2070 SDValue NewShiftAmt =
2071 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2072 SDValue NewShift =
2073 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2074 return TLO.CombineTo(
2075 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2076 }
2077 }
2078
2079 // Compute the new bits that are at the top now.
2080 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2081 Depth + 1))
2082 return true;
2083 Known >>= ShAmt;
2084 // High bits known zero.
2085 Known.Zero.setHighBits(ShAmt);
2086
2087 // Attempt to avoid multi-use ops if we don't need anything from them.
2088 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2090 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2091 if (DemandedOp0) {
2092 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2093 return TLO.CombineTo(Op, NewOp);
2094 }
2095 }
2096 } else {
2097 // Use generic knownbits computation as it has support for non-uniform
2098 // shift amounts.
2099 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2100 }
2101
2102 // If we are only demanding sign bits then we can use the shift source
2103 // directly.
2104 if (std::optional<unsigned> MaxSA =
2105 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2106 unsigned ShAmt = *MaxSA;
2107 // Must already be signbits in DemandedBits bounds, and can't demand any
2108 // shifted in zeroes.
2109 if (DemandedBits.countl_zero() >= ShAmt) {
2110 unsigned NumSignBits =
2111 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2112 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2113 return TLO.CombineTo(Op, Op0);
2114 }
2115 }
2116
2117 // Try to match AVG patterns (after shift simplification).
2118 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2119 DemandedElts, Depth + 1))
2120 return TLO.CombineTo(Op, AVG);
2121
2122 break;
2123 }
2124 case ISD::SRA: {
2125 SDValue Op0 = Op.getOperand(0);
2126 SDValue Op1 = Op.getOperand(1);
2127 EVT ShiftVT = Op1.getValueType();
2128
2129 // If we only want bits that already match the signbit then we don't need
2130 // to shift.
2131 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2132 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2133 NumHiDemandedBits)
2134 return TLO.CombineTo(Op, Op0);
2135
2136 // If this is an arithmetic shift right and only the low bit is demanded, we
2137 // can always convert this into a logical shr, even if the shift amount is
2138 // variable. The low bit of the shift cannot be an input sign bit unless
2139 // the shift amount is >= the size of the datatype, which is undefined.
2140 if (DemandedBits.isOne())
2141 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2142
2143 if (std::optional<unsigned> KnownSA =
2144 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2145 unsigned ShAmt = *KnownSA;
2146 if (ShAmt == 0)
2147 return TLO.CombineTo(Op, Op0);
2148
2149 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2150 // supports sext_inreg.
2151 if (Op0.getOpcode() == ISD::SHL) {
2152 if (std::optional<unsigned> InnerSA =
2153 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2154 unsigned LowBits = BitWidth - ShAmt;
2155 EVT ExtVT = VT.changeElementType(
2156 *TLO.DAG.getContext(),
2157 EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits));
2158
2159 if (*InnerSA == ShAmt) {
2160 if (!TLO.LegalOperations() ||
2162 return TLO.CombineTo(
2163 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2164 Op0.getOperand(0),
2165 TLO.DAG.getValueType(ExtVT)));
2166
2167 // Even if we can't convert to sext_inreg, we might be able to
2168 // remove this shift pair if the input is already sign extended.
2169 unsigned NumSignBits =
2170 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2171 if (NumSignBits > ShAmt)
2172 return TLO.CombineTo(Op, Op0.getOperand(0));
2173 }
2174 }
2175 }
2176
2177 APInt InDemandedMask = (DemandedBits << ShAmt);
2178
2179 // If the shift is exact, then it does demand the low bits (and knows that
2180 // they are zero).
2181 if (Op->getFlags().hasExact())
2182 InDemandedMask.setLowBits(ShAmt);
2183
2184 // If any of the demanded bits are produced by the sign extension, we also
2185 // demand the input sign bit.
2186 if (DemandedBits.countl_zero() < ShAmt)
2187 InDemandedMask.setSignBit();
2188
2189 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2190 Depth + 1))
2191 return true;
2192 Known >>= ShAmt;
2193
2194 // If the input sign bit is known to be zero, or if none of the top bits
2195 // are demanded, turn this into an unsigned shift right.
2196 if (Known.Zero[BitWidth - ShAmt - 1] ||
2197 DemandedBits.countl_zero() >= ShAmt) {
2198 SDNodeFlags Flags;
2199 Flags.setExact(Op->getFlags().hasExact());
2200 return TLO.CombineTo(
2201 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2202 }
2203
2204 int Log2 = DemandedBits.exactLogBase2();
2205 if (Log2 >= 0) {
2206 // The bit must come from the sign.
2207 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2208 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2209 }
2210
2211 if (Known.One[BitWidth - ShAmt - 1])
2212 // New bits are known one.
2213 Known.One.setHighBits(ShAmt);
2214
2215 // Attempt to avoid multi-use ops if we don't need anything from them.
2216 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2218 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2219 if (DemandedOp0) {
2220 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2221 return TLO.CombineTo(Op, NewOp);
2222 }
2223 }
2224 }
2225
2226 // Try to match AVG patterns (after shift simplification).
2227 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2228 DemandedElts, Depth + 1))
2229 return TLO.CombineTo(Op, AVG);
2230
2231 break;
2232 }
2233 case ISD::FSHL:
2234 case ISD::FSHR: {
2235 SDValue Op0 = Op.getOperand(0);
2236 SDValue Op1 = Op.getOperand(1);
2237 SDValue Op2 = Op.getOperand(2);
2238 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2239
2240 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2241 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2242
2243 // For fshl, 0-shift returns the 1st arg.
2244 // For fshr, 0-shift returns the 2nd arg.
2245 if (Amt == 0) {
2246 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2247 Known, TLO, Depth + 1))
2248 return true;
2249 break;
2250 }
2251
2252 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2253 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2254 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2255 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2256 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2257 Depth + 1))
2258 return true;
2259 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2260 Depth + 1))
2261 return true;
2262
2263 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2264 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2265 Known = Known.unionWith(Known2);
2266
2267 // Attempt to avoid multi-use ops if we don't need anything from them.
2268 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2269 !DemandedElts.isAllOnes()) {
2271 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2273 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2274 if (DemandedOp0 || DemandedOp1) {
2275 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2276 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2277 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2278 DemandedOp1, Op2);
2279 return TLO.CombineTo(Op, NewOp);
2280 }
2281 }
2282 }
2283
2284 if (isPowerOf2_32(BitWidth)) {
2285 // Fold FSHR(Op0,Op1,Op2) -> SRL(Op1,Op2)
2286 // iff we're guaranteed not to use Op0.
2287 // TODO: Add FSHL equivalent?
2288 if (!IsFSHL && !DemandedBits.isAllOnes() &&
2289 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT))) {
2290 KnownBits KnownAmt =
2291 TLO.DAG.computeKnownBits(Op2, DemandedElts, Depth + 1);
2292 unsigned MaxShiftAmt =
2293 KnownAmt.getMaxValue().getLimitedValue(BitWidth - 1);
2294 // Check we don't demand any shifted bits outside Op1.
2295 if (DemandedBits.countl_zero() >= MaxShiftAmt) {
2296 EVT AmtVT = Op2.getValueType();
2297 SDValue NewAmt =
2298 TLO.DAG.getNode(ISD::AND, dl, AmtVT, Op2,
2299 TLO.DAG.getConstant(BitWidth - 1, dl, AmtVT));
2300 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, Op1, NewAmt);
2301 return TLO.CombineTo(Op, NewOp);
2302 }
2303 }
2304
2305 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2306 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2307 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts, Known2, TLO,
2308 Depth + 1))
2309 return true;
2310 }
2311 break;
2312 }
2313 case ISD::ROTL:
2314 case ISD::ROTR: {
2315 SDValue Op0 = Op.getOperand(0);
2316 SDValue Op1 = Op.getOperand(1);
2317 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2318
2319 // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2320 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2321 return TLO.CombineTo(Op, Op0);
2322
2323 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2324 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2325 unsigned RevAmt = BitWidth - Amt;
2326
2327 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2328 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2329 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2330 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2331 Depth + 1))
2332 return true;
2333
2334 // rot*(x, 0) --> x
2335 if (Amt == 0)
2336 return TLO.CombineTo(Op, Op0);
2337
2338 // See if we don't demand either half of the rotated bits.
2339 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2340 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2341 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2342 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2343 }
2344 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2345 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2346 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2347 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2348 }
2349 }
2350
2351 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2352 if (isPowerOf2_32(BitWidth)) {
2353 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2354 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2355 Depth + 1))
2356 return true;
2357 }
2358 break;
2359 }
2360 case ISD::SMIN:
2361 case ISD::SMAX:
2362 case ISD::UMIN:
2363 case ISD::UMAX: {
2364 unsigned Opc = Op.getOpcode();
2365 SDValue Op0 = Op.getOperand(0);
2366 SDValue Op1 = Op.getOperand(1);
2367
2368 // If we're only demanding signbits, then we can simplify to OR/AND node.
2369 unsigned BitOp =
2370 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2371 unsigned NumSignBits =
2372 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2373 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2374 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2375 if (NumSignBits >= NumDemandedUpperBits)
2376 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2377
2378 // Check if one arg is always less/greater than (or equal) to the other arg.
2379 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2380 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2381 switch (Opc) {
2382 case ISD::SMIN:
2383 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2384 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2385 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2386 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2387 Known = KnownBits::smin(Known0, Known1);
2388 break;
2389 case ISD::SMAX:
2390 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2391 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2392 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2393 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2394 Known = KnownBits::smax(Known0, Known1);
2395 break;
2396 case ISD::UMIN:
2397 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2398 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2399 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2400 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2401 Known = KnownBits::umin(Known0, Known1);
2402 break;
2403 case ISD::UMAX:
2404 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2405 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2406 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2407 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2408 Known = KnownBits::umax(Known0, Known1);
2409 break;
2410 }
2411 break;
2412 }
2413 case ISD::BITREVERSE: {
2414 SDValue Src = Op.getOperand(0);
2415 APInt DemandedSrcBits = DemandedBits.reverseBits();
2416 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2417 Depth + 1))
2418 return true;
2419 Known = Known2.reverseBits();
2420 break;
2421 }
2422 case ISD::BSWAP: {
2423 SDValue Src = Op.getOperand(0);
2424
2425 // If the only bits demanded come from one byte of the bswap result,
2426 // just shift the input byte into position to eliminate the bswap.
2427 unsigned NLZ = DemandedBits.countl_zero();
2428 unsigned NTZ = DemandedBits.countr_zero();
2429
2430 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2431 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2432 // have 14 leading zeros, round to 8.
2433 NLZ = alignDown(NLZ, 8);
2434 NTZ = alignDown(NTZ, 8);
2435 // If we need exactly one byte, we can do this transformation.
2436 if (BitWidth - NLZ - NTZ == 8) {
2437 // Replace this with either a left or right shift to get the byte into
2438 // the right place.
2439 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2440 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2441 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2442 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2443 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2444 return TLO.CombineTo(Op, NewOp);
2445 }
2446 }
2447
2448 APInt DemandedSrcBits = DemandedBits.byteSwap();
2449 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2450 Depth + 1))
2451 return true;
2452 Known = Known2.byteSwap();
2453 break;
2454 }
2455 case ISD::CTPOP: {
2456 // If only 1 bit is demanded, replace with PARITY as long as we're before
2457 // op legalization.
2458 // FIXME: Limit to scalars for now.
2459 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2460 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2461 Op.getOperand(0)));
2462
2463 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2464 break;
2465 }
2466 case ISD::PDEP: {
2467 SDValue Op0 = Op.getOperand(0);
2468 SDValue Op1 = Op.getOperand(1);
2469
2470 unsigned DemandedBitsLZ = OriginalDemandedBits.countl_zero();
2471 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2472
2473 // If the demanded bits have leading zeroes, we don't demand those from the
2474 // mask.
2475 if (SimplifyDemandedBits(Op1, LoMask, Known, TLO, Depth + 1))
2476 return true;
2477
2478 // The number of possible 1s in the mask determines the number of LSBs of
2479 // operand 0 used. Undemanded bits from the mask don't matter so filter
2480 // them before counting.
2481 KnownBits Known2;
2482 uint64_t Count = (~Known.Zero & LoMask).popcount();
2483 APInt DemandedMask(APInt::getLowBitsSet(BitWidth, Count));
2484 if (SimplifyDemandedBits(Op0, DemandedMask, Known2, TLO, Depth + 1))
2485 return true;
2486
2487 // Zeroes are retained from the mask, but not ones.
2488 Known.One.clearAllBits();
2489 // The result will have at least as many trailing zeros as the non-mask
2490 // operand since bits can only map to the same or higher bit position.
2491 Known.Zero.setLowBits(Known2.countMinTrailingZeros());
2492 break;
2493 }
2495 SDValue Op0 = Op.getOperand(0);
2496 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2497 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2498
2499 // If we only care about the highest bit, don't bother shifting right.
2500 if (DemandedBits.isSignMask()) {
2501 unsigned MinSignedBits =
2502 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2503 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2504 // However if the input is already sign extended we expect the sign
2505 // extension to be dropped altogether later and do not simplify.
2506 if (!AlreadySignExtended) {
2507 // Compute the correct shift amount type, which must be getShiftAmountTy
2508 // for scalar types after legalization.
2509 SDValue ShiftAmt =
2510 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2511 return TLO.CombineTo(Op,
2512 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2513 }
2514 }
2515
2516 // If none of the extended bits are demanded, eliminate the sextinreg.
2517 if (DemandedBits.getActiveBits() <= ExVTBits)
2518 return TLO.CombineTo(Op, Op0);
2519
2520 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2521
2522 // Since the sign extended bits are demanded, we know that the sign
2523 // bit is demanded.
2524 InputDemandedBits.setBit(ExVTBits - 1);
2525
2526 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2527 Depth + 1))
2528 return true;
2529
2530 // If the sign bit of the input is known set or clear, then we know the
2531 // top bits of the result.
2532
2533 // If the input sign bit is known zero, convert this into a zero extension.
2534 if (Known.Zero[ExVTBits - 1])
2535 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2536
2537 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2538 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2539 Known.One.setBitsFrom(ExVTBits);
2540 Known.Zero &= Mask;
2541 } else { // Input sign bit unknown
2542 Known.Zero &= Mask;
2543 Known.One &= Mask;
2544 }
2545 break;
2546 }
2547 case ISD::BUILD_PAIR: {
2548 EVT HalfVT = Op.getOperand(0).getValueType();
2549 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2550
2551 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2552 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2553
2554 KnownBits KnownLo, KnownHi;
2555
2556 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2557 return true;
2558
2559 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2560 return true;
2561
2562 Known = KnownHi.concat(KnownLo);
2563 break;
2564 }
2566 if (VT.isScalableVector())
2567 return false;
2568 [[fallthrough]];
2569 case ISD::ZERO_EXTEND: {
2570 SDValue Src = Op.getOperand(0);
2571 EVT SrcVT = Src.getValueType();
2572 unsigned InBits = SrcVT.getScalarSizeInBits();
2573 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2574 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2575
2576 // If none of the top bits are demanded, convert this into an any_extend.
2577 if (DemandedBits.getActiveBits() <= InBits) {
2578 // If we only need the non-extended bits of the bottom element
2579 // then we can just bitcast to the result.
2580 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2581 VT.getSizeInBits() == SrcVT.getSizeInBits())
2582 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2583
2584 unsigned Opc =
2586 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2587 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2588 }
2589
2590 APInt InDemandedBits = DemandedBits.trunc(InBits);
2591 APInt InDemandedElts = DemandedElts.zext(InElts);
2592 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2593 Depth + 1)) {
2594 Op->dropFlags(SDNodeFlags::NonNeg);
2595 return true;
2596 }
2597 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2598 Known = Known.zext(BitWidth);
2599
2600 // Attempt to avoid multi-use ops if we don't need anything from them.
2602 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2603 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2604 break;
2605 }
2607 if (VT.isScalableVector())
2608 return false;
2609 [[fallthrough]];
2610 case ISD::SIGN_EXTEND: {
2611 SDValue Src = Op.getOperand(0);
2612 EVT SrcVT = Src.getValueType();
2613 unsigned InBits = SrcVT.getScalarSizeInBits();
2614 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2615 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2616
2617 APInt InDemandedElts = DemandedElts.zext(InElts);
2618 APInt InDemandedBits = DemandedBits.trunc(InBits);
2619
2620 // Since some of the sign extended bits are demanded, we know that the sign
2621 // bit is demanded.
2622 InDemandedBits.setBit(InBits - 1);
2623
2624 // If none of the top bits are demanded, convert this into an any_extend.
2625 if (DemandedBits.getActiveBits() <= InBits) {
2626 // If we only need the non-extended bits of the bottom element
2627 // then we can just bitcast to the result.
2628 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2629 VT.getSizeInBits() == SrcVT.getSizeInBits())
2630 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2631
2632 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2634 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2635 InBits) {
2636 unsigned Opc =
2638 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2639 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2640 }
2641 }
2642
2643 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2644 Depth + 1))
2645 return true;
2646 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2647
2648 // If the sign bit is known one, the top bits match.
2649 Known = Known.sext(BitWidth);
2650
2651 // If the sign bit is known zero, convert this to a zero extend.
2652 if (Known.isNonNegative()) {
2653 unsigned Opc =
2655 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2656 SDNodeFlags Flags;
2657 if (!IsVecInReg)
2658 Flags |= SDNodeFlags::NonNeg;
2659 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2660 }
2661 }
2662
2663 // Attempt to avoid multi-use ops if we don't need anything from them.
2665 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2666 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2667 break;
2668 }
2670 if (VT.isScalableVector())
2671 return false;
2672 [[fallthrough]];
2673 case ISD::ANY_EXTEND: {
2674 SDValue Src = Op.getOperand(0);
2675 EVT SrcVT = Src.getValueType();
2676 unsigned InBits = SrcVT.getScalarSizeInBits();
2677 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2678 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2679
2680 // If we only need the bottom element then we can just bitcast.
2681 // TODO: Handle ANY_EXTEND?
2682 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2683 VT.getSizeInBits() == SrcVT.getSizeInBits())
2684 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2685
2686 APInt InDemandedBits = DemandedBits.trunc(InBits);
2687 APInt InDemandedElts = DemandedElts.zext(InElts);
2688 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2689 Depth + 1))
2690 return true;
2691 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2692 Known = Known.anyext(BitWidth);
2693
2694 // Attempt to avoid multi-use ops if we don't need anything from them.
2696 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2697 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2698 break;
2699 }
2700 case ISD::TRUNCATE: {
2701 SDValue Src = Op.getOperand(0);
2702
2703 // Simplify the input, using demanded bit information, and compute the known
2704 // zero/one bits live out.
2705 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2706 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2707 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2708 Depth + 1)) {
2709 // Disable the nsw and nuw flags. We can no longer guarantee that we
2710 // won't wrap after simplification.
2711 Op->dropFlags(SDNodeFlags::NoWrap);
2712 return true;
2713 }
2714 Known = Known.trunc(BitWidth);
2715
2716 // Attempt to avoid multi-use ops if we don't need anything from them.
2718 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2719 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2720
2721 // If the input is only used by this truncate, see if we can shrink it based
2722 // on the known demanded bits.
2723 switch (Src.getOpcode()) {
2724 default:
2725 break;
2726 case ISD::SRL:
2727 // Shrink SRL by a constant if none of the high bits shifted in are
2728 // demanded.
2729 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2730 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2731 // undesirable.
2732 break;
2733
2734 if (Src.getNode()->hasOneUse()) {
2735 if (isTruncateFree(Src, VT) &&
2736 !isTruncateFree(Src.getValueType(), VT)) {
2737 // If truncate is only free at trunc(srl), do not turn it into
2738 // srl(trunc). The check is done by first checking that the truncate is
2739 // free at Src's opcode (srl), then checking that the truncate is not done
2740 // by referencing a sub-register. In testing, if both trunc(srl) and
2741 // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2742 // trunc(srl)'s trunc is free, trunc(srl) is better.
2743 break;
2744 }
2745
2746 std::optional<unsigned> ShAmtC =
2747 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2748 if (!ShAmtC || *ShAmtC >= BitWidth)
2749 break;
2750 unsigned ShVal = *ShAmtC;
2751
2752 APInt HighBits =
2753 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2754 HighBits.lshrInPlace(ShVal);
2755 HighBits = HighBits.trunc(BitWidth);
2756 if (!(HighBits & DemandedBits)) {
2757 // None of the shifted in bits are needed. Add a truncate of the
2758 // shift input, then shift it.
2759 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2760 SDValue NewTrunc =
2761 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2762 return TLO.CombineTo(
2763 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2764 }
2765 }
2766 break;
2767 }
2768
2769 break;
2770 }
2771 case ISD::AssertZext: {
2772 // AssertZext demands all of the high bits, plus any of the low bits
2773 // demanded by its users.
2774 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2776 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2777 TLO, Depth + 1))
2778 return true;
2779
2780 Known.Zero |= ~InMask;
2781 Known.One &= (~Known.Zero);
2782 break;
2783 }
2785 SDValue Src = Op.getOperand(0);
2786 SDValue Idx = Op.getOperand(1);
2787 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2788 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2789
2790 if (SrcEltCnt.isScalable())
2791 return false;
2792
2793 // Demand the bits from every vector element without a constant index.
2794 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2795 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2796 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2797 if (CIdx->getAPIntValue().ult(NumSrcElts))
2798 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2799
2800 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2801 // anything about the extended bits.
2802 APInt DemandedSrcBits = DemandedBits;
2803 if (BitWidth > EltBitWidth)
2804 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2805
2806 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2807 Depth + 1))
2808 return true;
2809
2810 // Attempt to avoid multi-use ops if we don't need anything from them.
2811 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2812 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2813 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2814 SDValue NewOp =
2815 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2816 return TLO.CombineTo(Op, NewOp);
2817 }
2818 }
2819
2820 Known = Known2;
2821 if (BitWidth > EltBitWidth)
2822 Known = Known.anyext(BitWidth);
2823 break;
2824 }
2825 case ISD::BITCAST: {
2826 if (VT.isScalableVector())
2827 return false;
2828 SDValue Src = Op.getOperand(0);
2829 EVT SrcVT = Src.getValueType();
2830 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2831
2832 // If this is an FP->Int bitcast and if the sign bit is the only
2833 // thing demanded, turn this into a FGETSIGN.
2834 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2835 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2836 SrcVT.isFloatingPoint()) {
2838 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2839 // place. We expect the SHL to be eliminated by other optimizations.
2840 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, VT, Src);
2841 unsigned ShVal = Op.getValueSizeInBits() - 1;
2842 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2843 return TLO.CombineTo(Op,
2844 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2845 }
2846 }
2847
2848 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2849 // Demand the elt/bit if any of the original elts/bits are demanded.
2850 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2851 unsigned Scale = BitWidth / NumSrcEltBits;
2852 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2853 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2854 for (unsigned i = 0; i != Scale; ++i) {
2855 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2856 unsigned BitOffset = EltOffset * NumSrcEltBits;
2857 DemandedSrcBits |= DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2858 }
2859 // Recursive calls below may turn not demanded elements into poison, so we
2860 // need to demand all smaller source elements that maps to a demanded
2861 // destination element.
2862 APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2863
2864 APInt KnownSrcUndef, KnownSrcZero;
2865 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2866 KnownSrcZero, TLO, Depth + 1))
2867 return true;
2868
2869 KnownBits KnownSrcBits;
2870 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2871 KnownSrcBits, TLO, Depth + 1))
2872 return true;
2873 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2874 // TODO - bigendian once we have test coverage.
2875 unsigned Scale = NumSrcEltBits / BitWidth;
2876 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2877 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2878 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2879 for (unsigned i = 0; i != NumElts; ++i)
2880 if (DemandedElts[i]) {
2881 unsigned Offset = (i % Scale) * BitWidth;
2882 DemandedSrcBits.insertBits(DemandedBits, Offset);
2883 DemandedSrcElts.setBit(i / Scale);
2884 }
2885
2886 if (SrcVT.isVector()) {
2887 APInt KnownSrcUndef, KnownSrcZero;
2888 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2889 KnownSrcZero, TLO, Depth + 1))
2890 return true;
2891 }
2892
2893 KnownBits KnownSrcBits;
2894 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2895 KnownSrcBits, TLO, Depth + 1))
2896 return true;
2897
2898 // Attempt to avoid multi-use ops if we don't need anything from them.
2899 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2900 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2901 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2902 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2903 return TLO.CombineTo(Op, NewOp);
2904 }
2905 }
2906 }
2907
2908 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2909 // recursive call where Known may be useful to the caller.
2910 if (Depth > 0) {
2911 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2912 return false;
2913 }
2914 break;
2915 }
2916 case ISD::MUL:
2917 if (DemandedBits.isPowerOf2()) {
2918 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2919 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2920 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2921 unsigned CTZ = DemandedBits.countr_zero();
2922 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2923 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2924 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2925 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2926 return TLO.CombineTo(Op, Shl);
2927 }
2928 }
2929 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2930 // X * X is odd iff X is odd.
2931 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2932 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2933 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2934 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2935 return TLO.CombineTo(Op, And1);
2936 }
2937 [[fallthrough]];
2938 case ISD::PTRADD:
2939 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
2940 break;
2941 // PTRADD behaves like ADD if pointers are represented as integers.
2942 [[fallthrough]];
2943 case ISD::ADD:
2944 case ISD::SUB: {
2945 // Add, Sub, and Mul don't demand any bits in positions beyond that
2946 // of the highest bit demanded of them.
2947 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2948 SDNodeFlags Flags = Op.getNode()->getFlags();
2949 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2950 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2951 KnownBits KnownOp0, KnownOp1;
2952 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2953 const KnownBits &KnownRHS) {
2954 if (Op.getOpcode() == ISD::MUL)
2955 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2956 return Demanded;
2957 };
2958 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2959 Depth + 1) ||
2960 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2961 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2962 // See if the operation should be performed at a smaller bit width.
2964 // Disable the nsw and nuw flags. We can no longer guarantee that we
2965 // won't wrap after simplification.
2966 Op->dropFlags(SDNodeFlags::NoWrap);
2967 return true;
2968 }
2969
2970 // neg x with only low bit demanded is simply x.
2971 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2972 isNullConstant(Op0))
2973 return TLO.CombineTo(Op, Op1);
2974
2975 // Attempt to avoid multi-use ops if we don't need anything from them.
2976 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2978 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2980 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2981 if (DemandedOp0 || DemandedOp1) {
2982 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2983 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2984 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2985 Flags & ~SDNodeFlags::NoWrap);
2986 return TLO.CombineTo(Op, NewOp);
2987 }
2988 }
2989
2990 // If we have a constant operand, we may be able to turn it into -1 if we
2991 // do not demand the high bits. This can make the constant smaller to
2992 // encode, allow more general folding, or match specialized instruction
2993 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2994 // is probably not useful (and could be detrimental).
2996 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2997 if (C && !C->isAllOnes() && !C->isOne() &&
2998 (C->getAPIntValue() | HighMask).isAllOnes()) {
2999 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
3000 // Disable the nsw and nuw flags. We can no longer guarantee that we
3001 // won't wrap after simplification.
3002 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
3003 Flags & ~SDNodeFlags::NoWrap);
3004 return TLO.CombineTo(Op, NewOp);
3005 }
3006
3007 // Match a multiply with a disguised negated-power-of-2 and convert to
3008 // an equivalent shift-left amount.
3009 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
3010 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
3011 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
3012 return 0;
3013
3014 // Don't touch opaque constants. Also, ignore zero and power-of-2
3015 // multiplies. Those will get folded later.
3016 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
3017 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
3018 !MulC->getAPIntValue().isPowerOf2()) {
3019 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
3020 if (UnmaskedC.isNegatedPowerOf2())
3021 return (-UnmaskedC).logBase2();
3022 }
3023 return 0;
3024 };
3025
3026 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
3027 unsigned ShlAmt) {
3028 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
3029 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
3030 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
3031 return TLO.CombineTo(Op, Res);
3032 };
3033
3035 if (Op.getOpcode() == ISD::ADD) {
3036 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
3037 if (unsigned ShAmt = getShiftLeftAmt(Op0))
3038 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
3039 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
3040 if (unsigned ShAmt = getShiftLeftAmt(Op1))
3041 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
3042 }
3043 if (Op.getOpcode() == ISD::SUB) {
3044 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
3045 if (unsigned ShAmt = getShiftLeftAmt(Op1))
3046 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
3047 }
3048 }
3049
3050 if (Op.getOpcode() == ISD::MUL) {
3051 Known = KnownBits::mul(KnownOp0, KnownOp1);
3052 } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
3054 Op.getOpcode() != ISD::SUB, Flags.hasNoSignedWrap(),
3055 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
3056 }
3057 break;
3058 }
3059 case ISD::FABS: {
3060 SDValue Op0 = Op.getOperand(0);
3061 APInt SignMask = APInt::getSignMask(BitWidth);
3062
3063 if (!DemandedBits.intersects(SignMask))
3064 return TLO.CombineTo(Op, Op0);
3065
3066 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3067 Depth + 1))
3068 return true;
3069
3070 if (Known.isNonNegative())
3071 return TLO.CombineTo(Op, Op0);
3072 if (Known.isNegative())
3073 return TLO.CombineTo(
3074 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0, Op->getFlags()));
3075
3076 Known.Zero |= SignMask;
3077 Known.One &= ~SignMask;
3078
3079 break;
3080 }
3081 case ISD::FCOPYSIGN: {
3082 SDValue Op0 = Op.getOperand(0);
3083 SDValue Op1 = Op.getOperand(1);
3084
3085 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3086 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3087 APInt SignMask0 = APInt::getSignMask(BitWidth0);
3088 APInt SignMask1 = APInt::getSignMask(BitWidth1);
3089
3090 if (!DemandedBits.intersects(SignMask0))
3091 return TLO.CombineTo(Op, Op0);
3092
3093 if (SimplifyDemandedBits(Op0, ~SignMask0 & DemandedBits, DemandedElts,
3094 Known, TLO, Depth + 1) ||
3095 SimplifyDemandedBits(Op1, SignMask1, DemandedElts, Known2, TLO,
3096 Depth + 1))
3097 return true;
3098
3099 if (Known2.isNonNegative())
3100 return TLO.CombineTo(
3101 Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
3102
3103 if (Known2.isNegative())
3104 return TLO.CombineTo(
3105 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT,
3106 TLO.DAG.getNode(ISD::FABS, SDLoc(Op0), VT, Op0)));
3107
3108 Known.Zero &= ~SignMask0;
3109 Known.One &= ~SignMask0;
3110 break;
3111 }
3112 case ISD::FNEG: {
3113 SDValue Op0 = Op.getOperand(0);
3114 APInt SignMask = APInt::getSignMask(BitWidth);
3115
3116 if (!DemandedBits.intersects(SignMask))
3117 return TLO.CombineTo(Op, Op0);
3118
3119 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3120 Depth + 1))
3121 return true;
3122
3123 if (!Known.isSignUnknown()) {
3124 Known.Zero ^= SignMask;
3125 Known.One ^= SignMask;
3126 }
3127
3128 break;
3129 }
3130 default:
3131 // We also ask the target about intrinsics (which could be specific to it).
3132 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3133 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3134 // TODO: Probably okay to remove after audit; here to reduce change size
3135 // in initial enablement patch for scalable vectors
3136 if (Op.getValueType().isScalableVector())
3137 break;
3139 Known, TLO, Depth))
3140 return true;
3141 break;
3142 }
3143
3144 // Just use computeKnownBits to compute output bits.
3145 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3146 break;
3147 }
3148
3149 // If we know the value of all of the demanded bits, return this as a
3150 // constant.
3152 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
3153 // Avoid folding to a constant if any OpaqueConstant is involved.
3154 if (llvm::any_of(Op->ops(), [](SDValue V) {
3155 auto *C = dyn_cast<ConstantSDNode>(V);
3156 return C && C->isOpaque();
3157 }))
3158 return false;
3159 if (VT.isInteger())
3160 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
3161 if (VT.isFloatingPoint())
3162 return TLO.CombineTo(
3163 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
3164 dl, VT));
3165 }
3166
3167 // A multi use 'all demanded elts' simplify failed to find any knownbits.
3168 // Try again just for the original demanded elts.
3169 // Ensure we do this AFTER constant folding above.
3170 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3171 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3172
3173 return false;
3174}
3175
3177 const APInt &DemandedElts,
3178 DAGCombinerInfo &DCI) const {
// Combiner-facing convenience wrapper: runs SimplifyDemandedVectorElts on Op
// for the elements selected by DemandedElts and, if that reports a change,
// re-queues Op's node and commits the pending replacement through DCI.
// Returns true when a simplification was made, false otherwise.
// NOTE(review): the opening line of this signature (return type, function
// name and the declaration of Op) is absent from this listing; restore it
// from the upstream file before building.
3179 SelectionDAG &DAG = DCI.DAG;
// The two flags handed to TLO are the negations of DCI.isBeforeLegalize()
// and DCI.isBeforeLegalizeOps().
3180 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3181 !DCI.isBeforeLegalizeOps());
3182
// KnownUndef/KnownZero are required out-parameters of the worker call; this
// wrapper does not expose them to its caller.
3183 APInt KnownUndef, KnownZero;
3184 bool Simplified =
3185 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3186 if (Simplified) {
// Revisit Op's node and apply the replacement recorded in TLO.
3187 DCI.AddToWorklist(Op.getNode());
3188 DCI.CommitTargetLoweringOpt(TLO);
3189 }
3190
3191 return Simplified;
3192}
3193
3194/// Given a vector binary operation and known undefined elements for each input
3195/// operand, compute whether each element of the output is undefined.
///
/// UndefOp0 / UndefOp1 hold one bit per element of the respective operand
/// (their widths are asserted to equal NumElts below). The returned APInt is
/// NumElts bits wide; bit i is set when element i of the result is found to
/// be undef.
// NOTE(review): the first line of this signature (return type, function name
// and the declarations of BO and DAG) and the condition line of the
// "Vector binop only" assert are absent from this listing; restore them from
// the upstream file before building.
3197 const APInt &UndefOp0,
3198 const APInt &UndefOp1) {
3199 EVT VT = BO.getValueType();
3201 "Vector binop only");
3202
3203 EVT EltVT = VT.getVectorElementType();
// Anything that is not a fixed-length vector is analysed as a single element.
3204 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3205 assert(UndefOp0.getBitWidth() == NumElts &&
3206 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3207
// Returns the scalar to use for element Index of operand V:
//  - an undef of EltVT when UndefVals marks that element as undef;
//  - the build-vector operand itself when V is a BuildVectorSDNode and that
//    operand is an FP constant, undef, or a non-opaque integer constant;
//  - an empty SDValue otherwise (element cannot be folded).
3208 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3209 const APInt &UndefVals) {
3210 if (UndefVals[Index])
3211 return DAG.getUNDEF(EltVT);
3212
3213 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3214 // Try hard to make sure that the getNode() call is not creating temporary
3215 // nodes. Ignore opaque integers because they do not constant fold.
3216 SDValue Elt = BV->getOperand(Index);
3217 auto *C = dyn_cast<ConstantSDNode>(Elt);
3218 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3219 return Elt;
3220 }
3221
3222 return SDValue();
3223 };
3224
// Start with no element known undef and set bits as elements are proven.
3225 APInt KnownUndef = APInt::getZero(NumElts);
3226 for (unsigned i = 0; i != NumElts; ++i) {
3227 // If both inputs for this element are either constant or undef and match
3228 // the element type, compute the constant/undef result for this element of
3229 // the vector.
3230 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3231 // not handle FP constants. The code within getNode() should be refactored
3232 // to avoid the danger of creating a bogus temporary node here.
3233 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3234 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
// Build the scalar form of the binop on the two elements; the element is
// recorded as undef only when that scalar node itself is undef.
3235 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3236 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3237 KnownUndef.setBit(i);
3238 }
3239 return KnownUndef;
3240}
3241
3243 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3244 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3245 bool AssumeSingleUse) const {
3246 EVT VT = Op.getValueType();
3247 unsigned Opcode = Op.getOpcode();
3248 APInt DemandedElts = OriginalDemandedElts;
3249 unsigned NumElts = DemandedElts.getBitWidth();
3250 assert(VT.isVector() && "Expected vector op");
3251
3252 KnownUndef = KnownZero = APInt::getZero(NumElts);
3253
3255 return false;
3256
3257 // TODO: For now we assume we know nothing about scalable vectors.
3258 if (VT.isScalableVector())
3259 return false;
3260
3261 assert(VT.getVectorNumElements() == NumElts &&
3262 "Mask size mismatches value type element count!");
3263
3264 // Undef operand.
3265 if (Op.isUndef()) {
3266 KnownUndef.setAllBits();
3267 return false;
3268 }
3269
3270 // If Op has other users, assume that all elements are needed.
3271 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3272 DemandedElts.setAllBits();
3273
3274 // Not demanding any elements from Op.
3275 if (DemandedElts == 0) {
3276 KnownUndef.setAllBits();
3277 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3278 }
3279
3280 // Limit search depth.
3282 return false;
3283
3284 SDLoc DL(Op);
3285 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3286 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3287
3288 // Helper for demanding the specified elements and all the bits of both binary
3289 // operands.
3290 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3291 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3292 TLO.DAG, Depth + 1);
3293 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3294 TLO.DAG, Depth + 1);
3295 if (NewOp0 || NewOp1) {
3296 SDValue NewOp =
3297 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3298 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3299 return TLO.CombineTo(Op, NewOp);
3300 }
3301 return false;
3302 };
3303
3304 switch (Opcode) {
3305 case ISD::SCALAR_TO_VECTOR: {
3306 if (!DemandedElts[0]) {
3307 KnownUndef.setAllBits();
3308 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3309 }
3310 KnownUndef.setHighBits(NumElts - 1);
3311 break;
3312 }
3313 case ISD::BITCAST: {
3314 SDValue Src = Op.getOperand(0);
3315 EVT SrcVT = Src.getValueType();
3316
3317 if (!SrcVT.isVector()) {
3318 // TODO - bigendian once we have test coverage.
3319 if (IsLE) {
3320 APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
3321 unsigned EltSize = VT.getScalarSizeInBits();
3322 for (unsigned I = 0; I != NumElts; ++I) {
3323 if (DemandedElts[I]) {
3324 unsigned Offset = I * EltSize;
3325 DemandedSrcBits.setBits(Offset, Offset + EltSize);
3326 }
3327 }
3328 KnownBits Known;
3329 if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
3330 return true;
3331 }
3332 break;
3333 }
3334
3335 // Fast handling of 'identity' bitcasts.
3336 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3337 if (NumSrcElts == NumElts)
3338 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3339 KnownZero, TLO, Depth + 1);
3340
3341 APInt SrcDemandedElts, SrcZero, SrcUndef;
3342
3343 // Bitcast from 'large element' src vector to 'small element' vector, we
3344 // must demand a source element if any DemandedElt maps to it.
3345 if ((NumElts % NumSrcElts) == 0) {
3346 unsigned Scale = NumElts / NumSrcElts;
3347 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3348 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3349 TLO, Depth + 1))
3350 return true;
3351
3352 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3353 // of the large element.
3354 // TODO - bigendian once we have test coverage.
3355 if (IsLE) {
3356 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3357 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3358 for (unsigned i = 0; i != NumElts; ++i)
3359 if (DemandedElts[i]) {
3360 unsigned Ofs = (i % Scale) * EltSizeInBits;
3361 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3362 }
3363
3364 KnownBits Known;
3365 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3366 TLO, Depth + 1))
3367 return true;
3368
3369 // The bitcast has split each wide element into a number of
3370 // narrow subelements. We have just computed the Known bits
3371 // for wide elements. See if element splitting results in
3372 // some subelements being zero. Only for demanded elements!
3373 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3374 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3375 .isAllOnes())
3376 continue;
3377 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3378 unsigned Elt = Scale * SrcElt + SubElt;
3379 if (DemandedElts[Elt])
3380 KnownZero.setBit(Elt);
3381 }
3382 }
3383 }
3384
3385 // If the src element is zero/undef then all the output elements will be -
3386 // only demanded elements are guaranteed to be correct.
3387 for (unsigned i = 0; i != NumSrcElts; ++i) {
3388 if (SrcDemandedElts[i]) {
3389 if (SrcZero[i])
3390 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3391 if (SrcUndef[i])
3392 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3393 }
3394 }
3395 }
3396
3397 // Bitcast from 'small element' src vector to 'large element' vector, we
3398 // demand all smaller source elements covered by the larger demanded element
3399 // of this vector.
3400 if ((NumSrcElts % NumElts) == 0) {
3401 unsigned Scale = NumSrcElts / NumElts;
3402 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3403 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3404 TLO, Depth + 1))
3405 return true;
3406
3407 // If all the src elements covering an output element are zero/undef, then
3408 // the output element will be as well, assuming it was demanded.
3409 for (unsigned i = 0; i != NumElts; ++i) {
3410 if (DemandedElts[i]) {
3411 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3412 KnownZero.setBit(i);
3413 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3414 KnownUndef.setBit(i);
3415 }
3416 }
3417 }
3418 break;
3419 }
3420 case ISD::FREEZE: {
3421 SDValue N0 = Op.getOperand(0);
3423 N0, DemandedElts, UndefPoisonKind::UndefOrPoison, Depth + 1))
3424 return TLO.CombineTo(Op, N0);
3425
3426 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3427 // freeze(op(x, ...)) -> op(freeze(x), ...).
3428 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3429 return TLO.CombineTo(
3431 TLO.DAG.getFreeze(N0.getOperand(0))));
3432 break;
3433 }
3434 case ISD::BUILD_VECTOR: {
3435 // Check all elements and simplify any unused elements with UNDEF.
3436 if (!DemandedElts.isAllOnes()) {
3437 // Don't simplify BROADCASTS.
3438 if (llvm::any_of(Op->op_values(),
3439 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3441 bool Updated = false;
3442 for (unsigned i = 0; i != NumElts; ++i) {
3443 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3444 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3445 KnownUndef.setBit(i);
3446 Updated = true;
3447 }
3448 }
3449 if (Updated)
3450 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3451 }
3452 }
3453 for (unsigned i = 0; i != NumElts; ++i) {
3454 SDValue SrcOp = Op.getOperand(i);
3455 if (SrcOp.isUndef()) {
3456 KnownUndef.setBit(i);
3457 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3459 KnownZero.setBit(i);
3460 }
3461 }
3462 break;
3463 }
3464 case ISD::CONCAT_VECTORS: {
3465 EVT SubVT = Op.getOperand(0).getValueType();
3466 unsigned NumSubVecs = Op.getNumOperands();
3467 unsigned NumSubElts = SubVT.getVectorNumElements();
3468 for (unsigned i = 0; i != NumSubVecs; ++i) {
3469 SDValue SubOp = Op.getOperand(i);
3470 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3471 APInt SubUndef, SubZero;
3472 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3473 Depth + 1))
3474 return true;
3475 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3476 KnownZero.insertBits(SubZero, i * NumSubElts);
3477 }
3478
3479 // Attempt to avoid multi-use ops if we don't need anything from them.
3480 if (!DemandedElts.isAllOnes()) {
3481 bool FoundNewSub = false;
3482 SmallVector<SDValue, 2> DemandedSubOps;
3483 for (unsigned i = 0; i != NumSubVecs; ++i) {
3484 SDValue SubOp = Op.getOperand(i);
3485 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3487 SubOp, SubElts, TLO.DAG, Depth + 1);
3488 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3489 FoundNewSub = NewSubOp ? true : FoundNewSub;
3490 }
3491 if (FoundNewSub) {
3492 SDValue NewOp =
3493 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3494 return TLO.CombineTo(Op, NewOp);
3495 }
3496 }
3497 break;
3498 }
3499 case ISD::INSERT_SUBVECTOR: {
3500 // Demand any elements from the subvector and the remainder from the src it
3501 // is inserted into.
3502 SDValue Src = Op.getOperand(0);
3503 SDValue Sub = Op.getOperand(1);
3504 uint64_t Idx = Op.getConstantOperandVal(2);
3505 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3506 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3507 APInt DemandedSrcElts = DemandedElts;
3508 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
3509
3510 // If none of the sub operand elements are demanded, bypass the insert.
3511 if (!DemandedSubElts)
3512 return TLO.CombineTo(Op, Src);
3513
3514 APInt SubUndef, SubZero;
3515 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3516 Depth + 1))
3517 return true;
3518
3519 // If none of the src operand elements are demanded, replace it with undef.
3520 if (!DemandedSrcElts && !Src.isUndef())
3521 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3522 TLO.DAG.getUNDEF(VT), Sub,
3523 Op.getOperand(2)));
3524
3525 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3526 TLO, Depth + 1))
3527 return true;
3528 KnownUndef.insertBits(SubUndef, Idx);
3529 KnownZero.insertBits(SubZero, Idx);
3530
3531 // Attempt to avoid multi-use ops if we don't need anything from them.
3532 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3534 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3536 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3537 if (NewSrc || NewSub) {
3538 NewSrc = NewSrc ? NewSrc : Src;
3539 NewSub = NewSub ? NewSub : Sub;
3540 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3541 NewSub, Op.getOperand(2));
3542 return TLO.CombineTo(Op, NewOp);
3543 }
3544 }
3545 break;
3546 }
3548 // Offset the demanded elts by the subvector index.
3549 SDValue Src = Op.getOperand(0);
3550 if (Src.getValueType().isScalableVector())
3551 break;
3552 uint64_t Idx = Op.getConstantOperandVal(1);
3553 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3554 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3555
3556 APInt SrcUndef, SrcZero;
3557 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3558 Depth + 1))
3559 return true;
3560 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3561 KnownZero = SrcZero.extractBits(NumElts, Idx);
3562
3563 // Attempt to avoid multi-use ops if we don't need anything from them.
3564 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(Src, DemandedSrcElts,
3565 TLO.DAG, Depth + 1);
3566 if (NewSrc) {
3567 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3568 Op.getOperand(1));
3569 return TLO.CombineTo(Op, NewOp);
3570 }
3571 break;
3572 }
3574 SDValue Vec = Op.getOperand(0);
3575 SDValue Scl = Op.getOperand(1);
3576 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3577
3578 // For a legal, constant insertion index, if we don't need this insertion
3579 // then strip it, else remove it from the demanded elts.
3580 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3581 unsigned Idx = CIdx->getZExtValue();
3582 if (!DemandedElts[Idx])
3583 return TLO.CombineTo(Op, Vec);
3584
3585 APInt DemandedVecElts(DemandedElts);
3586 DemandedVecElts.clearBit(Idx);
3587 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3588 KnownZero, TLO, Depth + 1))
3589 return true;
3590
3591 KnownUndef.setBitVal(Idx, Scl.isUndef());
3592
3593 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3594 break;
3595 }
3596
3597 APInt VecUndef, VecZero;
3598 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3599 Depth + 1))
3600 return true;
3601 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3602 break;
3603 }
3604 case ISD::VSELECT: {
3605 SDValue Sel = Op.getOperand(0);
3606 SDValue LHS = Op.getOperand(1);
3607 SDValue RHS = Op.getOperand(2);
3608
3609 // Try to transform the select condition based on the current demanded
3610 // elements.
3611 APInt UndefSel, ZeroSel;
3612 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3613 Depth + 1))
3614 return true;
3615
3616 // See if we can simplify either vselect operand.
3617 APInt DemandedLHS(DemandedElts);
3618 APInt DemandedRHS(DemandedElts);
3619 APInt UndefLHS, ZeroLHS;
3620 APInt UndefRHS, ZeroRHS;
3621 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3622 Depth + 1))
3623 return true;
3624 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3625 Depth + 1))
3626 return true;
3627
3628 KnownUndef = UndefLHS & UndefRHS;
3629 KnownZero = ZeroLHS & ZeroRHS;
3630
3631 // If we know that the selected element is always zero, we don't need the
3632 // select value element.
3633 APInt DemandedSel = DemandedElts & ~KnownZero;
3634 if (DemandedSel != DemandedElts)
3635 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3636 Depth + 1))
3637 return true;
3638
3639 break;
3640 }
3641 case ISD::VECTOR_SHUFFLE: {
3642 SDValue LHS = Op.getOperand(0);
3643 SDValue RHS = Op.getOperand(1);
3644 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3645
3646 // Collect demanded elements from shuffle operands..
3647 APInt DemandedLHS(NumElts, 0);
3648 APInt DemandedRHS(NumElts, 0);
3649 for (unsigned i = 0; i != NumElts; ++i) {
3650 int M = ShuffleMask[i];
3651 if (M < 0 || !DemandedElts[i])
3652 continue;
3653 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3654 if (M < (int)NumElts)
3655 DemandedLHS.setBit(M);
3656 else
3657 DemandedRHS.setBit(M - NumElts);
3658 }
3659
3660 // If either side isn't demanded, replace it by UNDEF. We handle this
3661 // explicitly here to also simplify in case of multiple uses (on the
3662 // contrary to the SimplifyDemandedVectorElts calls below).
3663 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3664 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3665 if (FoldLHS || FoldRHS) {
3666 LHS = FoldLHS ? TLO.DAG.getUNDEF(LHS.getValueType()) : LHS;
3667 RHS = FoldRHS ? TLO.DAG.getUNDEF(RHS.getValueType()) : RHS;
3668 SDValue NewOp =
3669 TLO.DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, ShuffleMask);
3670 return TLO.CombineTo(Op, NewOp);
3671 }
3672
3673 // See if we can simplify either shuffle operand.
3674 APInt UndefLHS, ZeroLHS;
3675 APInt UndefRHS, ZeroRHS;
3676 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3677 Depth + 1))
3678 return true;
3679 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3680 Depth + 1))
3681 return true;
3682
3683 // Simplify mask using undef elements from LHS/RHS.
3684 bool Updated = false;
3685 bool IdentityLHS = true, IdentityRHS = true;
3686 SmallVector<int, 32> NewMask(ShuffleMask);
3687 for (unsigned i = 0; i != NumElts; ++i) {
3688 int &M = NewMask[i];
3689 if (M < 0)
3690 continue;
3691 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3692 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3693 Updated = true;
3694 M = -1;
3695 }
3696 IdentityLHS &= (M < 0) || (M == (int)i);
3697 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3698 }
3699
3700 // Update legal shuffle masks based on demanded elements if it won't reduce
3701 // to Identity which can cause premature removal of the shuffle mask.
3702 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3703 SDValue LegalShuffle =
3704 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3705 if (LegalShuffle)
3706 return TLO.CombineTo(Op, LegalShuffle);
3707 }
3708
3709 // Propagate undef/zero elements from LHS/RHS.
3710 for (unsigned i = 0; i != NumElts; ++i) {
3711 int M = ShuffleMask[i];
3712 if (M < 0) {
3713 KnownUndef.setBit(i);
3714 } else if (M < (int)NumElts) {
3715 if (UndefLHS[M])
3716 KnownUndef.setBit(i);
3717 if (ZeroLHS[M])
3718 KnownZero.setBit(i);
3719 } else {
3720 if (UndefRHS[M - NumElts])
3721 KnownUndef.setBit(i);
3722 if (ZeroRHS[M - NumElts])
3723 KnownZero.setBit(i);
3724 }
3725 }
3726 break;
3727 }
3731 APInt SrcUndef, SrcZero;
3732 SDValue Src = Op.getOperand(0);
3733 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3734 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3735 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3736 Depth + 1))
3737 return true;
3738 KnownZero = SrcZero.zextOrTrunc(NumElts);
3739 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3740
3741 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3742 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3743 DemandedSrcElts == 1) {
3744 // aext - if we just need the bottom element then we can bitcast.
3745 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3746 }
3747
3748 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3749 // zext(undef) upper bits are guaranteed to be zero.
3750 if (DemandedElts.isSubsetOf(KnownUndef))
3751 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3752 KnownUndef.clearAllBits();
3753
3754 // zext - if we just need the bottom element then we can mask:
3755 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3756 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3757 Op->isOnlyUserOf(Src.getNode()) &&
3758 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3759 SDLoc DL(Op);
3760 EVT SrcVT = Src.getValueType();
3761 EVT SrcSVT = SrcVT.getScalarType();
3762
3763 // If we're after type legalization and SrcSVT is not legal, use the
3764 // promoted type for creating constants to avoid creating nodes with
3765 // illegal types.
3766 if (TLO.LegalTypes())
3767 SrcSVT = getLegalTypeToTransformTo(*TLO.DAG.getContext(), SrcSVT);
3768
3769 SmallVector<SDValue> MaskElts;
3770 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3771 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3772 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3773 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3774 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3775 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3776 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3777 }
3778 }
3779 }
3780 break;
3781 }
3782
3783 // TODO: There are more binop opcodes that could be handled here - MIN,
3784 // MAX, saturated math, etc.
3785 case ISD::ADD: {
3786 SDValue Op0 = Op.getOperand(0);
3787 SDValue Op1 = Op.getOperand(1);
3788 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3789 APInt UndefLHS, ZeroLHS;
3790 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3791 Depth + 1, /*AssumeSingleUse*/ true))
3792 return true;
3793 }
3794 [[fallthrough]];
3795 }
3796 case ISD::AVGCEILS:
3797 case ISD::AVGCEILU:
3798 case ISD::AVGFLOORS:
3799 case ISD::AVGFLOORU:
3800 case ISD::OR:
3801 case ISD::XOR:
3802 case ISD::SUB:
3803 case ISD::FADD:
3804 case ISD::FSUB:
3805 case ISD::FMUL:
3806 case ISD::FDIV:
3807 case ISD::FREM: {
3808 SDValue Op0 = Op.getOperand(0);
3809 SDValue Op1 = Op.getOperand(1);
3810
3811 APInt UndefRHS, ZeroRHS;
3812 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3813 Depth + 1))
3814 return true;
3815 APInt UndefLHS, ZeroLHS;
3816 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3817 Depth + 1))
3818 return true;
3819
3820 KnownZero = ZeroLHS & ZeroRHS;
3821 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3822
3823 // Attempt to avoid multi-use ops if we don't need anything from them.
3824 // TODO - use KnownUndef to relax the demandedelts?
3825 if (!DemandedElts.isAllOnes())
3826 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3827 return true;
3828 break;
3829 }
3830 case ISD::SHL:
3831 case ISD::SRL:
3832 case ISD::SRA:
3833 case ISD::ROTL:
3834 case ISD::ROTR: {
3835 SDValue Op0 = Op.getOperand(0);
3836 SDValue Op1 = Op.getOperand(1);
3837
3838 APInt UndefRHS, ZeroRHS;
3839 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3840 Depth + 1))
3841 return true;
3842 APInt UndefLHS, ZeroLHS;
3843 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3844 Depth + 1))
3845 return true;
3846
3847 KnownZero = ZeroLHS;
3848 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3849
3850 // Attempt to avoid multi-use ops if we don't need anything from them.
3851 // TODO - use KnownUndef to relax the demandedelts?
3852 if (!DemandedElts.isAllOnes())
3853 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3854 return true;
3855 break;
3856 }
3857 case ISD::MUL:
3858 case ISD::MULHU:
3859 case ISD::MULHS:
3860 case ISD::AND: {
3861 SDValue Op0 = Op.getOperand(0);
3862 SDValue Op1 = Op.getOperand(1);
3863
3864 APInt SrcUndef, SrcZero;
3865 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3866 Depth + 1))
3867 return true;
3868 // FIXME: If we know that a demanded element was zero in Op1 we don't need
3869 // to demand it in Op0 - it's guaranteed to be zero. There is however a
3870 // restriction, as we must not make any of the originally demanded elements
3871 // more poisonous. We could reduce the number of elements demanded, but then
3872 // we would also need to inform SimplifyDemandedVectorElts that some elements
3873 // must not be made more poisonous.
3874 if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
3875 TLO, Depth + 1))
3876 return true;
3877
3878 KnownUndef &= DemandedElts;
3879 KnownZero &= DemandedElts;
3880
3881 // If every element pair has a zero/undef/poison then just fold to zero.
3882 // fold (and x, undef/poison) -> 0 / (and x, 0) -> 0
3883 // fold (mul x, undef/poison) -> 0 / (mul x, 0) -> 0
3884 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3885 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3886
3887 // If either side has a zero element, then the result element is zero, even
3888 // if the other is an UNDEF.
3889 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3890 // and then handle 'and' nodes with the rest of the binop opcodes.
3891 KnownZero |= SrcZero;
3892 KnownUndef &= SrcUndef;
3893 KnownUndef &= ~KnownZero;
3894
3895 // Attempt to avoid multi-use ops if we don't need anything from them.
3896 if (!DemandedElts.isAllOnes())
3897 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3898 return true;
3899 break;
3900 }
3901 case ISD::TRUNCATE:
3902 case ISD::SIGN_EXTEND:
3903 case ISD::ZERO_EXTEND:
3904 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3905 KnownZero, TLO, Depth + 1))
3906 return true;
3907
3908 if (!DemandedElts.isAllOnes())
3910 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3911 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3912
3913 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3914 // zext(undef) upper bits are guaranteed to be zero.
3915 if (DemandedElts.isSubsetOf(KnownUndef))
3916 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3917 KnownUndef.clearAllBits();
3918 }
3919 break;
3920 case ISD::SINT_TO_FP:
3921 case ISD::UINT_TO_FP:
3922 case ISD::FP_TO_SINT:
3923 case ISD::FP_TO_UINT:
3924 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3925 KnownZero, TLO, Depth + 1))
3926 return true;
3927 // Don't fall through to generic undef -> undef handling.
3928 return false;
3929 default: {
3930 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3931 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3932 KnownZero, TLO, Depth))
3933 return true;
3934 } else {
3935 KnownBits Known;
3936 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3937 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3938 TLO, Depth, AssumeSingleUse))
3939 return true;
3940 }
3941 break;
3942 }
3943 }
3944 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3945
3946 // Constant fold all undef cases.
3947 // TODO: Handle zero cases as well.
3948 if (DemandedElts.isSubsetOf(KnownUndef))
3949 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3950
3951 return false;
3952}
3953
3954/// Determine which of the bits specified in Mask are known to be either zero or
3955/// one and return them in the Known.
3957 KnownBits &Known,
3958 const APInt &DemandedElts,
3959 const SelectionDAG &DAG,
3960 unsigned Depth) const {
3961 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3962 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3963 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3964 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3965 "Should use MaskedValueIsZero if you don't know whether Op"
3966 " is a target node!");
3967 Known.resetAll();
3968}
3969
3972 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3973 unsigned Depth) const {
3974 Known.resetAll();
3975}
3976
3979 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3980 unsigned Depth) const {
3981 Known.resetAll();
3982}
3983
3985 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3986 // The low bits are known zero if the pointer is aligned.
3987 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3988}
3989
3995
3996/// This method can be implemented by targets that want to expose additional
3997/// information about sign bits to the DAG Combiner.
3999 const APInt &,
4000 const SelectionDAG &,
4001 unsigned Depth) const {
4002 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4003 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4004 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4005 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4006 "Should use ComputeNumSignBits if you don't know whether Op"
4007 " is a target node!");
4008 return 1;
4009}
4010
4012 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
4013 const MachineRegisterInfo &MRI, unsigned Depth) const {
4014 return 1;
4015}
4016
4018 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
4019 TargetLoweringOpt &TLO, unsigned Depth) const {
4020 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4021 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4022 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4023 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4024 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
4025 " is a target node!");
4026 return false;
4027}
4028
4030 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4031 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
4032 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4033 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4034 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4035 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4036 "Should use SimplifyDemandedBits if you don't know whether Op"
4037 " is a target node!");
4038 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
4039 return false;
4040}
4041
4043 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4044 SelectionDAG &DAG, unsigned Depth) const {
4045 assert(
4046 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4047 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4048 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4049 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4050 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
4051 " is a target node!");
4052 return SDValue();
4053}
4054
4055SDValue
4058 SelectionDAG &DAG) const {
4059 bool LegalMask = isShuffleMaskLegal(Mask, VT);
4060 if (!LegalMask) {
4061 std::swap(N0, N1);
4063 LegalMask = isShuffleMaskLegal(Mask, VT);
4064 }
4065
4066 if (!LegalMask)
4067 return SDValue();
4068
4069 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
4070}
4071
4073 return nullptr;
4074}
4075
4077 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4078 UndefPoisonKind Kind, unsigned Depth) const {
4079 assert(
4080 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4081 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4082 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4083 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4084 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4085 " is a target node!");
4086
4087 // If Op can't create undef/poison and none of its operands are undef/poison
4088 // then Op is never undef/poison.
4089 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, Kind,
4090 /*ConsiderFlags*/ true, Depth) &&
4091 all_of(Op->ops(), [&](SDValue V) {
4092 return DAG.isGuaranteedNotToBeUndefOrPoison(V, Kind, Depth + 1);
4093 });
4094}
4095
4097 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4098 UndefPoisonKind Kind, bool ConsiderFlags, unsigned Depth) const {
4099 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4100 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4101 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4102 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4103 "Should use canCreateUndefOrPoison if you don't know whether Op"
4104 " is a target node!");
4105 // Be conservative and return true.
4106 return true;
4107}
4108
4110 KnownFPClass &Known,
4111 const APInt &DemandedElts,
4112 const SelectionDAG &DAG,
4113 unsigned Depth) const {
4114 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4115 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4116 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4117 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4118 "Should use computeKnownFPClass if you don't know whether Op"
4119 " is a target node!");
4120}
4121
4123 const APInt &DemandedElts,
4124 const SelectionDAG &DAG,
4125 bool SNaN,
4126 unsigned Depth) const {
4127 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4128 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4129 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4130 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4131 "Should use isKnownNeverNaN if you don't know whether Op"
4132 " is a target node!");
4133 return false;
4134}
4135
4137 const APInt &DemandedElts,
4138 APInt &UndefElts,
4139 const SelectionDAG &DAG,
4140 unsigned Depth) const {
4141 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4142 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4143 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4144 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4145 "Should use isSplatValue if you don't know whether Op"
4146 " is a target node!");
4147 return false;
4148}
4149
4150// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4151// work with truncating build vectors and vectors with elements of less than
4152// 8 bits.
4154 if (!N)
4155 return false;
4156
4157 unsigned EltWidth;
4158 APInt CVal;
4159 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4160 /*AllowTruncation=*/true)) {
4161 CVal = CN->getAPIntValue();
4162 EltWidth = N.getValueType().getScalarSizeInBits();
4163 } else
4164 return false;
4165
4166 // If this is a truncating splat, truncate the splat value.
4167 // Otherwise, we may fail to match the expected values below.
4168 if (EltWidth < CVal.getBitWidth())
4169 CVal = CVal.trunc(EltWidth);
4170
4171 switch (getBooleanContents(N.getValueType())) {
4173 return CVal[0];
4175 return CVal.isOne();
4177 return CVal.isAllOnes();
4178 }
4179
4180 llvm_unreachable("Invalid boolean contents");
4181}
4182
4184 if (!N)
4185 return false;
4186
4188 if (!CN) {
4190 if (!BV)
4191 return false;
4192
4193 // Only interested in constant splats, we don't care about undef
4194 // elements in identifying boolean constants and getConstantSplatNode
4195 // returns NULL if all ops are undef;
4196 CN = BV->getConstantSplatNode();
4197 if (!CN)
4198 return false;
4199 }
4200
4201 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4202 return !CN->getAPIntValue()[0];
4203
4204 return CN->isZero();
4205}
4206
4208 bool SExt) const {
4209 if (VT == MVT::i1)
4210 return N->isOne();
4211
4213 switch (Cnt) {
4215 // An extended value of 1 is always true, unless its original type is i1,
4216 // in which case it will be sign extended to -1.
4217 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4220 return N->isAllOnes() && SExt;
4221 }
4222 llvm_unreachable("Unexpected enumeration.");
4223}
4224
4225/// This helper function of SimplifySetCC tries to optimize the comparison when
4226/// either operand of the SetCC node is a bitwise-and instruction.
///
/// Only integer SETEQ/SETNE comparisons are considered. Every path that does
/// not produce a fold returns an empty SDValue.
4227SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4228 ISD::CondCode Cond, const SDLoc &DL,
4229 DAGCombinerInfo &DCI) const {
  // Canonicalize: when only N1 is an AND, swap so that the AND ends up in N0.
4230 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4231 std::swap(N0, N1);
4232
4233 SelectionDAG &DAG = DCI.DAG;
4234 EVT OpVT = N0.getValueType();
  // Nothing to do unless N0 is an integer AND compared with == or !=.
4235 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4236 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4237 return SDValue();
4238
4239 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4240 // iff everything but LSB is known zero:
4241 if (Cond == ISD::SETNE && isNullConstant(N1) &&
  // NOTE(review): the remainder of this condition and its opening brace are
  // not present in this copy of the file - restore them from upstream.
4244 unsigned NumEltBits = OpVT.getScalarSizeInBits();
  // Mask covering every bit of an element except the least significant one.
4245 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4246 if (DAG.MaskedValueIsZero(N0, UpperBits))
4247 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4248 }
4249
4250 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4251 // test in a narrow type that we can truncate to with no cost. Examples:
4252 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4253 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4254 // TODO: This conservatively checks for type legality on the source and
4255 // destination types. That may inhibit optimizations, but it also
4256 // allows setcc->shift transforms that may be more beneficial.
4257 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4258 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4259 isTypeLegal(OpVT) && N0.hasOneUse()) {
  // The narrow type is exactly wide enough for the mask bit to be its top bit.
4260 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4261 AndC->getAPIntValue().getActiveBits());
4262 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4263 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4264 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4265 return DAG.getSetCC(DL, VT, Trunc, Zero,
  // NOTE(review): the last argument of this getSetCC call (presumably the
  // condition code) is missing from this copy of the file.
4267 }
4268 }
4269
4270 // Match these patterns in any of their permutations:
4271 // (X & Y) == Y
4272 // (X & Y) != Y
4273 SDValue X, Y;
  // Y is whichever AND operand is also the other side of the comparison (N1);
  // X is the remaining AND operand.
4274 if (N0.getOperand(0) == N1) {
4275 X = N0.getOperand(1);
4276 Y = N0.getOperand(0);
4277 } else if (N0.getOperand(1) == N1) {
4278 X = N0.getOperand(0);
4279 Y = N0.getOperand(1);
4280 } else {
4281 return SDValue();
4282 }
4283
4284 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4285 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4286 // it's liable to create an infinite loop.
4287 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4288 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
  // NOTE(review): the second half of this condition and its opening brace are
  // missing from this copy of the file.
4290 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4291 // Note that where Y is variable and is known to have at most one bit set
4292 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4293 // equivalent when Y == 0.
4294 assert(OpVT.isInteger());
  // NOTE(review): a statement is missing here in this copy of the file, as is
  // the tail of the condition that follows.
4296 if (DCI.isBeforeLegalizeOps() ||
4298 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4299 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4300 // If the target supports an 'and-not' or 'and-complement' logic operation,
4301 // try to use that to make a comparison operation more efficient.
4302 // But don't do this transform if the mask is a single bit because there are
4303 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4304 // 'rlwinm' on PPC).
4305
4306 // Bail out if the compare operand that we want to turn into a zero is
4307 // already a zero (otherwise, infinite loop).
4308 if (isNullConstant(Y))
4309 return SDValue();
4310
4311 // Transform this into: ~X & Y == 0.
4312 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4313 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4314 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4315 }
4316
4317 return SDValue();
4318}
4319
4320/// This helper function of SimplifySetCC tries to optimize the comparison when
4321/// either operand of the SetCC node is a bitwise-or instruction.
4322/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4323SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4324 ISD::CondCode Cond, const SDLoc &DL,
4325 DAGCombinerInfo &DCI) const {
4326 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4327 std::swap(N0, N1);
4328
4329 SelectionDAG &DAG = DCI.DAG;
4330 EVT OpVT = N0.getValueType();
4331 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4332 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4333 return SDValue();
4334
4335 // (X | Y) == Y
4336 // (X | Y) != Y
4337 SDValue X;
4338 if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(X)) {
4339 // If the target supports an 'and-not' or 'and-complement' logic operation,
4340 // try to use that to make a comparison operation more efficient.
4341
4342 // Bail out if the compare operand that we want to turn into a zero is
4343 // already a zero (otherwise, infinite loop).
4344 if (isNullConstant(N1))
4345 return SDValue();
4346
4347 // Transform this into: X & ~Y ==/!= 0.
4348 SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
4349 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
4350 return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
4351 }
4352
4353 return SDValue();
4354}
4355
4356/// There are multiple IR patterns that could be checking whether certain
4357/// truncation of a signed number would be lossy or not. The pattern which is
4358/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4359/// We are looking for the following pattern: (KeptBits is a constant)
4360/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4361/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4362/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4363/// We will unfold it into the natural trunc+sext pattern:
4364/// ((%x << C) a>> C) dstcond %x
4365/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
///
/// Returns the new setcc, or an empty SDValue when the pattern does not match
/// or shouldTransformSignedTruncationCheck() rejects the transform.
4366SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4367 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4368 const SDLoc &DL) const {
4369 // We must be comparing with a constant.
4370 ConstantSDNode *C1;
4371 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4372 return SDValue();
4373
4374 // N0 should be: add %x, (1 << (KeptBits-1))
4375 if (N0->getOpcode() != ISD::ADD)
4376 return SDValue();
4377
4378 // And we must be 'add'ing a constant.
4379 ConstantSDNode *C01;
4380 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4381 return SDValue();
4382
4383 SDValue X = N0->getOperand(0);
4384 EVT XVT = X.getValueType();
4385
4386 // Validate constants ...
4387
  // Work on copies of the constants: they may be adjusted or negated below.
4388 APInt I1 = C1->getAPIntValue();
4389
  // Choose the equality predicate of the unfolded form. ULT/ULE map to SETEQ
  // and UGT/UGE map to SETNE; for ULE and UGT the setcc constant is bumped by
  // one so that it matches the ULT/UGE form of the pattern.
4390 ISD::CondCode NewCond;
4391 if (Cond == ISD::CondCode::SETULT) {
4392 NewCond = ISD::CondCode::SETEQ;
4393 } else if (Cond == ISD::CondCode::SETULE) {
4394 NewCond = ISD::CondCode::SETEQ;
4395 // But need to 'canonicalize' the constant.
4396 I1 += 1;
4397 } else if (Cond == ISD::CondCode::SETUGT) {
4398 NewCond = ISD::CondCode::SETNE;
4399 // But need to 'canonicalize' the constant.
4400 I1 += 1;
4401 } else if (Cond == ISD::CondCode::SETUGE) {
4402 NewCond = ISD::CondCode::SETNE;
4403 } else
4404 return SDValue();
4405
4406 APInt I01 = C01->getAPIntValue();
4407
  // The lambda captures I1 and I01 by reference, so the second call below
  // sees the negated values.
4408 auto checkConstants = [&I1, &I01]() -> bool {
4409 // Both of them must be power-of-two, and the constant from setcc is bigger.
4410 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4411 };
4412
4413 if (checkConstants()) {
4414 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4415 } else {
4416 // What if we invert constants? (and the target predicate)
4417 I1.negate();
4418 I01.negate();
4419 assert(XVT.isInteger());
4420 NewCond = getSetCCInverse(NewCond, XVT);
4421 if (!checkConstants())
4422 return SDValue();
4423 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4424 }
4425
4426 // They are power-of-two, so which bit is set?
4427 const unsigned KeptBits = I1.logBase2();
4428 const unsigned KeptBitsMinusOne = I01.logBase2();
4429
4430 // Magic!
  // The add constant must be exactly half of the compare constant.
4431 if (KeptBits != (KeptBitsMinusOne + 1))
4432 return SDValue();
4433 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4434
4435 // We don't want to do this in every single case.
4436 SelectionDAG &DAG = DCI.DAG;
4437 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4438 return SDValue();
4439
4440 // Unfold into: sext_inreg(%x) cond %x
4441 // Where 'cond' will be either 'eq' or 'ne'.
4442 SDValue SExtInReg = DAG.getNode(
  // NOTE(review): the line carrying the opcode and leading operands of this
  // getNode call is missing from this copy of the file.
4444 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4445 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4446}
4447
4448// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
// Returns the rewritten setcc, or an empty SDValue when N0 is not a one-use
// 'and' or neither of its operands is accepted by the Match lambda below.
4449SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4450 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4451 DAGCombinerInfo &DCI, const SDLoc &DL) const {
  // NOTE(review): the opening of the assert that owns the next message line
  // is missing from this copy of the file.
4453 "Should be a comparison with 0.");
4454 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4455 "Valid only for [in]equality comparisons.");
4456
4457 unsigned NewShiftOpcode;
4458 SDValue X, C, Y;
4459
4460 SelectionDAG &DAG = DCI.DAG;
4461
4462 // Look for '(C l>>/<< Y)'.
  // Match reads X, which the caller assigns before invoking it, and writes
  // NewShiftOpcode, C and Y through its by-reference captures.
4463 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4464 // The shift should be one-use.
4465 if (!V.hasOneUse())
4466 return false;
4467 unsigned OldShiftOpcode = V.getOpcode();
  // The replacement shift goes in the opposite direction of the original.
4468 switch (OldShiftOpcode) {
4469 case ISD::SHL:
4470 NewShiftOpcode = ISD::SRL;
4471 break;
4472 case ISD::SRL:
4473 NewShiftOpcode = ISD::SHL;
4474 break;
4475 default:
4476 return false; // must be a logical shift.
4477 }
4478 // We should be shifting a constant.
4479 // FIXME: best to use isConstantOrConstantVector().
4480 C = V.getOperand(0);
4481 ConstantSDNode *CC =
4482 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4483 if (!CC)
4484 return false;
4485 Y = V.getOperand(1);
4486
  // XC is null when X is not a constant or constant splat.
4487 ConstantSDNode *XC =
4488 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
  // NOTE(review): the start of the lambda's final return statement (the
  // callee name) is missing from this copy of the file.
4490 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4491 };
4492
4493 // LHS of comparison should be a one-use 'and'.
4494 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4495 return SDValue();
4496
4497 X = N0.getOperand(0);
4498 SDValue Mask = N0.getOperand(1);
4499
4500 // 'and' is commutative!
  // Try operand 1 as the shift first, then retry with the roles exchanged.
4501 if (!Match(Mask)) {
4502 std::swap(X, Mask);
4503 if (!Match(Mask))
4504 return SDValue();
4505 }
4506
4507 EVT VT = X.getValueType();
4508
4509 // Produce:
4510 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4511 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4512 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4513 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4514 return T2;
4515}
4516
4517/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4518/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4519/// handle the commuted versions of these patterns.
///
/// \param VT   Result type of the setcc nodes that are created.
/// \param N0   The add/sub/xor node (asserted below).
/// \param N1   The value N0 is compared against; a fold only happens when it
///             is identical to one of N0's two operands.
/// \param Cond Must be SETEQ or SETNE (asserted below).
/// \param DL   Location attached to the new nodes.
/// \param DCI  Provides the DAG and the combiner worklist.
/// \returns the replacement setcc, or a null SDValue if no fold applies.
4520SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4521 ISD::CondCode Cond, const SDLoc &DL,
4522 DAGCombinerInfo &DCI) const {
4523 unsigned BOpcode = N0.getOpcode();
4524 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4525 "Unexpected binop");
4526 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4527
4528 // (X + Y) == X --> Y == 0
4529 // (X - Y) == X --> Y == 0
4530 // (X ^ Y) == X --> Y == 0
4531 SelectionDAG &DAG = DCI.DAG;
4532 EVT OpVT = N0.getValueType();
4533 SDValue X = N0.getOperand(0);
4534 SDValue Y = N0.getOperand(1);
     // The comparison value is the binop's first operand: valid for all three
     // opcodes, so no opcode check is needed here.
4535 if (X == N1)
4536 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4537
     // From here on the comparison value must be the binop's second operand.
4538 if (Y != N1)
4539 return SDValue();
4540
4541 // (X + Y) == Y --> X == 0
4542 // (X ^ Y) == Y --> X == 0
4543 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4544 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4545
     // Only SUB remains. Its fold creates a new shift node, so it is skipped
     // when the subtraction has other users.
4546 // The shift would not be valid if the operands are boolean (i1).
4547 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4548 return SDValue();
4549
4550 // (X - Y) == Y --> X == Y << 1
4551 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4552 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
     // Queue the new shift for the combiner, except when invoked from the
     // legalizer.
4553 if (!DCI.isCalledByLegalizer())
4554 DCI.AddToWorklist(YShl1.getNode());
4555 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4556}
4557
// Rewrites a comparison of (ctpop x) against the constant C1 into bit tricks
// on x that avoid the population count. N0 is the compared value (a one-use
// ctpop, optionally behind a one-use scalar truncate), Cond the condition
// code, and VT the type passed to the setcc nodes that are created. Returns a
// null SDValue when no rewrite applies.
// NOTE(review): the first line of this signature (upstream line 4558) is
// absent from this listing. The parameter list and the call made from
// SimplifySetCC suggest this is simplifySetCCWithCTPOP, whose leading
// parameters are TLI and VT - confirm against upstream.
4559 SDValue N0, const APInt &C1,
4560 ISD::CondCode Cond, const SDLoc &dl,
4561 SelectionDAG &DAG) {
4562 // Look through truncs that don't change the value of a ctpop.
4563 // FIXME: Add vector support? Need to be careful with setcc result type below.
4564 SDValue CTPOP = N0;
     // NOTE(review): the last clause of the following condition (upstream line
     // 4566) is absent from this listing.
4565 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4567 CTPOP = N0.getOperand(0);
4568
     // Only a ctpop with no other users is rewritten.
4569 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4570 return SDValue();
4571
4572 EVT CTVT = CTPOP.getValueType();
4573 SDValue CTOp = CTPOP.getOperand(0);
4574
4575 // Expand a power-of-2-or-zero comparison based on ctpop:
4576 // (ctpop x) u< 2 -> (x & x-1) == 0
4577 // (ctpop x) u> 1 -> (x & x-1) != 0
4578 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4579 // Keep the CTPOP if it is a cheap vector op.
4580 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4581 return SDValue();
4582
     // The target supplies a limit on C1; SETULT is allowed a constant one
     // larger because it needs one pass fewer than SETUGT (see Passes below).
4583 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4584 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4585 return SDValue();
4586 if (C1 == 0 && (Cond == ISD::SETULT))
4587 return SDValue(); // This is handled elsewhere.
4588
4589 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4590
     // Each pass computes Result & (Result - 1), which clears the lowest set
     // bit; after 'Passes' iterations the value is compared with zero.
4591 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4592 SDValue Result = CTOp;
4593 for (unsigned i = 0; i < Passes; i++) {
4594 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4595 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4596 }
     // NOTE(review): the declaration of CC (upstream line 4597) is absent from
     // this listing; per the comment above it should be the eq/ne condition
     // matching u< / u> respectively - confirm against upstream.
4598 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4599 }
4600
4601 // Expand a power-of-2 comparison based on ctpop
4602 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4603 // Keep the CTPOP if it is cheap.
4604 if (TLI.isCtpopFast(CTVT))
4605 return SDValue();
4606
4607 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4608 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4609 assert(CTVT.isInteger());
     // Add = x - 1, shared by both expansions below.
4610 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4611
4612 // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4613 // check before emitting a potentially unnecessary op.
4614 if (DAG.isKnownNeverZero(CTOp)) {
4615 // (ctpop x) == 1 --> (x & x-1) == 0
4616 // (ctpop x) != 1 --> (x & x-1) != 0
4617 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4618 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4619 return RHS;
4620 }
4621
4622 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4623 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4624 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
     // NOTE(review): the declaration of CmpCond (upstream line 4625) is absent
     // from this listing; per the comment above it selects the '>' or '<='
     // form depending on Cond - confirm signedness against upstream.
4626 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4627 }
4628
4629 return SDValue();
4630}
4631
// Simplifies an eq/ne comparison of a rotated value (or an 'or' containing a
// rotated value) against an all-zeros or all-ones constant by dropping the
// rotate. The body uses VT as the setcc result type, N0 as the compared value
// and N1 as the constant. Returns a null SDValue when nothing matches.
// NOTE(review): the first line of this signature (upstream line 4632) is
// absent from this listing. The call made from SimplifySetCC suggests this is
// foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG) - confirm against upstream.
4633 ISD::CondCode Cond, const SDLoc &dl,
4634 SelectionDAG &DAG) {
4635 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4636 return SDValue();
4637
     // Only comparisons against 0 or -1 (scalar or splat) are handled.
4638 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4639 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4640 return SDValue();
4641
     // Returns the value being rotated if X is a ROTL/ROTR node, otherwise a
     // null SDValue.
4642 auto getRotateSource = [](SDValue X) {
4643 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4644 return X.getOperand(0);
4645 return SDValue();
4646 };
4647
4648 // Peek through a rotated value compared against 0 or -1:
4649 // (rot X, Y) == 0/-1 --> X == 0/-1
4650 // (rot X, Y) != 0/-1 --> X != 0/-1
4651 if (SDValue R = getRotateSource(N0))
4652 return DAG.getSetCC(dl, VT, R, N1, Cond);
4653
4654 // Peek through an 'or' of a rotated value compared against 0:
4655 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4656 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4657 //
4658 // TODO: Add the 'and' with -1 sibling.
4659 // TODO: Recurse through a series of 'or' ops to find the rotate.
4660 EVT OpVT = N0.getValueType();
     // Restricted to a one-use 'or' compared against zero; each operand is
     // checked in turn for a rotate, and the first one found is stripped.
4661 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4662 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4663 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4664 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4665 }
4666 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4667 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4668 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4669 }
4670 }
4671
4672 return SDValue();
4673}
4674
// Simplifies an eq/ne comparison against zero of a one-use funnel shift with
// a constant amount, when one funnel-shift input is a one-use 'or' that has
// the other input as an operand. The funnel shift is replaced by a plain
// shift of the remaining 'or' operand. The body uses VT as the setcc result
// type, N0 as the compared value and N1 as the constant. Returns a null
// SDValue when nothing matches.
// NOTE(review): the first line of this signature (upstream line 4675) is
// absent from this listing. The call made from SimplifySetCC suggests this is
// foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG) - confirm upstream.
4676 ISD::CondCode Cond, const SDLoc &dl,
4677 SelectionDAG &DAG) {
4678 // If we are testing for all-bits-clear, we might be able to do that with
4679 // less shifting since bit-order does not matter.
4680 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4681 return SDValue();
4682
4683 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4684 if (!C1 || !C1->isZero())
4685 return SDValue();
4686
4687 if (!N0.hasOneUse() ||
4688 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4689 return SDValue();
4690
     // The shift amount must be a constant (or constant splat).
4691 unsigned BitWidth = N0.getScalarValueSizeInBits();
4692 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4693 if (!ShAmtC)
4694 return SDValue();
4695
     // Reduce the amount modulo the element width; an effective amount of zero
     // is not handled here.
4696 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(BitWidth);
4697 if (ShAmt == 0)
4698 return SDValue();
4699
4700 // Canonicalize fshr as fshl to reduce pattern-matching.
     // ShAmt is in [1, BitWidth-1] at this point, so the converted amount
     // stays in that range as well.
4701 if (N0.getOpcode() == ISD::FSHR)
4702 ShAmt = BitWidth - ShAmt;
4703
4704 // Match an 'or' with a specific operand 'Other' in either commuted variant.
     // On success X is the operand equal to Other and Y is the remaining one.
4705 SDValue X, Y;
4706 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4707 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4708 return false;
4709 if (Or.getOperand(0) == Other) {
4710 X = Or.getOperand(0);
4711 Y = Or.getOperand(1);
4712 return true;
4713 }
4714 if (Or.getOperand(1) == Other) {
4715 X = Or.getOperand(1);
4716 Y = Or.getOperand(0);
4717 return true;
4718 }
4719 return false;
4720 };
4721
4722 EVT OpVT = N0.getValueType();
     // New shift-amount constants reuse the type of the original amount.
4723 EVT ShAmtVT = N0.getOperand(2).getValueType();
4724 SDValue F0 = N0.getOperand(0);
4725 SDValue F1 = N0.getOperand(1);
     // Case 1: the first funnel-shift input is the 'or'.
4726 if (matchOr(F0, F1)) {
4727 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4728 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4729 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4730 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4731 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4732 }
     // Case 2: the second funnel-shift input is the 'or'.
4733 if (matchOr(F1, F0)) {
4734 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4735 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4736 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4737 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4738 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4739 }
4740
4741 return SDValue();
4742}
4743
4744/// Try to simplify a setcc built with the specified operands and cc. If it is
4745/// unable to simplify it, return a null SDValue.
4747 ISD::CondCode Cond, bool foldBooleans,
4748 DAGCombinerInfo &DCI,
4749 const SDLoc &dl) const {
4750 SelectionDAG &DAG = DCI.DAG;
4751 const DataLayout &Layout = DAG.getDataLayout();
4752 EVT OpVT = N0.getValueType();
4753 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4754
4755 // Constant fold or commute setcc.
4756 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4757 return Fold;
4758
4759 bool N0ConstOrSplat =
4760 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4761 bool N1ConstOrSplat =
4762 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4763
4764 // Canonicalize toward having the constant on the RHS.
4765 // TODO: Handle non-splat vector constants. All undef causes trouble.
4766 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4767 // infinite loop here when we encounter one.
4769 if (N0ConstOrSplat && !N1ConstOrSplat &&
4770 (DCI.isBeforeLegalizeOps() ||
4771 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4772 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4773
4774 // If we have a subtract with the same 2 non-constant operands as this setcc
4775 // -- but in reverse order -- then try to commute the operands of this setcc
4776 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4777 // instruction on some targets.
4778 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4779 (DCI.isBeforeLegalizeOps() ||
4780 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4781 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4782 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4783 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4784
4785 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4786 return V;
4787
4788 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4789 return V;
4790
4791 if (auto *N1C = isConstOrConstSplat(N1)) {
4792 const APInt &C1 = N1C->getAPIntValue();
4793
4794 // Optimize some CTPOP cases.
4795 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4796 return V;
4797
4798 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4799 // X * Y == 0 --> (X == 0) || (Y == 0)
4800 // X * Y != 0 --> (X != 0) && (Y != 0)
4801 // TODO: This bails out if minsize is set, but if the target doesn't have a
4802 // single instruction multiply for this type, it would likely be
4803 // smaller to decompose.
4804 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4805 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4806 (N0->getFlags().hasNoUnsignedWrap() ||
4807 N0->getFlags().hasNoSignedWrap()) &&
4808 !Attr.hasFnAttr(Attribute::MinSize)) {
4809 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4810 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4811 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4812 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4813 }
4814
4815 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4816 // equality comparison, then we're just comparing whether X itself is
4817 // zero.
4818 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4819 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4821 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4822 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4823 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4824 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4825 // (srl (ctlz x), 5) == 0 -> X != 0
4826 // (srl (ctlz x), 5) != 1 -> X != 0
4827 Cond = ISD::SETNE;
4828 } else {
4829 // (srl (ctlz x), 5) != 0 -> X == 0
4830 // (srl (ctlz x), 5) == 1 -> X == 0
4831 Cond = ISD::SETEQ;
4832 }
4833 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4834 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4835 Cond);
4836 }
4837 }
4838 }
4839 }
4840
4841 // setcc X, 0, setlt --> X (when X is all sign bits)
4842 // setcc X, 0, setne --> X (when X is all sign bits)
4843 //
4844 // When we know that X has 0 or -1 in each element (or scalar), this
4845 // comparison will produce X. This is only true when boolean contents are
4846 // represented via 0s and -1s.
4847 if (VT == OpVT &&
4848 // Check that the result of setcc is 0 and -1.
4850 // Match only for checks X < 0 and X != 0
4851 (Cond == ISD::SETLT || Cond == ISD::SETNE) && isNullOrNullSplat(N1) &&
4852 // The identity holds iff we know all sign bits for all lanes.
4854 return N0;
4855
4856 // FIXME: Support vectors.
4857 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4858 const APInt &C1 = N1C->getAPIntValue();
4859
4860 // (zext x) == C --> x == (trunc C)
4861 // (sext x) == C --> x == (trunc C)
4862 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4863 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4864 unsigned MinBits = N0.getValueSizeInBits();
4865 SDValue PreExt;
4866 bool Signed = false;
4867 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4868 // ZExt
4869 MinBits = N0->getOperand(0).getValueSizeInBits();
4870 PreExt = N0->getOperand(0);
4871 } else if (N0->getOpcode() == ISD::AND) {
4872 // DAGCombine turns costly ZExts into ANDs
4873 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4874 if ((C->getAPIntValue()+1).isPowerOf2()) {
4875 MinBits = C->getAPIntValue().countr_one();
4876 PreExt = N0->getOperand(0);
4877 }
4878 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4879 // SExt
4880 MinBits = N0->getOperand(0).getValueSizeInBits();
4881 PreExt = N0->getOperand(0);
4882 Signed = true;
4883 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4884 // ZEXTLOAD / SEXTLOAD
4885 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4886 MinBits = LN0->getMemoryVT().getSizeInBits();
4887 PreExt = N0;
4888 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4889 Signed = true;
4890 MinBits = LN0->getMemoryVT().getSizeInBits();
4891 PreExt = N0;
4892 }
4893 }
4894
4895 // Figure out how many bits we need to preserve this constant.
4896 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4897
4898 // Make sure we're not losing bits from the constant.
4899 if (MinBits > 0 &&
4900 MinBits < C1.getBitWidth() &&
4901 MinBits >= ReqdBits) {
4902 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4903 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4904 // Will get folded away.
4905 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4906 if (MinBits == 1 && C1 == 1)
4907 // Invert the condition.
4908 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4910 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4911 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4912 }
4913
4914 // If truncating the setcc operands is not desirable, we can still
4915 // simplify the expression in some cases:
4916 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4917 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4918 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4919 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4920 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4921 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4922 SDValue TopSetCC = N0->getOperand(0);
4923 unsigned N0Opc = N0->getOpcode();
4924 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4925 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4926 TopSetCC.getOpcode() == ISD::SETCC &&
4927 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4928 (isConstFalseVal(N1) ||
4929 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4930
4931 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4932 (!N1C->isZero() && Cond == ISD::SETNE);
4933
4934 if (!Inverse)
4935 return TopSetCC;
4936
4938 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4939 TopSetCC.getOperand(0).getValueType());
4940 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4941 TopSetCC.getOperand(1),
4942 InvCond);
4943 }
4944 }
4945 }
4946
4947 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4948 // equality or unsigned, and all 1 bits of the const are in the same
4949 // partial word, see if we can shorten the load.
4950 if (DCI.isBeforeLegalize() &&
4952 N0.getOpcode() == ISD::AND && C1 == 0 &&
4953 N0.getNode()->hasOneUse() &&
4954 isa<LoadSDNode>(N0.getOperand(0)) &&
4955 N0.getOperand(0).getNode()->hasOneUse() &&
4957 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4958 APInt bestMask;
4959 unsigned bestWidth = 0, bestOffset = 0;
4960 if (Lod->isSimple() && Lod->isUnindexed() &&
4961 (Lod->getMemoryVT().isByteSized() ||
4962 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4963 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4964 unsigned origWidth = N0.getValueSizeInBits();
4965 unsigned maskWidth = origWidth;
4966 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4967 // 8 bits, but have to be careful...
4968 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4969 origWidth = Lod->getMemoryVT().getSizeInBits();
4970 const APInt &Mask = N0.getConstantOperandAPInt(1);
4971 // Only consider power-of-2 widths (and at least one byte) as candidates
4972 // for the narrowed load.
4973 for (unsigned width = 8; width < origWidth; width *= 2) {
4974 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4975 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4976 // Avoid accessing any padding here for now (we could use memWidth
4977 // instead of origWidth here otherwise).
4978 unsigned maxOffset = origWidth - width;
4979 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4980 if (Mask.isSubsetOf(newMask)) {
4981 unsigned ptrOffset =
4982 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4983 unsigned IsFast = 0;
4984 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4985 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4987 ptrOffset / 8) &&
4989 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4990 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4991 IsFast) {
4992 bestOffset = ptrOffset / 8;
4993 bestMask = Mask.lshr(offset);
4994 bestWidth = width;
4995 break;
4996 }
4997 }
4998 newMask <<= 8;
4999 }
5000 if (bestWidth)
5001 break;
5002 }
5003 }
5004 if (bestWidth) {
5005 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
5006 SDValue Ptr = Lod->getBasePtr();
5007 if (bestOffset != 0)
5008 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
5009 SDValue NewLoad =
5010 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
5011 Lod->getPointerInfo().getWithOffset(bestOffset),
5012 Lod->getBaseAlign());
5013 SDValue And =
5014 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
5015 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
5016 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
5017 }
5018 }
5019
5020 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
5021 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
5022 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
5023
5024 // If the comparison constant has bits in the upper part, the
5025 // zero-extended value could never match.
5027 C1.getBitWidth() - InSize))) {
5028 switch (Cond) {
5029 case ISD::SETUGT:
5030 case ISD::SETUGE:
5031 case ISD::SETEQ:
5032 return DAG.getConstant(0, dl, VT);
5033 case ISD::SETULT:
5034 case ISD::SETULE:
5035 case ISD::SETNE:
5036 return DAG.getConstant(1, dl, VT);
5037 case ISD::SETGT:
5038 case ISD::SETGE:
5039 // True if the sign bit of C1 is set.
5040 return DAG.getConstant(C1.isNegative(), dl, VT);
5041 case ISD::SETLT:
5042 case ISD::SETLE:
5043 // True if the sign bit of C1 isn't set.
5044 return DAG.getConstant(C1.isNonNegative(), dl, VT);
5045 default:
5046 break;
5047 }
5048 }
5049
5050 // Otherwise, we can perform the comparison with the low bits.
5051 switch (Cond) {
5052 case ISD::SETEQ:
5053 case ISD::SETNE:
5054 case ISD::SETUGT:
5055 case ISD::SETUGE:
5056 case ISD::SETULT:
5057 case ISD::SETULE: {
5058 EVT newVT = N0.getOperand(0).getValueType();
5059 // FIXME: Should use isNarrowingProfitable.
5060 if (DCI.isBeforeLegalizeOps() ||
5061 (isOperationLegal(ISD::SETCC, newVT) &&
5062 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
5064 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
5065 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
5066
5067 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
5068 NewConst, Cond);
5069 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
5070 }
5071 break;
5072 }
5073 default:
5074 break; // todo, be more careful with signed comparisons
5075 }
5076 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
5077 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5079 OpVT)) {
5080 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
5081 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
5082 EVT ExtDstTy = N0.getValueType();
5083 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
5084
5085 // If the constant doesn't fit into the number of bits for the source of
5086 // the sign extension, it is impossible for both sides to be equal.
5087 if (C1.getSignificantBits() > ExtSrcTyBits)
5088 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
5089
5090 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
5091 ExtDstTy != ExtSrcTy && "Unexpected types!");
5092 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
5093 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
5094 DAG.getConstant(Imm, dl, ExtDstTy));
5095 if (!DCI.isCalledByLegalizer())
5096 DCI.AddToWorklist(ZextOp.getNode());
5097 // Otherwise, make this a use of a zext.
5098 return DAG.getSetCC(dl, VT, ZextOp,
5099 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
5100 } else if ((N1C->isZero() || N1C->isOne()) &&
5101 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5102 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
5103 // excluded as they are handled below whilst checking for foldBooleans.
5104 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
5105 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
5106 (N0.getValueType() == MVT::i1 ||
5110 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5111 if (TrueWhenTrue)
5112 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
5113 // Invert the condition.
5114 if (N0.getOpcode() == ISD::SETCC) {
5117 if (DCI.isBeforeLegalizeOps() ||
5119 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
5120 }
5121 }
5122
5123 if ((N0.getOpcode() == ISD::XOR ||
5124 (N0.getOpcode() == ISD::AND &&
5125 N0.getOperand(0).getOpcode() == ISD::XOR &&
5126 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
5127 isOneConstant(N0.getOperand(1))) {
5128 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5129 // can only do this if the top bits are known zero.
5130 unsigned BitWidth = N0.getValueSizeInBits();
5131 if (DAG.MaskedValueIsZero(N0,
5133 BitWidth-1))) {
5134 // Okay, get the un-inverted input value.
5135 SDValue Val;
5136 if (N0.getOpcode() == ISD::XOR) {
5137 Val = N0.getOperand(0);
5138 } else {
5139 assert(N0.getOpcode() == ISD::AND &&
5140 N0.getOperand(0).getOpcode() == ISD::XOR);
5141 // ((X^1)&1)^1 -> X & 1
5142 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
5143 N0.getOperand(0).getOperand(0),
5144 N0.getOperand(1));
5145 }
5146
5147 return DAG.getSetCC(dl, VT, Val, N1,
5149 }
5150 } else if (N1C->isOne()) {
5151 SDValue Op0 = N0;
5152 if (Op0.getOpcode() == ISD::TRUNCATE)
5153 Op0 = Op0.getOperand(0);
5154
5155 if ((Op0.getOpcode() == ISD::XOR) &&
5156 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
5157 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
5158 SDValue XorLHS = Op0.getOperand(0);
5159 SDValue XorRHS = Op0.getOperand(1);
5160 // Ensure that the input setccs return an i1 type or 0/1 value.
5161 if (Op0.getValueType() == MVT::i1 ||
5166 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5168 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
5169 }
5170 }
5171 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
5172 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5173 if (Op0.getValueType().bitsGT(VT))
5174 Op0 = DAG.getNode(ISD::AND, dl, VT,
5175 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
5176 DAG.getConstant(1, dl, VT));
5177 else if (Op0.getValueType().bitsLT(VT))
5178 Op0 = DAG.getNode(ISD::AND, dl, VT,
5179 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
5180 DAG.getConstant(1, dl, VT));
5181
5182 return DAG.getSetCC(dl, VT, Op0,
5183 DAG.getConstant(0, dl, Op0.getValueType()),
5185 }
5186 if (Op0.getOpcode() == ISD::AssertZext &&
5187 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
5188 return DAG.getSetCC(dl, VT, Op0,
5189 DAG.getConstant(0, dl, Op0.getValueType()),
5191 }
5192 }
5193
5194 // Given:
5195 // icmp eq/ne (urem %x, %y), 0
5196 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5197 // icmp eq/ne %x, 0
5198 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5199 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5200 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
5201 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
5202 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5203 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
5204 }
5205
5206 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5207 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5208 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5210 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
5211 N1C->isAllOnes()) {
5212 return DAG.getSetCC(dl, VT, N0.getOperand(0),
5213 DAG.getConstant(0, dl, OpVT),
5215 }
5216
5217 // fold (setcc (trunc x) c) -> (setcc x c)
5218 if (N0.getOpcode() == ISD::TRUNCATE &&
5220 (N0->getFlags().hasNoSignedWrap() &&
5223 EVT NewVT = N0.getOperand(0).getValueType();
5224 SDValue NewConst = DAG.getConstant(
5226 ? C1.sext(NewVT.getSizeInBits())
5227 : C1.zext(NewVT.getSizeInBits()),
5228 dl, NewVT);
5229 return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5230 }
5231
5232 if (SDValue V =
5233 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
5234 return V;
5235 }
5236
5237 // These simplifications apply to splat vectors as well.
5238 // TODO: Handle more splat vector cases.
5239 if (auto *N1C = isConstOrConstSplat(N1)) {
5240 const APInt &C1 = N1C->getAPIntValue();
5241
5242 APInt MinVal, MaxVal;
5243 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
5245 MinVal = APInt::getSignedMinValue(OperandBitSize);
5246 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
5247 } else {
5248 MinVal = APInt::getMinValue(OperandBitSize);
5249 MaxVal = APInt::getMaxValue(OperandBitSize);
5250 }
5251
5252 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5253 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5254 // X >= MIN --> true
5255 if (C1 == MinVal)
5256 return DAG.getBoolConstant(true, dl, VT, OpVT);
5257
5258 if (!VT.isVector()) { // TODO: Support this for vectors.
5259 // X >= C0 --> X > (C0 - 1)
5260 APInt C = C1 - 1;
5262 if ((DCI.isBeforeLegalizeOps() ||
5263 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5264 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5265 isLegalICmpImmediate(C.getSExtValue())))) {
5266 return DAG.getSetCC(dl, VT, N0,
5267 DAG.getConstant(C, dl, N1.getValueType()),
5268 NewCC);
5269 }
5270 }
5271 }
5272
5273 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5274 // X <= MAX --> true
5275 if (C1 == MaxVal)
5276 return DAG.getBoolConstant(true, dl, VT, OpVT);
5277
5278 // X <= C0 --> X < (C0 + 1)
5279 if (!VT.isVector()) { // TODO: Support this for vectors.
5280 APInt C = C1 + 1;
5282 if ((DCI.isBeforeLegalizeOps() ||
5283 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5284 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5285 isLegalICmpImmediate(C.getSExtValue())))) {
5286 return DAG.getSetCC(dl, VT, N0,
5287 DAG.getConstant(C, dl, N1.getValueType()),
5288 NewCC);
5289 }
5290 }
5291 }
5292
5293 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5294 if (C1 == MinVal)
5295 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5296
5297 // TODO: Support this for vectors after legalize ops.
5298 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5299 // Canonicalize setlt X, Max --> setne X, Max
5300 if (C1 == MaxVal)
5301 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5302
5303 // If we have setult X, 1, turn it into seteq X, 0
5304 if (C1 == MinVal+1)
5305 return DAG.getSetCC(dl, VT, N0,
5306 DAG.getConstant(MinVal, dl, N0.getValueType()),
5307 ISD::SETEQ);
5308 }
5309 }
5310
5311 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5312 if (C1 == MaxVal)
5313 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5314
5315 // TODO: Support this for vectors after legalize ops.
5316 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5317 // Canonicalize setgt X, Min --> setne X, Min
5318 if (C1 == MinVal)
5319 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5320
5321 // If we have setugt X, Max-1, turn it into seteq X, Max
5322 if (C1 == MaxVal-1)
5323 return DAG.getSetCC(dl, VT, N0,
5324 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5325 ISD::SETEQ);
5326 }
5327 }
5328
5329 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5330 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5331 if (C1.isZero())
5332 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5333 VT, N0, N1, Cond, DCI, dl))
5334 return CC;
5335
5336 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5337 // For example, when high 32-bits of i64 X are known clear:
5338 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5339 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5340 bool CmpZero = N1C->isZero();
5341 bool CmpNegOne = N1C->isAllOnes();
5342 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5343 // Match or(lo,shl(hi,bw/2)) pattern.
5344 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5345 unsigned EltBits = V.getScalarValueSizeInBits();
5346 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5347 return false;
5348 SDValue LHS = V.getOperand(0);
5349 SDValue RHS = V.getOperand(1);
5350 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5351 // Unshifted element must have zero upperbits.
5352 if (RHS.getOpcode() == ISD::SHL &&
5353 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5354 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5355 DAG.MaskedValueIsZero(LHS, HiBits)) {
5356 Lo = LHS;
5357 Hi = RHS.getOperand(0);
5358 return true;
5359 }
5360 if (LHS.getOpcode() == ISD::SHL &&
5361 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5362 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5363 DAG.MaskedValueIsZero(RHS, HiBits)) {
5364 Lo = RHS;
5365 Hi = LHS.getOperand(0);
5366 return true;
5367 }
5368 return false;
5369 };
5370
5371 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5372 unsigned EltBits = N0.getScalarValueSizeInBits();
5373 unsigned HalfBits = EltBits / 2;
5374 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5375 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5376 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5377 SDValue NewN0 =
5378 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5379 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5380 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5381 };
5382
5383 SDValue Lo, Hi;
5384 if (IsConcat(N0, Lo, Hi))
5385 return MergeConcat(Lo, Hi);
5386
5387 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5388 SDValue Lo0, Lo1, Hi0, Hi1;
5389 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5390 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5391 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5392 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5393 }
5394 }
5395 }
5396 }
5397
5398 // If we have "setcc X, C0", check to see if we can shrink the immediate
5399 // by changing cc.
5400 // TODO: Support this for vectors after legalize ops.
5401 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5402 // SETUGT X, SINTMAX -> SETLT X, 0
5403 // SETUGE X, SINTMIN -> SETLT X, 0
5404 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5405 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5406 return DAG.getSetCC(dl, VT, N0,
5407 DAG.getConstant(0, dl, N1.getValueType()),
5408 ISD::SETLT);
5409
5410 // SETULT X, SINTMIN -> SETGT X, -1
5411 // SETULE X, SINTMAX -> SETGT X, -1
5412 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5413 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5414 return DAG.getSetCC(dl, VT, N0,
5415 DAG.getAllOnesConstant(dl, N1.getValueType()),
5416 ISD::SETGT);
5417 }
5418 }
5419
5420 // Back to non-vector simplifications.
5421 // TODO: Can we do these for vector splats?
5422 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5423 const APInt &C1 = N1C->getAPIntValue();
5424 EVT ShValTy = N0.getValueType();
5425
5426 // Fold bit comparisons when we can. This will result in an
5427 // incorrect value when boolean false is negative one, unless
5428 // the bitsize is 1 in which case the false value is the same
5429 // in practice regardless of the representation.
5430 if ((VT.getSizeInBits() == 1 ||
5432 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5433 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5434 N0.getOpcode() == ISD::AND) {
5435 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5436 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5437 // Perform the xform if the AND RHS is a single bit.
5438 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5439 if (AndRHS->getAPIntValue().isPowerOf2() &&
5440 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5441 return DAG.getNode(
5442 ISD::TRUNCATE, dl, VT,
5443 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5444 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5445 }
5446 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5447 // (X & 8) == 8 --> (X & 8) >> 3
5448 // Perform the xform if C1 is a single bit.
5449 unsigned ShCt = C1.logBase2();
5450 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5451 return DAG.getNode(
5452 ISD::TRUNCATE, dl, VT,
5453 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5454 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5455 }
5456 }
5457 }
5458 }
5459
5460 if (C1.getSignificantBits() <= 64 &&
5462 // (X & -256) == 256 -> (X >> 8) == 1
5463 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5464 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5465 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5466 const APInt &AndRHSC = AndRHS->getAPIntValue();
5467 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5468 unsigned ShiftBits = AndRHSC.countr_zero();
5469 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5470 // If using an unsigned shift doesn't yield a legal compare
5471 // immediate, try using sra instead.
5472 APInt NewC = C1.lshr(ShiftBits);
5473 if (NewC.getSignificantBits() <= 64 &&
5475 APInt SignedC = C1.ashr(ShiftBits);
5476 if (SignedC.getSignificantBits() <= 64 &&
5478 SDValue Shift = DAG.getNode(
5479 ISD::SRA, dl, ShValTy, N0.getOperand(0),
5480 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5481 SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
5482 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5483 }
5484 }
5485 SDValue Shift = DAG.getNode(
5486 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5487 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5488 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5489 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5490 }
5491 }
5492 }
5493 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5494 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5495 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5496 // X < 0x100000000 -> (X >> 32) < 1
5497 // X >= 0x100000000 -> (X >> 32) >= 1
5498 // X <= 0x0ffffffff -> (X >> 32) < 1
5499 // X > 0x0ffffffff -> (X >> 32) >= 1
5500 unsigned ShiftBits;
5501 APInt NewC = C1;
5502 ISD::CondCode NewCond = Cond;
5503 if (AdjOne) {
5504 ShiftBits = C1.countr_one();
5505 NewC = NewC + 1;
5506 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5507 } else {
5508 ShiftBits = C1.countr_zero();
5509 }
5510 NewC.lshrInPlace(ShiftBits);
5511 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5513 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5514 SDValue Shift =
5515 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5516 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5517 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5518 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5519 }
5520 }
5521 }
5522 }
5523
5525 auto *CFP = cast<ConstantFPSDNode>(N1);
5526 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5527
5528 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5529 // constant if knowing that the operand is non-nan is enough. We prefer to
5530 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5531 // materialize 0.0.
5532 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5533 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5534
5535 // setcc (fneg x), C -> setcc swap(pred) x, -C
5536 if (N0.getOpcode() == ISD::FNEG) {
5538 if (DCI.isBeforeLegalizeOps() ||
5539 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5540 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5541 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5542 }
5543 }
5544
5545 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5547 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5548 bool IsFabs = N0.getOpcode() == ISD::FABS;
5549 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5550 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5551 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5552 : (IsFabs ? fcInf : fcPosInf);
5553 if (Cond == ISD::SETUEQ)
5554 Flag |= fcNan;
5555 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5556 DAG.getTargetConstant(Flag, dl, MVT::i32));
5557 }
5558 }
5559
5560 // If the condition is not legal, see if we can find an equivalent one
5561 // which is legal.
5563 // If the comparison was an awkward floating-point == or != and one of
5564 // the comparison operands is infinity or negative infinity, convert the
5565 // condition to a less-awkward <= or >=.
5566 if (CFP->getValueAPF().isInfinity()) {
5567 bool IsNegInf = CFP->getValueAPF().isNegative();
5569 switch (Cond) {
5570 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5571 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5572 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5573 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5574 default: break;
5575 }
5576 if (NewCond != ISD::SETCC_INVALID &&
5577 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5578 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5579 }
5580 }
5581 }
5582
5583 if (N0 == N1) {
5584 // The sext(setcc()) => setcc() optimization relies on the appropriate
5585 // constant being emitted.
5586 assert(!N0.getValueType().isInteger() &&
5587 "Integer types should be handled by FoldSetCC");
5588
5589 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5590 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5591 if (UOF == 2) // FP operators that are undefined on NaNs.
5592 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5593 if (UOF == unsigned(EqTrue))
5594 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5595 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5596 // if it is not already.
5597 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5598 if (NewCond != Cond &&
5599 (DCI.isBeforeLegalizeOps() ||
5600 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5601 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5602 }
5603
5604 // ~X > ~Y --> Y > X
5605 // ~X < ~Y --> Y < X
5606 // ~X < C --> X > ~C
5607 // ~X > C --> X < ~C
5608 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5609 N0.getValueType().isInteger()) {
5610 if (isBitwiseNot(N0)) {
5611 if (isBitwiseNot(N1))
5612 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5613
5616 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5617 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5618 }
5619 }
5620 }
5621
5622 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5623 N0.getValueType().isInteger()) {
5624 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5625 N0.getOpcode() == ISD::XOR) {
5626 // Simplify (X+Y) == (X+Z) --> Y == Z
5627 if (N0.getOpcode() == N1.getOpcode()) {
5628 if (N0.getOperand(0) == N1.getOperand(0))
5629 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5630 if (N0.getOperand(1) == N1.getOperand(1))
5631 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5632 if (isCommutativeBinOp(N0.getOpcode())) {
5633 // If X op Y == Y op X, try other combinations.
5634 if (N0.getOperand(0) == N1.getOperand(1))
5635 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5636 Cond);
5637 if (N0.getOperand(1) == N1.getOperand(0))
5638 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5639 Cond);
5640 }
5641 }
5642
5643 // If RHS is a legal immediate value for a compare instruction, we need
5644 // to be careful about increasing register pressure needlessly.
5645 bool LegalRHSImm = false;
5646
5647 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5648 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5649 // Turn (X+C1) == C2 --> X == C2-C1
5650 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5651 return DAG.getSetCC(
5652 dl, VT, N0.getOperand(0),
5653 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5654 dl, N0.getValueType()),
5655 Cond);
5656
5657 // Turn (X^C1) == C2 --> X == C1^C2
5658 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5659 return DAG.getSetCC(
5660 dl, VT, N0.getOperand(0),
5661 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5662 dl, N0.getValueType()),
5663 Cond);
5664 }
5665
5666 // Turn (C1-X) == C2 --> X == C1-C2
5667 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5668 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5669 return DAG.getSetCC(
5670 dl, VT, N0.getOperand(1),
5671 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5672 dl, N0.getValueType()),
5673 Cond);
5674
5675 // Could RHSC fold directly into a compare?
5676 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5677 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5678 }
5679
5680 // (X+Y) == X --> Y == 0 and similar folds.
5681 // Don't do this if X is an immediate that can fold into a cmp
5682 // instruction and X+Y has other uses. It could be an induction variable
5683 // chain, and the transform would increase register pressure.
5684 if (!LegalRHSImm || N0.hasOneUse())
5685 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5686 return V;
5687 }
5688
5689 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5690 N1.getOpcode() == ISD::XOR)
5691 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5692 return V;
5693
5694 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5695 return V;
5696
5697 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
5698 return V;
5699 }
5700
5701 // Fold remainder of division by a constant.
5702 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5703 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5704 // When division is cheap or optimizing for minimum size,
5705 // fall through to DIVREM creation by skipping this fold.
5706 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5707 if (N0.getOpcode() == ISD::UREM) {
5708 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5709 return Folded;
5710 } else if (N0.getOpcode() == ISD::SREM) {
5711 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5712 return Folded;
5713 }
5714 }
5715 }
5716
5717 // Fold away ALL boolean setcc's.
5718 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5719 SDValue Temp;
5720 switch (Cond) {
5721 default: llvm_unreachable("Unknown integer setcc!");
5722 case ISD::SETEQ: // X == Y -> ~(X^Y)
5723 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5724 N0 = DAG.getNOT(dl, Temp, OpVT);
5725 if (!DCI.isCalledByLegalizer())
5726 DCI.AddToWorklist(Temp.getNode());
5727 break;
5728 case ISD::SETNE: // X != Y --> (X^Y)
5729 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5730 break;
5731 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5732 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5733 Temp = DAG.getNOT(dl, N0, OpVT);
5734 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5735 if (!DCI.isCalledByLegalizer())
5736 DCI.AddToWorklist(Temp.getNode());
5737 break;
5738 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5739 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5740 Temp = DAG.getNOT(dl, N1, OpVT);
5741 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5742 if (!DCI.isCalledByLegalizer())
5743 DCI.AddToWorklist(Temp.getNode());
5744 break;
5745 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5746 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5747 Temp = DAG.getNOT(dl, N0, OpVT);
5748 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5749 if (!DCI.isCalledByLegalizer())
5750 DCI.AddToWorklist(Temp.getNode());
5751 break;
5752 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5753 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5754 Temp = DAG.getNOT(dl, N1, OpVT);
5755 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5756 break;
5757 }
5758 if (VT.getScalarType() != MVT::i1) {
5759 if (!DCI.isCalledByLegalizer())
5760 DCI.AddToWorklist(N0.getNode());
5761 // FIXME: If running after legalize, we probably can't do this.
5763 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5764 }
5765 return N0;
5766 }
5767
5768 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5769 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5770 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
5772 N1->getFlags().hasNoUnsignedWrap()) ||
5774 N1->getFlags().hasNoSignedWrap())) &&
5776 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5777 }
5778
5779 // Fold (setcc (sub nsw a, b), zero, s??) -> (setcc a, b, s??)
5780 // TODO: Remove that .isVector() check
5781 if (VT.isVector() && isZeroOrZeroSplat(N1) && N0.getOpcode() == ISD::SUB &&
5783 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), Cond);
5784 }
5785
5786 // Could not fold it.
5787 return SDValue();
5788}
5789
5790/// Returns true (and the GlobalValue and the offset) if the node is a
5791/// GlobalAddress + offset.
///
/// The node is first passed through unwrapAddress(). A GlobalAddressSDNode
/// matches directly. A node for which isAnyAdd() holds matches when one
/// operand recursively matches and the other operand is a ConstantSDNode.
/// On success GA is set to the global and the displacement is added to
/// Offset; Offset is accumulated with +=, it is never reset here.
///
/// Note that when the recursive match on the first add operand succeeds but
/// the other operand is not a constant, false is returned even though GA and
/// Offset have already been updated by the recursive call.
5793 int64_t &Offset) const {
5794
5795 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5796
     // Base case: a plain global address contributes its own offset.
5797 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5798 GA = GASD->getGlobal();
5799 Offset += GASD->getOffset();
5800 return true;
5801 }
5802
     // Recursive case: (GA-expression + constant), with the operands in either
     // order. The second operand is only tried as the GA-expression when the
     // first one did not match.
5803 if (N->isAnyAdd()) {
5804 SDValue N1 = N->getOperand(0);
5805 SDValue N2 = N->getOperand(1);
5806 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5807 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5808 Offset += V->getSExtValue();
5809 return true;
5810 }
5811 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5812 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5813 Offset += V->getSExtValue();
5814 return true;
5815 }
5816 }
5817 }
5818
5819 return false;
5820}
5821
5823 DAGCombinerInfo &DCI) const {
5824 // Default implementation: no optimization. DCI is not consulted and an
     // empty SDValue is always returned.
5825 return SDValue();
5826}
5827
5828//===----------------------------------------------------------------------===//
5829// Inline Assembler Implementation Methods
5830//===----------------------------------------------------------------------===//
5831
// Classify the constraint string by its spelling:
//  - a single character is mapped through the switch below;
//  - a string wrapped in braces is C_Memory when it is exactly "{memory}"
//    and C_Register otherwise;
//  - everything else (including single characters not listed in the switch)
//    is C_Unknown.
5834 unsigned S = Constraint.size();
5835
5836 if (S == 1) {
5837 switch (Constraint[0]) {
5838 default: break;
5839 case 'r':
5840 return C_RegisterClass;
5841 case 'm': // memory
5842 case 'o': // offsettable
5843 case 'V': // not offsettable
5844 return C_Memory;
5845 case 'p': // Address.
5846 return C_Address;
5847 case 'n': // Simple Integer
5848 case 'E': // Floating Point Constant
5849 case 'F': // Floating Point Constant
5850 return C_Immediate;
5851 case 'i': // Simple Integer or Relocatable Constant
5852 case 's': // Relocatable Constant
5853 case 'X': // Allow ANY value.
5854 case 'I': // Target registers.
5855 case 'J':
5856 case 'K':
5857 case 'L':
5858 case 'M':
5859 case 'N':
5860 case 'O':
5861 case 'P':
5862 case '<':
5863 case '>':
5864 return C_Other;
5865 }
5866 }
5867
     // Brace-enclosed constraint. S == 8 is the length of "{memory}", so the
     // substr(1, 6) below is exactly the text between the braces.
5868 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5869 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5870 return C_Memory;
5871 return C_Register;
5872 }
5873 return C_Unknown;
5874}
5875
5876/// Try to replace an X constraint, which matches anything, with another that
5877/// has more specific requirements based on the type of the corresponding
5878/// operand.
5879const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5880 if (ConstraintVT.isInteger())
5881 return "r";
5882 if (ConstraintVT.isFloatingPoint())
5883 return "f"; // works for many targets
5884 return nullptr;
5885}
5886
5888 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5889 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
     // This implementation performs no lowering: none of the arguments are
     // touched and an empty SDValue is returned.
5890 return SDValue();
5891}
5892
5893/// Lower the specified operand into the Ops vector.
5894/// If it is invalid, don't add anything to Ops.
///
/// Only the single-letter constraints 'X', 'i', 'n' and 's' are handled here;
/// for any other letter, and for multi-character constraints, Ops is left
/// untouched. At most one value is appended to Ops.
5896 StringRef Constraint,
5897 std::vector<SDValue> &Ops,
5898 SelectionDAG &DAG) const {
5899
     // NOTE(review): only size() > 1 is rejected, so the indexing below assumes
     // Constraint is non-empty -- confirm callers guarantee that.
5900 if (Constraint.size() > 1)
5901 return;
5902
5903 char ConstraintLetter = Constraint[0];
5904 switch (ConstraintLetter) {
5905 default: break;
5906 case 'X': // Allows any operand
5907 case 'i': // Simple Integer or Relocatable Constant
5908 case 'n': // Simple Integer
5909 case 's': { // Relocatable Constant
5910
     // Running sum of the constant addends peeled off while walking down the
     // expression towards the symbol or constant leaf.
5912 uint64_t Offset = 0;
5913
5914 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5915 // etc., since getelementptr is variadic. We can't use
5916 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5917 // while in this case the GA may be furthest from the root node which is
5918 // likely an ISD::ADD.
5919 while (true) {
     // A constant leaf is accepted for every letter except 's'.
5920 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5921 // gcc prints these as sign extended. Sign extend value to 64 bits
5922 // now; without this it would get ZExt'd later in
5923 // ScheduleDAGSDNodes::EmitNode, which is very generic.
     // 1-bit constants are the exception: they are extended according to the
     // boolean contents reported for MVT::i64 rather than always sign-extended.
5924 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5925 BooleanContent BCont = getBooleanContents(MVT::i64);
5926 ISD::NodeType ExtOpc =
5927 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5928 int64_t ExtVal =
5929 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5930 Ops.push_back(
5931 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5932 return;
5933 }
     // Symbolic leaves are accepted for every letter except 'n'. The offset
     // accumulated so far is folded into the target node that is emitted.
5934 if (ConstraintLetter != 'n') {
5935 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5936 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5937 GA->getValueType(0),
5938 Offset + GA->getOffset()));
5939 return;
5940 }
5941 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5942 Ops.push_back(DAG.getTargetBlockAddress(
5943 BA->getBlockAddress(), BA->getValueType(0),
5944 Offset + BA->getOffset(), BA->getTargetFlags()));
5945 return;
5946 }
5948 Ops.push_back(Op);
5949 return;
5950 }
5951 }
     // Not a leaf: peel one constant operand off an ADD/SUB, fold it into
     // Offset and keep walking into the remaining operand. Anything else ends
     // the walk without adding to Ops.
     // NOTE(review): for ISD::SUB only a constant in operand 0 is accepted, and
     // its value is then subtracted from Offset while the walk continues into
     // operand 1. Confirm this is the intended treatment of (C - X); a constant
     // in operand 1 of a SUB is rejected by the else-if below.
5952 const unsigned OpCode = Op.getOpcode();
5953 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5954 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5955 Op = Op.getOperand(1);
5956 // Subtraction is not commutative.
5957 else if (OpCode == ISD::ADD &&
5958 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5959 Op = Op.getOperand(0);
5960 else
5961 return;
5962 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5963 continue;
5964 }
5965 return;
5966 }
5967 break;
5968 }
5969 }
5970}
5971
5975
/// Resolve a brace-enclosed register-name constraint (e.g. "{name}") to a
/// (physical register, register class) pair.
///
/// Returns (0, nullptr) when Constraint does not start with '{' or when no
/// register with a matching assembly name is found. Register names are
/// compared case-insensitively against RI->getRegAsmName(). Register classes
/// rejected by isLegalRC() are ignored. A class for which
/// RI->isTypeLegalForClass(*RC, VT) holds is returned immediately; otherwise
/// the first class that contained the register is returned.
5976std::pair<unsigned, const TargetRegisterClass *>
5978 StringRef Constraint,
5979 MVT VT) const {
5980 if (!Constraint.starts_with("{"))
5981 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5982 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5983
5984 // Remove the braces from around the name.
5985 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5986
     // Fallback result: stays (0, nullptr) until some class contains the
     // register, then holds the first such match.
5987 std::pair<unsigned, const TargetRegisterClass *> R =
5988 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5989
5990 // Figure out which register class contains this reg.
5991 for (const TargetRegisterClass *RC : RI->regclasses()) {
5992 // If none of the value types for this register class are valid, we
5993 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5994 if (!isLegalRC(*RI, *RC))
5995 continue;
5996
5997 for (const MCPhysReg &PR : *RC) {
5998 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5999 std::pair<unsigned, const TargetRegisterClass *> S =
6000 std::make_pair(PR, RC);
6001
6002 // If this register class has the requested value type, return it,
6003 // otherwise keep searching and return the first class found
6004 // if no other is found which explicitly has the requested type.
6005 if (RI->isTypeLegalForClass(*RC, VT))
6006 return S;
6007 if (!R.second)
6008 R = S;
6009 }
6010 }
6011 }
6012
6013 return R;
6014}
6015
6016//===----------------------------------------------------------------------===//
6017// Constraint Selection.
6018
6019/// Return true if this is an input operand that is a matching constraint like
6020/// "4".
///
/// Only the first character of ConstraintCode is inspected: the result is
/// true exactly when that character is a decimal digit. ConstraintCode must
/// not be empty (asserted).
6022 assert(!ConstraintCode.empty() && "No known constraint!");
     // The cast to unsigned char keeps the isdigit() argument non-negative.
6023 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
6024}
6025
6026/// If this is an input matching constraint, this method returns the output
6027/// operand it matches.
///
/// The number is obtained by running atoi() over ConstraintCode, i.e. its
/// leading decimal digits are parsed. ConstraintCode must not be empty
/// (asserted).
6029 assert(!ConstraintCode.empty() && "No known constraint!");
6030 return atoi(ConstraintCode.c_str());
6031}
6032
6033/// Split up the constraint string from the inline assembly value into the
6034/// specific constraints and their prefixes, and also tie in the associated
6035/// operand values.
6036/// If this returns an empty vector, and if the constraint string itself
6037/// isn't empty, there was an error parsing.
///
/// The work is done in three passes over the parsed constraints:
///  1. attach the call operand (if any) and a ConstraintVT to each one;
///  2. when multiple alternatives are present, pick the alternative with the
///     highest summed match weight and select it on every non-clobber operand;
///  3. verify that tied (matching) operands have compatible types, reporting a
///     fatal error otherwise.
6040 const TargetRegisterInfo *TRI,
6041 const CallBase &Call) const {
6042 /// Information about all of the constraints.
6043 AsmOperandInfoVector ConstraintOperands;
6044 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
6045 unsigned maCount = 0; // Largest number of multiple alternative constraints.
6046
6047 // Do a prepass over the constraints, canonicalizing them, and building up the
6048 // ConstraintOperands list.
6049 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
6050 unsigned ResNo = 0; // ResNo - The result number of the next output.
6051 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
6052
6053 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
6054 ConstraintOperands.emplace_back(std::move(CI));
6055 AsmOperandInfo &OpInfo = ConstraintOperands.back();
6056
6057 // Update multiple alternative constraint count.
6058 if (OpInfo.multipleAlternatives.size() > maCount)
6059 maCount = OpInfo.multipleAlternatives.size();
6060
     // Default; overwritten below when a value type can be determined.
6061 OpInfo.ConstraintVT = MVT::Other;
6062
6063 // Compute the value type for each operand.
6064 switch (OpInfo.Type) {
6065 case InlineAsm::isOutput: {
6066 // Indirect outputs just consume an argument.
6067 if (OpInfo.isIndirect) {
6068 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
6069 break;
6070 }
6071
6072 // The return value of the call is this value. As such, there is no
6073 // corresponding argument.
6074 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
6075 EVT VT;
     // With a struct return type, each direct output takes the element at
     // index ResNo.
6076 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
6077 VT = getAsmOperandValueType(DL, STy->getElementType(ResNo));
6078 } else {
6079 assert(ResNo == 0 && "Asm only has one result!");
6080 VT = getAsmOperandValueType(DL, Call.getType());
6081 }
6082 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6083 ++ResNo;
6084 break;
6085 }
6086 case InlineAsm::isInput:
6087 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
6088 break;
6089 case InlineAsm::isLabel:
6090 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
6091 ++LabelNo;
     // Labels skip the operand-type computation below and do not advance ArgNo.
6092 continue;
6094 // Nothing to do.
6095 break;
6096 }
6097
     // Operands backed by a call argument: derive ConstraintVT from the
     // argument's IR type and advance ArgNo.
6098 if (OpInfo.CallOperandVal) {
6099 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
6100 if (OpInfo.isIndirect) {
6101 OpTy = Call.getParamElementType(ArgNo);
6102 assert(OpTy && "Indirect operand must have elementtype attribute");
6103 }
6104
6105 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
6106 if (StructType *STy = dyn_cast<StructType>(OpTy))
6107 if (STy->getNumElements() == 1)
6108 OpTy = STy->getElementType(0);
6109
6110 // If OpTy is not a single value, it may be a struct/union that we
6111 // can tile with integers.
     // Only the bit sizes listed in the switch are replaced by an integer type
     // of the same width; any other size leaves OpTy unchanged.
6112 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
6113 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
6114 switch (BitSize) {
6115 default: break;
6116 case 1:
6117 case 8:
6118 case 16:
6119 case 32:
6120 case 64:
6121 case 128:
6122 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
6123 break;
6124 }
6125 }
6126
6127 EVT VT = getAsmOperandValueType(DL, OpTy, true);
6128 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6129 ArgNo++;
6130 }
6131 }
6132
6133 // If we have multiple alternative constraints, select the best alternative.
6134 if (!ConstraintOperands.empty()) {
6135 if (maCount) {
     // bestWeight starts at -1 and is only replaced by a strictly greater sum,
     // so ties keep the earliest alternative and index 0 is used when every
     // alternative fails to match.
6136 unsigned bestMAIndex = 0;
6137 int bestWeight = -1;
6138 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
6139 int weight = -1;
6140 unsigned maIndex;
6141 // Compute the sums of the weights for each alternative, keeping track
6142 // of the best (highest weight) one so far.
6143 for (maIndex = 0; maIndex < maCount; ++maIndex) {
6144 int weightSum = 0;
6145 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6146 cIndex != eIndex; ++cIndex) {
6147 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6148 if (OpInfo.Type == InlineAsm::isClobber)
6149 continue;
6150
6151 // If this is an output operand with a matching input operand,
6152 // look up the matching input. If their types mismatch, e.g. one
6153 // is an integer, the other is floating point, or their sizes are
6154 // different, flag it as an maCantMatch.
6155 if (OpInfo.hasMatchingInput()) {
6156 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6157 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6158 if ((OpInfo.ConstraintVT.isInteger() !=
6159 Input.ConstraintVT.isInteger()) ||
6160 (OpInfo.ConstraintVT.getSizeInBits() !=
6161 Input.ConstraintVT.getSizeInBits())) {
6162 weightSum = -1; // Can't match.
6163 break;
6164 }
6165 }
6166 }
     // A single operand that cannot match disqualifies the whole alternative.
6167 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
6168 if (weight == -1) {
6169 weightSum = -1;
6170 break;
6171 }
6172 weightSum += weight;
6173 }
6174 // Update best.
6175 if (weightSum > bestWeight) {
6176 bestWeight = weightSum;
6177 bestMAIndex = maIndex;
6178 }
6179 }
6180
6181 // Now select chosen alternative in each constraint.
6182 for (AsmOperandInfo &cInfo : ConstraintOperands)
6183 if (cInfo.Type != InlineAsm::isClobber)
6184 cInfo.selectAlternative(bestMAIndex);
6185 }
6186 }
6187
6188 // Check and hook up tied operands, choose constraint code to use.
6189 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6190 cIndex != eIndex; ++cIndex) {
6191 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6192
6193 // If this is an output operand with a matching input operand, look up the
6194 // matching input. If their types mismatch, e.g. one is an integer, the
6195 // other is floating point, or their sizes are different, flag it as an
6196 // error.
6197 if (OpInfo.hasMatchingInput()) {
6198 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6199
     // Differing value types are tolerated only when both sides agree on being
     // (or not being) integer/floating-point and both constraints resolve to
     // the same register class.
6200 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6201 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6202 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
6203 OpInfo.ConstraintVT);
6204 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6205 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
6206 Input.ConstraintVT);
6207 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
6208 OpInfo.ConstraintVT.isFloatingPoint();
6209 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
6210 Input.ConstraintVT.isFloatingPoint();
6211 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
6212 (MatchRC.second != InputRC.second)) {
6213 report_fatal_error("Unsupported asm: input constraint"
6214 " with a matching output constraint of"
6215 " incompatible type!");
6216 }
6217 }
6218 }
6219 }
6220
6221 return ConstraintOperands;
6222}
6223
6224/// Return a number indicating our preference for choosing a type of constraint
6225/// over another, for the purpose of sorting them. Immediates are almost always
6226/// preferable (when they can be emitted). A higher return value means a
6227/// stronger preference for one constraint type relative to another.
6228/// FIXME: We should prefer registers over memory but doing so may lead to
6229/// unrecoverable register exhaustion later.
6230/// https://github.com/llvm/llvm-project/issues/20571
     // The switch yields a value from 4 (most preferred) down to 0; falling out
     // of it means CT was not a handled constraint type.
6232 switch (CT) {
6235 return 4;
6238 return 3;
6240 return 2;
6242 return 1;
6244 return 0;
6245 }
6246 llvm_unreachable("Invalid constraint type");
6247}
6248
6249/// Examine constraint type and operand type and determine a weight value.
6250/// This object must already have been set up with the operand type
6251/// and the current alternative constraint selected.
///
/// The result is the highest weight that getSingleConstraintMatchWeight()
/// reports for any of the candidate codes, or CW_Invalid when there are none
/// or none matches.
6254 AsmOperandInfo &info, int maIndex) const {
     // An out-of-range alternative index falls back to the operand's own code
     // list instead of a per-alternative one.
6256 if (maIndex >= (int)info.multipleAlternatives.size())
6257 rCodes = &info.Codes;
6258 else
6259 rCodes = &info.multipleAlternatives[maIndex].Codes;
6260 ConstraintWeight BestWeight = CW_Invalid;
6261
6262 // Loop over the options, keeping the highest weight seen so far.
6263 for (const std::string &rCode : *rCodes) {
6264 ConstraintWeight weight =
6265 getSingleConstraintMatchWeight(info, rCode.c_str());
6266 if (weight > BestWeight)
6267 BestWeight = weight;
6268 }
6269
6270 return BestWeight;
6271}
6272
6273/// Examine constraint type and operand type and determine a weight value.
6274/// This object must already have been set up with the operand type
6275/// and the current alternative constraint selected.
///
/// Only the first character of \p constraint is examined. When the operand has
/// no IR value attached, CW_Default is returned without looking at the
/// constraint at all.
6278 AsmOperandInfo &info, const char *constraint) const {
6280 Value *CallOperandVal = info.CallOperandVal;
6281 // If we don't have a value, we can't do a match,
6282 // but allow it at the lowest weight.
6283 if (!CallOperandVal)
6284 return CW_Default;
6285 // Look at the constraint type.
     // Cases whose condition does not hold leave `weight` at whatever value it
     // was given before the switch.
6286 switch (*constraint) {
6287 case 'i': // immediate integer.
6288 case 'n': // immediate integer with a known value.
6289 if (isa<ConstantInt>(CallOperandVal))
6290 weight = CW_Constant;
6291 break;
6292 case 's': // non-explicit integral immediate.
6293 if (isa<GlobalValue>(CallOperandVal))
6294 weight = CW_Constant;
6295 break;
6296 case 'E': // immediate float if host format.
6297 case 'F': // immediate float.
6298 if (isa<ConstantFP>(CallOperandVal))
6299 weight = CW_Constant;
6300 break;
6301 case '<': // memory operand with autodecrement.
6302 case '>': // memory operand with autoincrement.
6303 case 'm': // memory operand.
6304 case 'o': // offsettable memory operand
6305 case 'V': // non-offsettable memory operand
6306 weight = CW_Memory;
6307 break;
6308 case 'r': // general register.
6309 case 'g': // general register, memory operand or immediate integer.
6310 // note: Clang converts "g" to "imr".
6311 if (CallOperandVal->getType()->isIntegerTy())
6312 weight = CW_Register;
6313 break;
6314 case 'X': // any operand.
6315 default:
6316 weight = CW_Default;
6317 break;
6318 }
6319 return weight;
6320}
6321
6322 /// If there are multiple different constraints that we could pick for this
6323 /// operand (e.g. "imr") try to pick the 'best' one.
6324 /// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6325 /// into seven classes:
6326 /// Register -> one specific register
6327 /// RegisterClass -> a group of regs
6328 /// Memory -> memory
6329 /// Address -> a symbolic memory reference
6330 /// Immediate -> immediate values
6331 /// Other -> magic values (such as "Flag Output Operands")
6332 /// Unknown -> something we don't recognize yet and can't handle
6333 /// Ideally, we would pick the most specific constraint possible: if we have
6334 /// something that fits into a register, we would pick it. The problem here
6335 /// is that if we have something that could either be in a register or in
6336 /// memory that use of the register could cause selection of *other*
6337 /// operands to fail: they might only succeed if we pick memory. Because of
6338 /// this the heuristic we use is:
6339 ///
6340 /// 1) If there is an 'other' constraint, and if the operand is valid for
6341 /// that constraint, use it. This makes us take advantage of 'i'
6342 /// constraints when available.
6343 /// 2) Otherwise, pick the most general constraint present. This prefers
6344 /// 'm' over 'r', for example.
6345 ///
/// Returns the (code, constraint type) pairs from OpInfo.Codes that pass the
/// filters in the loop below.
6347 TargetLowering::AsmOperandInfo &OpInfo) const {
6348 ConstraintGroup Ret;
6349
6350 Ret.reserve(OpInfo.Codes.size());
6351 for (StringRef Code : OpInfo.Codes) {
6353
6354 // Indirect 'other' or 'immediate' constraints are not allowed.
6355 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6356 CType == TargetLowering::C_Register ||
6358 continue;
6359
6360 // Things with matching constraints can only be registers, per gcc
6361 // documentation. This mainly affects "g" constraints.
6362 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6363 continue;
6364
6365 Ret.emplace_back(Code, CType);
6366 }
6367
// Comparator: a candidate precedes another when its constraint type has the
// higher getConstraintPiority value.
6369 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6370 });
6371
6372 return Ret;
6373}
6374
6375 /// If we have an immediate, see if we can lower it. Return true if we can,
6376 /// false otherwise.
/// \p P must be a C_Other or C_Immediate candidate (asserted below).
6378 SDValue Op, SelectionDAG *DAG,
6379 const TargetLowering &TLI) {
6380
6381 assert((P.second == TargetLowering::C_Other ||
6382 P.second == TargetLowering::C_Immediate) &&
6383 "need immediate or other");
6384
// Without an operand node there is nothing to lower.
6385 if (!Op.getNode())
6386 return false;
6387
// The constraint counts as lowerable when the target hook produced at least
// one result operand.
6388 std::vector<SDValue> ResultOps;
6389 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6390 return !ResultOps.empty();
6391}
6392
6393 /// Determines the constraint code and constraint type to use for the specific
6394 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6396 SDValue Op,
6397 SelectionDAG *DAG) const {
6398 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6399
6400 // Single-letter constraints ('r') are very common.
6401 if (OpInfo.Codes.size() == 1) {
6402 OpInfo.ConstraintCode = OpInfo.Codes[0];
6403 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6404 } else {
// With no usable candidate, leave OpInfo's constraint fields untouched.
6406 if (G.empty())
6407 return;
6408
// Walk the leading run of C_Other/C_Immediate candidates and stop at the
// first one that can actually be lowered for Op.
6409 unsigned BestIdx = 0;
6410 for (const unsigned E = G.size();
6411 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6412 G[BestIdx].second == TargetLowering::C_Immediate);
6413 ++BestIdx) {
6414 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6415 break;
6416 // If we're out of constraints, just pick the first one.
6417 if (BestIdx + 1 == E) {
6418 BestIdx = 0;
6419 break;
6420 }
6421 }
6422
6423 OpInfo.ConstraintCode = G[BestIdx].first;
6424 OpInfo.ConstraintType = G[BestIdx].second;
6425 }
6426
6427 // 'X' matches anything.
6428 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6429 // Constants are handled elsewhere. For Functions, the type here is the
6430 // type of the result, which is not what we want to look at; leave them
6431 // alone.
6432 Value *v = OpInfo.CallOperandVal;
6433 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6434 return;
6435 }
6436
// Basic blocks and block addresses are rewritten to the "i" constraint code;
// ConstraintType is not updated on this path.
6437 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6438 OpInfo.ConstraintCode = "i";
6439 return;
6440 }
6441
6442 // Otherwise, try to resolve it to something we know about by looking at
6443 // the actual operand type.
6444 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6445 OpInfo.ConstraintCode = Repl;
6446 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6447 }
6448 }
6449}
6450
6451 /// Given an exact SDIV by a constant, create a multiplication
6452 /// with the multiplicative inverse of the constant.
6453 /// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
/// Returns an empty SDValue when the divisor does not satisfy the predicate
/// below (for example, when a divisor element is zero). Any SRA node that is
/// created is appended to \p Created.
6455 const SDLoc &dl, SelectionDAG &DAG,
6456 SmallVectorImpl<SDNode *> &Created) {
6457 SDValue Op0 = N->getOperand(0);
6458 SDValue Op1 = N->getOperand(1);
6459 EVT VT = N->getValueType(0);
6460 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6461 EVT ShSVT = ShVT.getScalarType();
6462
6463 bool UseSRA = false;
6464 SmallVector<SDValue, 16> Shifts, Factors;
6465
// Per-element callback: records the shift amount and the multiplier for one
// divisor constant, and rejects a zero divisor.
6466 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6467 if (C->isZero())
6468 return false;
6469
6470 EVT CT = C->getValueType(0);
6471 APInt Divisor = C->getAPIntValue();
// Trailing zero bits of the divisor are removed here and applied later as an
// exact arithmetic right shift of the dividend.
6472 unsigned Shift = Divisor.countr_zero();
6473 if (Shift) {
6474 Divisor.ashrInPlace(Shift);
6475 UseSRA = true;
6476 }
// With the trailing zero bits removed the divisor is odd.
6477 APInt Factor = Divisor.multiplicativeInverse();
6478 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6479 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6480 return true;
6481 };
6482
6483 // Collect all magic values from the build vector.
6484 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6485 return SDValue();
6486
// Rebuild the shift and factor operands in the same form as the divisor:
// build vector, splat vector, or a single scalar constant.
6487 SDValue Shift, Factor;
6488 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6489 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6490 Factor = DAG.getBuildVector(VT, dl, Factors);
6491 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6492 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6493 "Expected matchUnaryPredicate to return one element for scalable "
6494 "vectors");
6495 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6496 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6497 } else {
6498 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6499 Shift = Shifts[0];
6500 Factor = Factors[0];
6501 }
6502
// The shift is emitted only if at least one divisor had trailing zero bits.
6503 SDValue Res = Op0;
6504 if (UseSRA) {
6505 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
6506 Created.push_back(Res.getNode());
6507 }
6508
6509 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6510}
6511
6512 /// Given an exact UDIV by a constant, create a multiplication
6513 /// with the multiplicative inverse of the constant.
6514 /// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
/// Returns an empty SDValue when the divisor does not satisfy the predicate
/// below (for example, when a divisor element is zero). Any SRL node that is
/// created is appended to \p Created.
6516 const SDLoc &dl, SelectionDAG &DAG,
6517 SmallVectorImpl<SDNode *> &Created) {
6518 EVT VT = N->getValueType(0);
6519 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6520 EVT ShSVT = ShVT.getScalarType();
6521
6522 bool UseSRL = false;
6523 SmallVector<SDValue, 16> Shifts, Factors;
6524
// Per-element callback: records the shift amount and the multiplier for one
// divisor constant, and rejects a zero divisor.
6525 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6526 if (C->isZero())
6527 return false;
6528
6529 EVT CT = C->getValueType(0);
6530 APInt Divisor = C->getAPIntValue();
// Trailing zero bits of the divisor are removed here and applied later as an
// exact logical right shift of the dividend.
6531 unsigned Shift = Divisor.countr_zero();
6532 if (Shift) {
6533 Divisor.lshrInPlace(Shift);
6534 UseSRL = true;
6535 }
6536 // Calculate the multiplicative inverse modulo BW.
6537 APInt Factor = Divisor.multiplicativeInverse();
6538 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6539 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6540 return true;
6541 };
6542
6543 SDValue Op1 = N->getOperand(1);
6544
6545 // Collect all magic values from the build vector.
6546 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6547 return SDValue();
6548
// Rebuild the shift and factor operands in the same form as the divisor:
// build vector, splat vector, or a single scalar constant.
6549 SDValue Shift, Factor;
6550 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6551 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6552 Factor = DAG.getBuildVector(VT, dl, Factors);
6553 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6554 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6555 "Expected matchUnaryPredicate to return one element for scalable "
6556 "vectors");
6557 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6558 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6559 } else {
6560 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6561 Shift = Shifts[0];
6562 Factor = Factors[0];
6563 }
6564
// The shift is emitted only if at least one divisor had trailing zero bits.
6565 SDValue Res = N->getOperand(0);
6566 if (UseSRL) {
6567 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
6568 Created.push_back(Res.getNode());
6569 }
6570
6571 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6572}
6573
/// Returns the node \p N itself when isIntDivCheap reports that integer
/// division is cheap for the node's value type and the function's attributes;
/// otherwise returns an empty SDValue. \p Created is not modified here.
6575 SelectionDAG &DAG,
6576 SmallVectorImpl<SDNode *> &Created) const {
6577 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6578 if (isIntDivCheap(N->getValueType(0), Attr))
6579 return SDValue(N, 0); // Lower SDIV as SDIV
6580 return SDValue();
6581}
6582
/// Returns the node \p N itself when isIntDivCheap reports that integer
/// division is cheap for the node's value type and the function's attributes;
/// otherwise returns an empty SDValue. \p Created is not modified here.
6583SDValue
6585 SelectionDAG &DAG,
6586 SmallVectorImpl<SDNode *> &Created) const {
6587 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6588 if (isIntDivCheap(N->getValueType(0), Attr))
6589 return SDValue(N, 0); // Lower SREM as SREM
6590 return SDValue();
6591}
6592
6593 /// Build sdiv by power-of-2 with conditional move instructions
6594 /// Ref: "Hacker's Delight" by Henry Warren 10-1
6595 /// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6596 /// bgez x, label
6597 /// add x, x, 2**k-1
6598 /// label:
6599 /// sra res, x, k
6600 /// neg res, res (when the divisor is negative)
/// Every node built here except the returned one is appended to \p Created.
6602 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6603 SmallVectorImpl<SDNode *> &Created) const {
// k is taken as the number of trailing zero bits of the divisor.
6604 unsigned Lg2 = Divisor.countr_zero();
6605 EVT VT = N->getValueType(0);
6606
6607 SDLoc DL(N);
6608 SDValue N0 = N->getOperand(0);
6609 SDValue Zero = DAG.getConstant(0, DL, VT);
// A mask with the low Lg2 bits set, i.e. the 2**k - 1 addend.
6610 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6611 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6612
6613 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6614 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6615 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6616 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6617 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6618
6619 Created.push_back(Cmp.getNode());
6620 Created.push_back(Add.getNode());
6621 Created.push_back(CMov.getNode());
6622
6623 // Divide by pow2.
6624 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
6625 DAG.getShiftAmountConstant(Lg2, VT, DL));
6626
6627 // If we're dividing by a positive value, we're done. Otherwise, we must
6628 // negate the result.
6629 if (Divisor.isNonNegative())
6630 return SRA;
6631
// The negation is built as 0 - SRA.
6632 Created.push_back(SRA.getNode());
6633 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6634}
6635
6636 /// Given an ISD::SDIV node expressing a divide by constant,
6637 /// return a DAG expression to select that will generate the same value by
6638 /// multiplying by a magic number.
6639 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
/// Returns an empty SDValue when the transform is not applied. Intermediate
/// nodes are appended to \p Created.
6641 bool IsAfterLegalization,
6642 bool IsAfterLegalTypes,
6643 SmallVectorImpl<SDNode *> &Created) const {
6644 SDLoc dl(N);
6645
6646 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6647 if (N->getFlags().hasExact())
6648 return BuildExactSDIV(*this, N, dl, DAG, Created);
6649
6650 EVT VT = N->getValueType(0);
6651 EVT SVT = VT.getScalarType();
6652 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6653 EVT ShSVT = ShVT.getScalarType();
6654 unsigned EltBits = VT.getScalarSizeInBits();
// MulVT stays equal to EVT() unless a wider multiply type is chosen below.
6655 EVT MulVT;
6656
6657 // Check to see if we can do this.
6658 // FIXME: We should be more aggressive here.
6659 EVT QueryVT = VT;
6660 if (VT.isVector()) {
6661 // If the vector type will be legalized to a vector type with the same
6662 // element type, allow the transform before type legalization if MULHS or
6663 // SMUL_LOHI are supported.
6664 QueryVT = getLegalTypeToTransformTo(*DAG.getContext(), VT);
6665 if (!QueryVT.isVector() ||
6667 return SDValue();
6668 } else if (!isTypeLegal(VT)) {
6669 // Limit this to simple scalars for now.
6670 if (!VT.isSimple())
6671 return SDValue();
6672
6673 // If this type will be promoted to a large enough type with a legal
6674 // multiply operation, we can go ahead and do this transform.
6676 return SDValue();
6677
6678 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6679 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6680 !isOperationLegal(ISD::MUL, MulVT))
6681 return SDValue();
6682 }
6683
6684 bool HasMULHS =
6685 isOperationLegalOrCustom(ISD::MULHS, QueryVT, IsAfterLegalization);
6686 bool HasSMUL_LOHI =
6687 isOperationLegalOrCustom(ISD::SMUL_LOHI, QueryVT, IsAfterLegalization);
6688
6689 if (isTypeLegal(VT) && !HasMULHS && !HasSMUL_LOHI && MulVT == EVT()) {
6690 // If type twice as wide legal, widen and use a mul plus a shift.
6691 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
6692 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6693 // custom lowered. This is very expensive so avoid it at all costs for
6694 // constant divisors.
6695 if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
6698 MulVT = WideVT;
6699 }
6700
// Give up if there is no high-multiply operation and no wider multiply type.
6701 if (!HasMULHS && !HasSMUL_LOHI && MulVT == EVT())
6702 return SDValue();
6703
6704 // If we're after type legalization and SVT is not legal, use the
6705 // promoted type for creating constants to avoid creating nodes with
6706 // illegal types.
6707 if (IsAfterLegalTypes && VT.isVector()) {
6708 SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
6709 if (SVT.bitsLT(VT.getScalarType()))
6710 return SDValue();
6711 ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
6712 if (ShSVT.bitsLT(ShVT.getScalarType()))
6713 return SDValue();
6714 }
6715 const unsigned SVTBits = SVT.getSizeInBits();
6716
6717 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6718
// Per-element callback: computes the magic multiplier, the numerator factor
// (+1, 0 or -1), the shift amount and the sign-bit mask for one divisor.
// Rejects a zero divisor.
6719 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6720 if (C->isZero())
6721 return false;
6722 // Truncate the divisor to the target scalar type in case it was promoted
6723 // during type legalization.
6724 APInt Divisor = C->getAPIntValue().trunc(EltBits);
// NOTE(review): the declaration of `magics` is not visible in this listing.
6726 int NumeratorFactor = 0;
6727 int ShiftMask = -1;
6728
6729 if (Divisor.isOne() || Divisor.isAllOnes()) {
6730 // If d is +1/-1, we just multiply the numerator by +1/-1.
6731 NumeratorFactor = Divisor.getSExtValue();
6732 magics.Magic = 0;
6733 magics.ShiftAmount = 0;
6734 ShiftMask = 0;
6735 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6736 // If d > 0 and m < 0, add the numerator.
6737 NumeratorFactor = 1;
6738 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6739 // If d < 0 and m > 0, subtract the numerator.
6740 NumeratorFactor = -1;
6741 }
6742
6743 MagicFactors.push_back(
6744 DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT));
6745 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6746 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6747 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6748 return true;
6749 };
6750
6751 SDValue N0 = N->getOperand(0);
6752 SDValue N1 = N->getOperand(1);
6753
6754 // Collect the shifts / magic values from each element.
6755 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false,
6756 /*AllowTruncation=*/true))
6757 return SDValue();
6758
// Rebuild the collected constants in the same form as the divisor: build
// vector, splat vector, or single scalar constants.
6759 SDValue MagicFactor, Factor, Shift, ShiftMask;
6760 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6761 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6762 Factor = DAG.getBuildVector(VT, dl, Factors);
6763 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6764 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6765 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6766 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6767 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6768 "Expected matchUnaryPredicate to return one element for scalable "
6769 "vectors");
6770 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6771 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6772 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6773 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6774 } else {
6775 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6776 MagicFactor = MagicFactors[0];
6777 Factor = Factors[0];
6778 Shift = Shifts[0];
6779 ShiftMask = ShiftMasks[0];
6780 }
6781
6782 // Multiply the numerator (operand 0) by the magic value.
6783 auto GetMULHS = [&](SDValue X, SDValue Y) {
6784 if (HasMULHS)
6785 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6786 if (HasSMUL_LOHI) {
6787 SDValue LoHi =
6788 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6789 return LoHi.getValue(1);
6790 }
6791
// Neither MULHS nor SMUL_LOHI is available: sign-extend both operands to
// MulVT, multiply, shift right by EltBits and truncate back to VT.
6792 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6793 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6794 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6795 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6796 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6797 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6798 };
6799
6800 SDValue Q = GetMULHS(N0, MagicFactor);
6801 if (!Q)
6802 return SDValue();
6803
6804 Created.push_back(Q.getNode());
6805
6806 // (Optionally) Add/subtract the numerator using Factor.
6807 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6808 Created.push_back(Factor.getNode());
6809 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6810 Created.push_back(Q.getNode());
6811
6812 // Shift right algebraic by shift value.
6813 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6814 Created.push_back(Q.getNode());
6815
6816 // Extract the sign bit, mask it and add it to the quotient.
// ShiftMask is 0 for divisors of +1/-1 and -1 otherwise, so the AND drops the
// extracted sign bit for those elements and keeps it for all others.
6817 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6818 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6819 Created.push_back(T.getNode());
6820 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6821 Created.push_back(T.getNode());
6822 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6823}
6824
6825 /// Given an ISD::UDIV node expressing a divide by constant,
6826 /// return a DAG expression to select that will generate the same value by
6827 /// multiplying by a magic number.
6828 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
/// Returns an empty SDValue when the transform is not applied. Intermediate
/// nodes are appended to \p Created.
6830 bool IsAfterLegalization,
6831 bool IsAfterLegalTypes,
6832 SmallVectorImpl<SDNode *> &Created) const {
6833 SDLoc dl(N);
6834
6835 // If the udiv has an 'exact' bit we can use a simpler lowering.
6836 if (N->getFlags().hasExact())
6837 return BuildExactUDIV(*this, N, dl, DAG, Created);
6838
6839 EVT VT = N->getValueType(0);
6840 EVT SVT = VT.getScalarType();
6841 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6842 EVT ShSVT = ShVT.getScalarType();
6843 unsigned EltBits = VT.getScalarSizeInBits();
// MulVT stays equal to EVT() unless a wider multiply type is chosen below.
6844 EVT MulVT;
6845
6846 // Check to see if we can do this.
6847 // FIXME: We should be more aggressive here.
6848 EVT QueryVT = VT;
6849 if (VT.isVector()) {
6850 // If the vector type will be legalized to a vector type with the same
6851 // element type, allow the transform before type legalization if MULHU or
6852 // UMUL_LOHI are supported.
6853 QueryVT = getLegalTypeToTransformTo(*DAG.getContext(), VT);
6854 if (!QueryVT.isVector() ||
6856 return SDValue();
6857 } else if (!isTypeLegal(VT)) {
6858 // Limit this to simple scalars for now.
6859 if (!VT.isSimple())
6860 return SDValue();
6861
6862 // If this type will be promoted to a large enough type with a legal
6863 // multiply operation, we can go ahead and do this transform.
6865 return SDValue();
6866
6867 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6868 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6869 !isOperationLegal(ISD::MUL, MulVT))
6870 return SDValue();
6871 }
6872
6873 bool HasMULHU =
6874 isOperationLegalOrCustom(ISD::MULHU, QueryVT, IsAfterLegalization);
6875 bool HasUMUL_LOHI =
6876 isOperationLegalOrCustom(ISD::UMUL_LOHI, QueryVT, IsAfterLegalization);
6877
6878 if (isTypeLegal(VT) && !HasMULHU && !HasUMUL_LOHI && MulVT == EVT()) {
6879 // If type twice as wide legal, widen and use a mul plus a shift.
6880 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
6881 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6882 // custom lowered. This is very expensive so avoid it at all costs for
6883 // constant divisors.
6884 if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
6887 MulVT = WideVT;
6888 }
6889
// Give up if there is no high-multiply operation and no wider multiply type.
6890 if (!HasMULHU && !HasUMUL_LOHI && MulVT == EVT())
6891 return SDValue();
6892
6893 SDValue N0 = N->getOperand(0);
6894 SDValue N1 = N->getOperand(1);
6895
6896 // Try to use leading zeros of the dividend to reduce the multiplier and
6897 // avoid expensive fixups.
6898 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6899
6900 // If we're after type legalization and SVT is not legal, use the
6901 // promoted type for creating constants to avoid creating nodes with
6902 // illegal types.
6903 if (IsAfterLegalTypes && VT.isVector()) {
6904 SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
6905 if (SVT.bitsLT(VT.getScalarType()))
6906 return SDValue();
6907 ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
6908 if (ShSVT.bitsLT(ShVT.getScalarType()))
6909 return SDValue();
6910 }
6911 const unsigned SVTBits = SVT.getSizeInBits();
6912
6913 // Allow i32 to be widened to i64 for uncooperative divisors if i64 MULHU or
6914 // UMUL_LOHI is supported.
6915 const EVT WideSVT = MVT::i64;
6916 const bool HasWideMULHU =
6917 VT == MVT::i32 &&
6918 isOperationLegalOrCustom(ISD::MULHU, WideSVT, IsAfterLegalization);
6919 const bool HasWideUMUL_LOHI =
6920 VT == MVT::i32 &&
6921 isOperationLegalOrCustom(ISD::UMUL_LOHI, WideSVT, IsAfterLegalization);
6922 const bool AllowWiden = (HasWideMULHU || HasWideUMUL_LOHI);
6923
6924 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6925 bool UseWiden = false;
6926 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6927
// Per-element callback: computes the pre-shift, magic multiplier, NPQ factor
// and post-shift for one divisor and records which fixup steps are needed.
// Rejects a zero divisor.
6928 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6929 if (C->isZero())
6930 return false;
6931 // Truncate the divisor to the target scalar type in case it was promoted
6932 // during type legalization.
6933 APInt Divisor = C->getAPIntValue().trunc(EltBits);
6934
6935 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6936
6937 // Magic algorithm doesn't work for division by 1. We need to emit a select
6938 // at the end.
6939 if (Divisor.isOne()) {
6940 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6941 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6942 } else {
// NOTE(review): the declaration of `magics` and the head of the call whose
// arguments follow are not visible in this listing.
6945 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()),
6946 /*AllowEvenDivisorOptimization=*/true,
6947 /*AllowWidenOptimization=*/AllowWiden);
6948
6949 if (magics.Widen) {
6950 UseWiden = true;
6951 MagicFactor = DAG.getConstant(magics.Magic, dl, WideSVT);
6952 } else {
6953 MagicFactor = DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT);
6954 }
6955
6956 assert(magics.PreShift < Divisor.getBitWidth() &&
6957 "We shouldn't generate an undefined shift!");
6958 assert(magics.PostShift < Divisor.getBitWidth() &&
6959 "We shouldn't generate an undefined shift!");
6960 assert((!magics.IsAdd || magics.PreShift == 0) &&
6961 "Unexpected pre-shift");
6962 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6963 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
// The NPQ factor has only bit EltBits - 1 set when the add fixup is needed,
// and is zero otherwise.
6964 NPQFactor = DAG.getConstant(
6965 magics.IsAdd ? APInt::getOneBitSet(SVTBits, EltBits - 1)
6966 : APInt::getZero(SVTBits),
6967 dl, SVT);
6968 UseNPQ |= magics.IsAdd;
6969 UsePreShift |= magics.PreShift != 0;
6970 UsePostShift |= magics.PostShift != 0;
6971 }
6972
6973 PreShifts.push_back(PreShift);
6974 MagicFactors.push_back(MagicFactor);
6975 NPQFactors.push_back(NPQFactor);
6976 PostShifts.push_back(PostShift);
6977 return true;
6978 };
6979
6980 // Collect the shifts/magic values from each element.
6981 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
6982 /*AllowTruncation=*/true))
6983 return SDValue();
6984
6985 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6986 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6987 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6988 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6989 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6990 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6991 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6992 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6993 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6994 "Expected matchUnaryPredicate to return one for scalable vectors");
6995 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6996 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6997 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6998 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6999 } else {
7000 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
// NPQFactor is left unset on this path; it is only consumed below when
// VT.isVector() is true.
7001 PreShift = PreShifts[0];
7002 MagicFactor = MagicFactors[0];
7003 PostShift = PostShifts[0];
7004 }
7005
// The widened path returns directly and skips the pre-shift, NPQ, post-shift
// and divide-by-one select steps below.
7006 if (UseWiden) {
7007 // Compute: (WideSVT(x) * MagicFactor) >> WideSVTBits.
7008 SDValue WideN0 = DAG.getNode(ISD::ZERO_EXTEND, dl, WideSVT, N0);
7009
7010 // Perform WideSVTxWideSVT -> 2*WideSVT multiplication and extract high
7011 // WideSVT bits
7012 SDValue High;
7013 if (HasWideMULHU) {
7014 High = DAG.getNode(ISD::MULHU, dl, WideSVT, WideN0, MagicFactor);
7015 } else {
7016 assert(HasWideUMUL_LOHI);
7017 SDValue LoHi =
7018 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(WideSVT, WideSVT),
7019 WideN0, MagicFactor);
7020 High = LoHi.getValue(1);
7021 }
7022
7023 Created.push_back(High.getNode());
7024 return DAG.getNode(ISD::TRUNCATE, dl, VT, High);
7025 }
7026
7027 SDValue Q = N0;
7028 if (UsePreShift) {
7029 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
7030 Created.push_back(Q.getNode());
7031 }
7032
7033 auto GetMULHU = [&](SDValue X, SDValue Y) {
7034 if (HasMULHU)
7035 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
7036 if (HasUMUL_LOHI) {
7037 SDValue LoHi =
7038 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
7039 return LoHi.getValue(1);
7040 }
7041
// Neither MULHU nor UMUL_LOHI is available: zero-extend both operands to
// MulVT, multiply, shift right by EltBits and truncate back to VT.
7042 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
7043 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
7044 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
7045 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
7046 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
7047 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
7048 };
7049
7050 // Multiply the numerator (operand 0) by the magic value.
7051 Q = GetMULHU(Q, MagicFactor);
7052 if (!Q)
7053 return SDValue();
7054
7055 Created.push_back(Q.getNode());
7056
7057 if (UseNPQ) {
7058 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
7059 Created.push_back(NPQ.getNode());
7060
7061 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
7062 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
7063 if (VT.isVector())
7064 NPQ = GetMULHU(NPQ, NPQFactor);
7065 else
7066 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
7067
7068 Created.push_back(NPQ.getNode());
7069
7070 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
7071 Created.push_back(Q.getNode());
7072 }
7073
7074 if (UsePostShift) {
7075 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
7076 Created.push_back(Q.getNode());
7077 }
7078
7079 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7080
// Elements whose divisor is 1 were given UNDEF magic values above, so select
// the dividend itself wherever the divisor equals one.
7081 SDValue One = DAG.getConstant(1, dl, VT);
7082 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
7083 return DAG.getSelect(dl, VT, IsOne, N0, Q);
7084}
7085
7086 /// If all values in Values that *don't* match the predicate are same 'splat'
7087 /// value, then replace all values with that splat value.
7088 /// Else, if AlternativeReplacement was provided, then replace all values that
7089 /// do match predicate with AlternativeReplacement value.
/// If neither replacement is available, Values is left unchanged.
7090static void
7092 std::function<bool(SDValue)> Predicate,
7093 SDValue AlternativeReplacement = SDValue()) {
7094 SDValue Replacement;
7095 // Is there a value for which the Predicate does *NOT* match? What is it?
7096 auto SplatValue = llvm::find_if_not(Values, Predicate);
7097 if (SplatValue != Values.end()) {
7098 // Does Values consist only of SplatValue's and values matching Predicate?
7099 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
7100 return Value == *SplatValue || Predicate(Value);
7101 })) // Then we shall replace values matching predicate with SplatValue.
7102 Replacement = *SplatValue;
7103 }
7104 if (!Replacement) {
7105 // Oops, we did not find the "baseline" splat value.
7106 if (!AlternativeReplacement)
7107 return; // Nothing to do.
7108 // Let's replace with provided value then.
7109 Replacement = AlternativeReplacement;
7110 }
// Only the elements that match Predicate are overwritten.
7111 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
7112}
7113
7114 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
7115 /// where the divisor and comparison target are constants,
7116 /// return a DAG expression that will generate the same comparison result
7117 /// using only multiplications, additions and shifts/rotations.
7118 /// Ref: "Hacker's Delight" 10-17.
/// Delegates to prepareUREMEqFold. On success, every node it reported as built
/// is added to the combiner worklist; otherwise an empty SDValue is returned.
7119SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
7120 SDValue CompTargetNode,
7122 DAGCombinerInfo &DCI,
7123 const SDLoc &DL) const {
// NOTE(review): the declaration of `Built` is not visible in this listing.
7125 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7126 DCI, DL, Built)) {
7127 for (SDNode *N : Built)
7128 DCI.AddToWorklist(N);
7129 return Folded;
7130 }
7131
7132 return SDValue();
7133}
7134
/// Build the replacement for `(seteq/ne (urem N, D), C)` described in the
/// comment at the top of the body.
///
/// Nodes appended to \p Created: the MUL, the ROTR (only when some divisor is
/// even) and, on the tautological-lane fixup path, the two SETCC nodes.
/// Returns an empty SDValue whenever the fold is not applied.
7135SDValue
7136TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
7137 SDValue CompTargetNode, ISD::CondCode Cond,
7138 DAGCombinerInfo &DCI, const SDLoc &DL,
7139 SmallVectorImpl<SDNode *> &Created) const {
7140 // fold (seteq/ne (urem N, D), C) ->
7141 // (setule/ugt (rotr (mul (sub N, C), P), K), Q)
7142 // - D must be constant, with D = D0 * 2^K where D0 is odd
7143 // - P is the multiplicative inverse of D0 modulo 2^W
7144 // - Q = floor(((2^W) - 1) / D)
7145 // where W is the width of the common type of N and D.
7146 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7147 "Only applicable for (in)equality comparisons.");
7148
7149 SelectionDAG &DAG = DCI.DAG;
7150
7151 EVT VT = REMNode.getValueType();
7152 EVT SVT = VT.getScalarType();
7153 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7154 EVT ShSVT = ShVT.getScalarType();
7155
7156 // If MUL is unavailable, we cannot proceed in any case.
7157 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7158 return SDValue();
7159
 // Per-lane facts accumulated by BuildUREMPattern below.
7160 bool ComparingWithAllZeros = true;
7161 bool AllComparisonsWithNonZerosAreTautological = true;
7162 bool HadTautologicalLanes = false;
7163 bool AllLanesAreTautological = true;
7164 bool HadEvenDivisor = false;
7165 bool AllDivisorsArePowerOfTwo = true;
7166 bool HadTautologicalInvertedLanes = false;
7167 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
7168
 // Invoked once per (divisor, comparison constant) pair; computes that lane's
 // P, K and Q constants and updates the flags above.
7169 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
7170 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7171 if (CDiv->isZero())
7172 return false;
7173
7174 const APInt &D = CDiv->getAPIntValue();
7175 const APInt &Cmp = CCmp->getAPIntValue();
7176
7177 ComparingWithAllZeros &= Cmp.isZero();
7178
7179 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7180 // if C2 is not less than C1, the comparison is always false.
7181 // But we will only be able to produce the comparison that will give the
7182 // opposite tautological answer. So this lane would need to be fixed up.
7183 bool TautologicalInvertedLane = D.ule(Cmp);
7184 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
7185
7186 // If all lanes are tautological (either all divisors are ones, or divisor
7187 // is not greater than the constant we are comparing with),
7188 // we will prefer to avoid the fold.
7189 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
7190 HadTautologicalLanes |= TautologicalLane;
7191 AllLanesAreTautological &= TautologicalLane;
7192
7193 // If we are comparing with non-zero, we'll need to subtract said
7194 // comparison value from the LHS. But there is no point in doing that if
7195 // every lane where we are comparing with non-zero is tautological.
7196 if (!Cmp.isZero())
7197 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
7198
7199 // Decompose D into D0 * 2^K
7200 unsigned K = D.countr_zero();
7201 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7202 APInt D0 = D.lshr(K);
7203
7204 // D is even if it has trailing zeros.
7205 HadEvenDivisor |= (K != 0);
7206 // D is a power-of-two if D0 is one.
7207 // If all divisors are power-of-two, we will prefer to avoid the fold.
7208 AllDivisorsArePowerOfTwo &= D0.isOne();
7209
7210 // P = inv(D0, 2^W)
7211 // 2^W requires W + 1 bits, so we have to extend and then truncate.
 // NOTE(review): no extension/truncation is visible below; the inverse is
 // taken directly via multiplicativeInverse(). The remark above may be stale.
7212 unsigned W = D.getBitWidth();
7213 APInt P = D0.multiplicativeInverse();
7214 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7215
7216 // Q = floor((2^W - 1) u/ D)
7217 // R = ((2^W - 1) u% D)
7218 APInt Q, R;
7220
7221 // If we are comparing with zero, then that comparison constant is okay,
7222 // else it may need to be one less than that.
7223 if (Cmp.ugt(R))
7224 Q -= 1;
7225
7227 "We are expecting that K is always less than all-ones for ShSVT");
7228
7229 // If the lane is tautological the result can be constant-folded.
7230 if (TautologicalLane) {
7231 // Set P and K amount to bogus values so we can try to splat them.
7232 P = 0;
7233 KAmts.push_back(DAG.getAllOnesConstant(DL, ShSVT));
7234 // And ensure that comparison constant is tautological,
7235 // it will always compare true/false.
7236 Q.setAllBits();
7237 } else {
7238 KAmts.push_back(DAG.getConstant(K, DL, ShSVT));
7239 }
7240
7241 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7242 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7243 return true;
7244 };
7245
7246 SDValue N = REMNode.getOperand(0);
7247 SDValue D = REMNode.getOperand(1);
7248
7249 // Collect the values from each element.
7250 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
7251 return SDValue();
7252
7253 // If all lanes are tautological, the result can be constant-folded.
7254 if (AllLanesAreTautological)
7255 return SDValue();
7256
7257 // If this is a urem by a powers-of-two, avoid the fold since it can be
7258 // best implemented as a bit test.
7259 if (AllDivisorsArePowerOfTwo)
7260 return SDValue();
7261
 // Materialize the P/K/Q operands in the form matching the divisor node:
 // BUILD_VECTOR, SPLAT_VECTOR, or a plain scalar.
7262 SDValue PVal, KVal, QVal;
7263 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7264 if (HadTautologicalLanes) {
7265 // Try to turn PAmts into a splat, since we don't care about the values
7266 // that are currently '0'. If we can't, just keep the '0's.
7268 // Try to turn KAmts into a splat, since we don't care about the values
7269 // that are currently '-1'. If we can't, change them to '0's.
7271 DAG.getConstant(0, DL, ShSVT));
7272 }
7273
7274 PVal = DAG.getBuildVector(VT, DL, PAmts);
7275 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7276 QVal = DAG.getBuildVector(VT, DL, QAmts);
7277 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7278 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
7279 "Expected matchBinaryPredicate to return one element for "
7280 "SPLAT_VECTORs");
7281 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7282 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7283 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7284 } else {
7285 PVal = PAmts[0];
7286 KVal = KAmts[0];
7287 QVal = QAmts[0];
7288 }
7289
 // Only subtract the comparison constant when some non-tautological lane
 // actually compares against a non-zero value.
7290 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7291 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
7292 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
7293 assert(CompTargetNode.getValueType() == N.getValueType() &&
7294 "Expecting that the types on LHS and RHS of comparisons match.");
7295 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
7296 }
7297
7298 // (mul N, P)
7299 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7300 Created.push_back(Op0.getNode());
7301
7302 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7303 // divisors as a performance improvement, since rotating by 0 is a no-op.
7304 if (HadEvenDivisor) {
7305 // We need ROTR to do this.
7306 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7307 return SDValue();
7308 // UREM: (rotr (mul N, P), K)
7309 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7310 Created.push_back(Op0.getNode());
7311 }
7312
7313 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
7314 SDValue NewCC =
7315 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7317 if (!HadTautologicalInvertedLanes)
7318 return NewCC;
7319
7320 // If any lanes previously compared always-false, the NewCC will give
7321 // always-true result for them, so we need to fixup those lanes.
7322 // Or the other way around for inequality predicate.
7323 assert(VT.isVector() && "Can/should only get here for vectors.");
7324 Created.push_back(NewCC.getNode());
7325
7326 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7327 // if C2 is not less than C1, the comparison is always false.
7328 // But we have produced the comparison that will give the
7329 // opposite tautological answer. So these lanes would need to be fixed up.
7330 SDValue TautologicalInvertedChannels =
7331 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
7332 Created.push_back(TautologicalInvertedChannels.getNode());
7333
7334 // NOTE: we avoid letting illegal types through even if we're before legalize
7335 // ops – legalization has a hard time producing good code for this.
7336 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
7337 // If we have a vector select, let's replace the comparison results in the
7338 // affected lanes with the correct tautological result.
7339 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
7340 DL, SETCCVT, SETCCVT);
7341 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
7342 Replacement, NewCC);
7343 }
7344
7345 // Else, we can just invert the comparison result in the appropriate lanes.
7346 //
7347 // NOTE: see the note above the VSELECT check.
7348 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
7349 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
7350 TautologicalInvertedChannels);
7351
7352 return SDValue(); // Don't know how to lower.
7353}
7354
7355/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7356/// where the divisor is constant and the comparison target is zero,
7357/// return a DAG expression that will generate the same comparison result
7358/// using only multiplications, additions and shifts/rotations.
7359/// Ref: "Hacker's Delight" 10-17.
///
/// This is a thin wrapper around prepareSREMEqFold(): when that helper
/// succeeds, every node it recorded in Built is queued on the combiner
/// worklist and the folded value is returned; otherwise an empty SDValue is
/// returned.
7360SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7361 SDValue CompTargetNode,
7363 DAGCombinerInfo &DCI,
7364 const SDLoc &DL) const {
7366 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7367 DCI, DL, Built)) {
 // Sanity check on how many intermediate nodes the helper recorded.
7368 assert(Built.size() <= 7 && "Max size prediction failed.");
 // Let the combiner revisit each newly created intermediate node.
7369 for (SDNode *N : Built)
7370 DCI.AddToWorklist(N);
7371 return Folded;
7372 }
7373
 // The fold was not applied.
7374 return SDValue();
7375}
7376
/// Build the replacement for `(seteq/ne (srem N, D), 0)` described in the
/// comment at the top of the body.
///
/// Nodes appended to \p Created: the MUL, the ADD and, only when some divisor
/// is even, the ROTR. Returns an empty SDValue whenever the fold is not
/// applied (including when the comparison target is not a zero constant).
7377SDValue
7378TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7379 SDValue CompTargetNode, ISD::CondCode Cond,
7380 DAGCombinerInfo &DCI, const SDLoc &DL,
7381 SmallVectorImpl<SDNode *> &Created) const {
7382 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7383 // Fold:
7384 // (seteq/ne (srem N, D), 0)
7385 // To:
7386 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7387 //
7388 // - D must be constant, with D = D0 * 2^K where D0 is odd
7389 // - P is the multiplicative inverse of D0 modulo 2^W
7390 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^K)))
7391 // - Q = floor((2 * A) / (2^K))
7392 // where W is the width of the common type of N and D.
7393 //
7394 // When D is a power of two (and thus D0 is 1), the normal
7395 // formula for A and Q don't apply, because the derivation
7396 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7397 // does not apply. This specifically fails when N = INT_MIN.
7398 //
7399 // Instead, for power-of-two D, we use:
7400 // - A = 0
7401 // | -> No offset needed. We're effectively treating it the same as urem.
7402 // - Q = 2^(W-K) - 1
7403 // |-> Test that the top K bits are zero after rotation
7404 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7405 "Only applicable for (in)equality comparisons.");
7406
7407 SelectionDAG &DAG = DCI.DAG;
7408
7409 EVT VT = REMNode.getValueType();
7410 EVT SVT = VT.getScalarType();
7411 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7412 EVT ShSVT = ShVT.getScalarType();
7413
7414 // If we are after ops legalization, and MUL is unavailable, we can not
7415 // proceed.
7416 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7417 return SDValue();
7418
7419 // TODO: Could support comparing with non-zero too.
7420 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7421 if (!CompTarget || !CompTarget->isZero())
7422 return SDValue();
7423
 // Per-lane facts accumulated by BuildSREMPattern below.
7424 bool HadOneDivisor = false;
7425 bool AllDivisorsAreOnes = true;
7426 bool HadEvenDivisor = false;
7427 bool AllDivisorsArePowerOfTwo = true;
7428 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7429
 // Invoked once per divisor constant; computes that lane's P, A, K and Q
 // constants and updates the flags above.
7430 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7431 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7432 if (C->isZero())
7433 return false;
7434
7435 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7436
7437 // WARNING: this fold is only valid for positive divisors!
7438 // `rem %X, -C` is equivalent to `rem %X, C`
7439 APInt D = C->getAPIntValue().abs();
7440
7441 // If all divisors are ones, we will prefer to avoid the fold.
7442 HadOneDivisor |= D.isOne();
7443 AllDivisorsAreOnes &= D.isOne();
7444
7445 // Decompose D into D0 * 2^K
7446 unsigned K = D.countr_zero();
7447 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7448 APInt D0 = D.lshr(K);
7449
7450 // D is even if it has trailing zeros.
7451 HadEvenDivisor |= (K != 0);
7452
7453 // D is a power-of-two if D0 is one. This includes INT_MIN.
7454 // If all divisors are power-of-two, we will prefer to avoid the fold.
7455 AllDivisorsArePowerOfTwo &= D0.isOne();
7456
7457 // P = inv(D0, 2^W)
7458 // 2^W requires W + 1 bits, so we have to extend and then truncate.
 // NOTE(review): no extension/truncation is visible below; the inverse is
 // taken directly via multiplicativeInverse(). The remark above may be stale.
7459 unsigned W = D.getBitWidth();
7460 APInt P = D0.multiplicativeInverse();
7461 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7462
7463 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7464 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7465 A.clearLowBits(K);
7466
7467 // Q = floor((2 * A) / (2^K))
7468 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7469
7471 "We are expecting that A is always less than all-ones for SVT");
7473 "We are expecting that K is always less than all-ones for ShSVT");
7474
7475 // If D was a power of two, apply the alternate constant derivation.
7476 if (D0.isOne()) {
7477 // A = 0
7478 A = APInt(W, 0);
7479 // - Q = 2^(W-K) - 1
7480 Q = APInt::getLowBitsSet(W, W - K);
7481 }
7482
7483 // If the divisor is 1 the result can be constant-folded.
7484 if (D.isOne()) {
7485 // Set P, A and K to bogus values so we can try to splat them.
7486 P = 0;
7487 A.setAllBits();
7488 KAmts.push_back(DAG.getAllOnesConstant(DL, ShSVT));
7489
7490 // x ?% 1 == 0 <--> true <--> x u<= -1
7491 Q.setAllBits();
7492 } else {
7493 KAmts.push_back(DAG.getConstant(K, DL, ShSVT));
7494 }
7495
7496 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7497 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7498 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7499 return true;
7500 };
7501
7502 SDValue N = REMNode.getOperand(0);
7503 SDValue D = REMNode.getOperand(1);
7504
7505 // Collect the values from each element.
7506 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7507 return SDValue();
7508
7509 // If this is a srem by a one, avoid the fold since it can be constant-folded.
7510 if (AllDivisorsAreOnes)
7511 return SDValue();
7512
7513 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7514 // since it can be best implemented as a bit test.
7515 if (AllDivisorsArePowerOfTwo)
7516 return SDValue();
7517
 // Materialize the P/A/K/Q operands in the form matching the divisor node:
 // BUILD_VECTOR, SPLAT_VECTOR, or a plain scalar constant.
7518 SDValue PVal, AVal, KVal, QVal;
7519 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7520 if (HadOneDivisor) {
7521 // Try to turn PAmts into a splat, since we don't care about the values
7522 // that are currently '0'. If we can't, just keep the '0's.
7524 // Try to turn AAmts into a splat, since we don't care about the
7525 // values that are currently '-1'. If we can't, change them to '0's.
7527 DAG.getConstant(0, DL, SVT));
7528 // Try to turn KAmts into a splat, since we don't care about the values
7529 // that are currently '-1'. If we can't, change them to '0's.
7531 DAG.getConstant(0, DL, ShSVT));
7532 }
7533
7534 PVal = DAG.getBuildVector(VT, DL, PAmts);
7535 AVal = DAG.getBuildVector(VT, DL, AAmts);
7536 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7537 QVal = DAG.getBuildVector(VT, DL, QAmts);
7538 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7539 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7540 QAmts.size() == 1 &&
7541 "Expected matchUnaryPredicate to return one element for scalable "
7542 "vectors");
7543 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7544 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7545 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7546 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7547 } else {
7548 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7549 PVal = PAmts[0];
7550 AVal = AAmts[0];
7551 KVal = KAmts[0];
7552 QVal = QAmts[0];
7553 }
7554
7555 // (mul N, P)
7556 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7557 Created.push_back(Op0.getNode());
7558
7559 // We need ADD to do this.
7560 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7561 return SDValue();
7562
7563 // (add (mul N, P), A)
7564 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7565 Created.push_back(Op0.getNode());
7566
7567 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7568 // divisors as a performance improvement, since rotating by 0 is a no-op.
7569 if (HadEvenDivisor) {
7570 // We need ROTR to do this.
7571 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7572 return SDValue();
7573 // SREM: (rotr (add (mul N, P), A), K)
7574 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7575 Created.push_back(Op0.getNode());
7576 }
7577
7578 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7579 return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7581}
7582
7584 const DenormalMode &Mode,
7585 SDNodeFlags Flags) const {
 // Builds a SETCC on Op whose form depends on Mode.Input: an equality test
 // against 0.0 for the PreserveSign / PositiveZero modes, otherwise a
 // `fabs(Op) < smallest normalized value` test. Flags is forwarded to the
 // nodes created here.
7586 SDLoc DL(Op);
7587 EVT VT = Op.getValueType();
7588 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7589 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7590
7591 // This is specifically a check for the handling of denormal inputs, not the
7592 // result.
7593 if (Mode.Input == DenormalMode::PreserveSign ||
7594 Mode.Input == DenormalMode::PositiveZero) {
7595 // Test = X == 0.0
7596 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ, /*Chain=*/{},
7597 /*Signaling=*/false, Flags);
7598 }
7599
7600 // Testing it with denormal inputs to avoid wrong estimate.
7601 //
7602 // Test = fabs(X) < SmallestNormal
7603 const fltSemantics &FltSem = VT.getFltSemantics();
7604 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7605 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7606 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op, Flags);
7607 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT, /*Chain=*/{},
7608 /*Signaling=*/false, Flags);
7609}
7610
7612 bool LegalOps, bool OptForSize,
7614 unsigned Depth) const {
 // Try to build an expression equal to the negation of Op, dispatching on
 // Op's opcode. An empty SDValue is returned when no negated form is
 // produced.
7615 // fneg is removable even if it has multiple uses.
7616 if (Op.getOpcode() == ISD::FNEG) {
7618 return Op.getOperand(0);
7619 }
7620
7621 // Don't recurse exponentially.
7623 return SDValue();
7624
7625 // Pre-increment recursion depth for use in recursive calls.
7626 ++Depth;
7627 const SDNodeFlags Flags = Op->getFlags();
7628 EVT VT = Op.getValueType();
7629 unsigned Opcode = Op.getOpcode();
7630
7631 // Don't allow anything with multiple uses unless we know it is free.
7632 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7633 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7634 isFPExtFree(VT, Op.getOperand(0).getValueType());
7635 if (!IsFreeExtend)
7636 return SDValue();
7637 }
7638
 // Deletes a speculatively created node that ended up with no users.
7639 auto RemoveDeadNode = [&](SDValue N) {
7640 if (N && N.getNode()->use_empty())
7641 DAG.RemoveDeadNode(N.getNode());
7642 };
7643
7644 SDLoc DL(Op);
7645
7646 // Because getNegatedExpression can delete nodes we need a handle to keep
7647 // temporary nodes alive in case the recursion manages to create an identical
7648 // node.
7649 std::list<HandleSDNode> Handles;
7650
7651 switch (Opcode) {
7652 case ISD::ConstantFP: {
7653 // Don't invert constant FP values after legalization unless the target says
7654 // the negated constant is legal.
7655 bool IsOpLegal =
7657 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7658 OptForSize);
7659
7660 if (LegalOps && !IsOpLegal)
7661 break;
7662
7663 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7664 V.changeSign();
7665 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7666
7667 // If we already have the use of the negated floating constant, it is free
7668 // to negate it even if it has multiple uses.
7669 if (!Op.hasOneUse() && CFP.use_empty())
7670 break;
7672 return CFP;
7673 }
7674 case ISD::SPLAT_VECTOR: {
7675 // fold splat_vector(fneg(X)) -> splat_vector(-X)
7676 SDValue X = Op.getOperand(0);
7678 break;
7679
7680 SDValue NegX = getCheaperNegatedExpression(X, DAG, LegalOps, OptForSize);
7681 if (!NegX)
7682 break;
7684 return DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, NegX);
7685 }
7686 case ISD::BUILD_VECTOR: {
7687 // Only permit BUILD_VECTOR of constants.
7688 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7689 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7690 }))
7691 break;
7692
7693 bool IsOpLegal =
7696 llvm::all_of(Op->op_values(), [&](SDValue N) {
7697 return N.isUndef() ||
7698 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7699 OptForSize);
7700 });
7701
7702 if (LegalOps && !IsOpLegal)
7703 break;
7704
 // Rebuild the vector with each constant's sign flipped; undef elements are
 // passed through unchanged.
7706 for (SDValue C : Op->op_values()) {
7707 if (C.isUndef()) {
7708 Ops.push_back(C);
7709 continue;
7710 }
7711 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7712 V.changeSign();
7713 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7714 }
7716 return DAG.getBuildVector(VT, DL, Ops);
7717 }
7718 case ISD::FADD: {
7719 if (!Flags.hasNoSignedZeros())
7720 break;
7721
7722 // After operation legalization, it might not be legal to create new FSUBs.
7723 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7724 break;
7725 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7726
7727 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7729 SDValue NegX =
7730 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7731 // Prevent this node from being deleted by the next call.
7732 if (NegX)
7733 Handles.emplace_back(NegX);
7734
7735 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7737 SDValue NegY =
7738 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7739
7740 // We're done with the handles.
7741 Handles.clear();
7742
7743 // Negate X if its cost is less than or equal to that of Y.
7744 if (NegX && (CostX <= CostY)) {
7745 Cost = CostX;
7746 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7747 if (NegY != N)
7748 RemoveDeadNode(NegY);
7749 return N;
7750 }
7751
7752 // Negate the Y if it is not expensive.
7753 if (NegY) {
7754 Cost = CostY;
7755 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7756 if (NegX != N)
7757 RemoveDeadNode(NegX);
7758 return N;
7759 }
7760 break;
7761 }
7762 case ISD::FSUB: {
7763 // We can't turn -(A-B) into B-A when we honor signed zeros.
7764 if (!Flags.hasNoSignedZeros())
7765 break;
7766
7767 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7768 // fold (fneg (fsub 0, Y)) -> Y
7769 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7770 if (C->isZero()) {
7772 return Y;
7773 }
7774
7775 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7777 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7778 }
7779 case ISD::FMUL:
7780 case ISD::FDIV: {
7781 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7782
7783 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7785 SDValue NegX =
7786 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7787 // Prevent this node from being deleted by the next call.
7788 if (NegX)
7789 Handles.emplace_back(NegX);
7790
7791 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7793 SDValue NegY =
7794 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7795
7796 // We're done with the handles.
7797 Handles.clear();
7798
7799 // Negate X if its cost is less than or equal to that of Y.
7800 if (NegX && (CostX <= CostY)) {
7801 Cost = CostX;
7802 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7803 if (NegY != N)
7804 RemoveDeadNode(NegY);
7805 return N;
7806 }
7807
7808 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7809 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7810 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7811 break;
7812
7813 // Negate the Y if it is not expensive.
7814 if (NegY) {
7815 Cost = CostY;
7816 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7817 if (NegX != N)
7818 RemoveDeadNode(NegX);
7819 return N;
7820 }
7821 break;
7822 }
7823 case ISD::FMA:
7824 case ISD::FMULADD:
7825 case ISD::FMAD: {
7826 if (!Flags.hasNoSignedZeros())
7827 break;
7828
7829 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7831 SDValue NegZ =
7832 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7833 // Give up if fail to negate the Z.
7834 if (!NegZ)
7835 break;
7836
7837 // Prevent this node from being deleted by the next two calls.
7838 Handles.emplace_back(NegZ);
7839
7840 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7842 SDValue NegX =
7843 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7844 // Prevent this node from being deleted by the next call.
7845 if (NegX)
7846 Handles.emplace_back(NegX);
7847
7848 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7850 SDValue NegY =
7851 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7852
7853 // We're done with the handles.
7854 Handles.clear();
7855
7856 // Negate X if its cost is less than or equal to that of Y.
7857 if (NegX && (CostX <= CostY)) {
7858 Cost = std::min(CostX, CostZ);
7859 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7860 if (NegY != N)
7861 RemoveDeadNode(NegY);
7862 return N;
7863 }
7864
7865 // Negate the Y if it is not expensive.
7866 if (NegY) {
7867 Cost = std::min(CostY, CostZ);
7868 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7869 if (NegX != N)
7870 RemoveDeadNode(NegX);
7871 return N;
7872 }
7873 break;
7874 }
7875
 // Unary cases: negate the operand and re-apply the same opcode.
7876 case ISD::FP_EXTEND:
7877 case ISD::FSIN:
7878 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7879 OptForSize, Cost, Depth))
7880 return DAG.getNode(Opcode, DL, VT, NegV);
7881 break;
7882 case ISD::FP_ROUND:
 // Same as above, but FP_ROUND's second operand is carried over unchanged.
7883 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7884 OptForSize, Cost, Depth))
7885 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7886 break;
7887 case ISD::SELECT:
7888 case ISD::VSELECT: {
7889 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7890 // iff at least one cost is cheaper and the other is neutral/cheaper
7891 SDValue LHS = Op.getOperand(1);
7893 SDValue NegLHS =
7894 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7895 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7896 RemoveDeadNode(NegLHS);
7897 break;
7898 }
7899
7900 // Prevent this node from being deleted by the next call.
7901 Handles.emplace_back(NegLHS);
7902
7903 SDValue RHS = Op.getOperand(2);
7905 SDValue NegRHS =
7906 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7907
7908 // We're done with the handles.
7909 Handles.clear();
7910
7911 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7912 (CostLHS != NegatibleCost::Cheaper &&
7913 CostRHS != NegatibleCost::Cheaper)) {
7914 RemoveDeadNode(NegLHS);
7915 RemoveDeadNode(NegRHS);
7916 break;
7917 }
7918
7919 Cost = std::min(CostLHS, CostRHS);
7920 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7921 }
7922 }
7923
 // No negated form was produced for this opcode.
7924 return SDValue();
7925}
7926
7927//===----------------------------------------------------------------------===//
7928// Legalization Utilities
7929//===----------------------------------------------------------------------===//
7930
/// Expand a MUL / UMUL_LOHI / SMUL_LOHI of type \p VT into operations on the
/// narrower type \p HiLoVT, pushing the resulting HiLoVT-typed pieces onto
/// Result, lowest part first. Two pieces are pushed for ISD::MUL, four for
/// the *MUL_LOHI opcodes.
///
/// \p LL / \p LH / \p RL / \p RH may supply pre-split low/high parts of the
/// operands; they must be either all set or all null. Returns false when no
/// expansion could be built.
7931bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7932 SDValue LHS, SDValue RHS,
7934 EVT HiLoVT, SelectionDAG &DAG,
7935 MulExpansionKind Kind, SDValue LL,
7936 SDValue LH, SDValue RL, SDValue RH) const {
7937 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7938 Opcode == ISD::SMUL_LOHI);
7939
 // Each flag is forced on when Kind is MulExpansionKind::Always.
7940 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7942 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7944 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7946 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7948
7949 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7950 return false;
7951
7952 unsigned OuterBitSize = VT.getScalarSizeInBits();
7953 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7954
7955 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7956 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7957 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7958
 // Emit a HiLoVT x HiLoVT multiply producing both halves: a single
 // [SU]MUL_LOHI when available, otherwise MUL plus MULH[SU]. Returns false
 // if neither form is available for the requested signedness.
7959 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7960 bool Signed) -> bool {
7961 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7962 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7963 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7964 Hi = Lo.getValue(1);
7965 return true;
7966 }
7967 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7968 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7969 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7970 return true;
7971 }
7972 return false;
7973 };
7974
7975 SDValue Lo, Hi;
7976
 // Derive the low parts by truncation when the caller did not supply them.
7977 if (!LL.getNode() && !RL.getNode() &&
7979 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7980 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7981 }
7982
7983 if (!LL.getNode())
7984 return false;
7985
7986 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7987 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7988 DAG.MaskedValueIsZero(RHS, HighMask)) {
7989 // The inputs are both zero-extended.
7990 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7991 Result.push_back(Lo);
7992 Result.push_back(Hi);
 // For the *MUL_LOHI opcodes the two upper pieces are zero.
7993 if (Opcode != ISD::MUL) {
7994 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7995 Result.push_back(Zero);
7996 Result.push_back(Zero);
7997 }
7998 return true;
7999 }
8000 }
8001
8002 if (!VT.isVector() && Opcode == ISD::MUL &&
8003 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
8004 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
8005 // The input values are both sign-extended.
8006 // TODO non-MUL case?
8007 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
8008 Result.push_back(Lo);
8009 Result.push_back(Hi);
8010 return true;
8011 }
8012 }
8013
8014 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
8015 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
8016
 // Derive the high parts (shift right, then truncate) when the caller did
 // not supply them.
8017 if (!LH.getNode() && !RH.getNode() &&
8020 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
8021 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
8022 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
8023 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
8024 }
8025
8026 if (!LH.getNode())
8027 return false;
8028
 // LL * RL gives the lowest result piece plus a carry-in (Hi) for the next.
8029 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
8030 return false;
8031
8032 Result.push_back(Lo);
8033
 // A plain MUL only needs the low VT-sized product, so the cross terms are
 // added into the high piece with ordinary HiLoVT multiplies.
8034 if (Opcode == ISD::MUL) {
8035 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
8036 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
8037 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
8038 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
8039 Result.push_back(Hi);
8040 return true;
8041 }
8042
8043 // Compute the full width result.
8044 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
8045 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
8046 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
8047 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
8048 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
8049 };
8050
8051 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
8052 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
8053 return false;
8054
8055 // This is effectively the add part of a multiply-add of half-sized operands,
8056 // so it cannot overflow.
8057 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
8058
8059 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
8060 return false;
8061
8062 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
8063 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8064
 // This addition can carry; use ADDC/ADDE (glue) when UseGlue is set,
 // otherwise UADDO_CARRY with an explicit boolean carry.
8065 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
8067 if (UseGlue)
8068 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
8069 Merge(Lo, Hi));
8070 else
8071 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
8072 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
8073
8074 SDValue Carry = Next.getValue(1);
8075 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8076 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
8077
8078 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
8079 return false;
8080
 // Fold the carry from the previous addition into the high half.
8081 if (UseGlue)
8082 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
8083 Carry);
8084 else
8085 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
8086 Zero, Carry);
8087
8088 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
8089
 // Signed correction: subtract zext(RL) when LH is negative, then zext(LL)
 // when RH is negative.
8090 if (Opcode == ISD::SMUL_LOHI) {
8091 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
8092 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
8093 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
8094
8095 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
8096 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
8097 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
8098 }
8099
 // Emit the two upper result pieces.
8100 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8101 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
8102 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8103 return true;
8104}
8105
// Two-result convenience wrapper around expandMUL_LOHI: forwards the node's
// opcode, value type, debug location and both operands, and on success copies
// Result[0] / Result[1] into Lo / Hi. Returns whether the expansion succeeded;
// Lo and Hi are not written on failure.
// NOTE(review): the opening line of this signature (listing line 8106) and the
// declaration of Result (listing line 8110) are absent from this extract.
8107 SelectionDAG &DAG, MulExpansionKind Kind,
8108 SDValue LL, SDValue LH, SDValue RL,
8109 SDValue RH) const {
8111 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
8112 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
8113 DAG, Kind, LL, LH, RL, RH);
8114 if (Ok) {
 // This entry point only hands back a low and a high half, so exactly two
 // values are expected from the expansion.
8115 assert(Result.size() == 2);
8116 Lo = Result[0];
8117 Hi = Result[1];
8118 }
8119 return Ok;
8120}
8121
8122// Optimize unsigned division or remainder by constants for types twice as large
8123// as a legal VT.
8124//
8125// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
8126// can be computed
8127// as:
8128// Sum = __builtin_uadd_overflow(Lo, High, &Sum);
8129// Remainder = Sum % Constant;
8130//
8131// If (1 << (BitWidth / 2)) % Constant != 1, we can search for a smaller value
8132// W such that W != (BitWidth / 2) and (1 << W) % Constant == 1. We can break
8133// High:Low into 3 chunks of W bits and compute remainder as
8134// Sum = Chunk0 + Chunk1 + Chunk2;
8135// Remainder = Sum % Constant;
8136//
8137// This is based on "Remainder by Summing Digits" from Hacker's Delight.
8138//
8139// For division, we can compute the remainder using the algorithm described
8140// above, subtract it from the dividend to get an exact multiple of Constant.
8141// Then multiply that exact multiple by the multiplicative inverse modulo
8142// (1 << BitWidth) to get the quotient.
8143
8144// If Constant is even, we can shift right the dividend and the divisor by the
8145// number of trailing zeros in Constant before applying the remainder algorithm.
8146// If we're after the quotient, we can subtract this value from the shifted
8147// dividend and multiply by the multiplicative inverse of the shifted divisor.
8148// If we want the remainder, we shift the value left by the number of trailing
8149// zeros and add the bits that were shifted out of the dividend.
// Returns false without pushing anything to Result when the odd part of
// Divisor does not fit in HBitWidth bits or when no usable chunk width is
// found. On success pushes QuotL, QuotH (unless the opcode is UREM) followed by
// RemL, RemH (unless the opcode is UDIV). If LL/LH are null they are obtained
// by splitting operand 0 of N into two HiLoVT halves.
// NOTE(review): listing lines 8158, 8277, 8288, 8392 and 8404 are absent from
// this extract, so a few statements below appear truncated.
8150bool TargetLowering::expandUDIVREMByConstantViaUREMDecomposition(
8151 SDNode *N, APInt Divisor, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
8152 SelectionDAG &DAG, SDValue LL, SDValue LH) const {
8153 unsigned Opcode = N->getOpcode();
8154 EVT VT = N->getValueType(0);
8155
8156 unsigned BitWidth = Divisor.getBitWidth();
8157 unsigned HBitWidth = BitWidth / 2;
8159 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8160
8161 // If the divisor is even, shift it until it becomes odd.
8162 unsigned TrailingZeros = 0;
8163 if (!Divisor[0]) {
8164 TrailingZeros = Divisor.countr_zero();
8165 Divisor.lshrInPlace(TrailingZeros);
8166 }
8167
8168 // After removing trailing zeros, the divisor needs to be less than
8169 // (1 << HBitWidth).
8170 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
8171 if (Divisor.uge(HalfMaxPlus1))
8172 return false;
8173
8174 // Look for the largest chunk width W such that (1 << W) % Divisor == 1 or
8175 // (1 << W) % Divisor == -1.
 // Candidates run from HBitWidth down to HBitWidth / 2 + 1.
8176 unsigned BestChunkWidth = 0, AltChunkWidth = 0;
8177 for (unsigned I = HBitWidth, E = HBitWidth / 2; I > E; --I) {
8178 // Skip HBitWidth-1, it doesn't have enough bits for carries.
8179 if (I == HBitWidth - 1)
8180 continue;
8181
8182 APInt Mod = APInt::getOneBitSet(Divisor.getBitWidth(), I).urem(Divisor);
8183
8184 if (Mod.isOne()) {
8185 BestChunkWidth = I;
8186 break;
8187 }
8188
8189 // We have an alternate strategy for Remainder == Divisor - 1.
8190 // FIXME: Support HBitWidth.
 // NOTE(review): there is no break here, so when several widths satisfy this
 // test the last one visited (the smallest) is the one that is kept.
8191 if (I != HBitWidth && Mod == Divisor - 1)
8192 AltChunkWidth = I;
8193 }
8194
 // Fall back to the alternating-sign strategy only if no width with
 // (1 << W) % Divisor == 1 was found.
8195 bool Alternate = false;
8196 if (!BestChunkWidth) {
8197 if (!AltChunkWidth)
8198 return false;
8199 Alternate = true;
8200 BestChunkWidth = AltChunkWidth;
8201 }
8202
8203 SDLoc dl(N);
8204
8205 assert(!LL == !LH && "Expected both input halves or no input halves!");
8206 if (!LL)
8207 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
8208
8209 bool HasFSHR = isOperationLegal(ISD::FSHR, HiLoVT);
8210
 // Returns the low HiLoVT half of (Hi:Lo >> ShiftAmt), using FSHR when it is
 // legal for HiLoVT and an SRL/SHL/OR sequence otherwise.
8211 auto GetFSHR = [&](SDValue Lo, SDValue Hi, unsigned ShiftAmt) {
8212 assert(ShiftAmt > 0 && ShiftAmt < HBitWidth);
8213 if (HasFSHR)
8214 return DAG.getNode(ISD::FSHR, dl, HiLoVT, Hi, Lo,
8215 DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl));
8216 return DAG.getNode(
8217 ISD::OR, dl, HiLoVT,
8218 DAG.getNode(ISD::SRL, dl, HiLoVT, Lo,
8219 DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl)),
8220 DAG.getNode(
8221 ISD::SHL, dl, HiLoVT, Hi,
8222 DAG.getShiftAmountConstant(HBitWidth - ShiftAmt, HiLoVT, dl)));
8223 };
8224
8225 // Helper to perform a logical right shift, in place, on the double-width
8226 // value Hi:Lo. For shifts >= HBitWidth, Lo takes (shifted) Hi and Hi is zero.
8227 auto ShiftRight = [&](SDValue &Lo, SDValue &Hi, unsigned ShiftAmt) {
8228 if (ShiftAmt == 0)
8229 return;
8230 if (ShiftAmt < HBitWidth) {
8231 Lo = GetFSHR(Lo, Hi, ShiftAmt);
8232 Hi = DAG.getNode(ISD::SRL, dl, HiLoVT, Hi,
8233 DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl));
8234 } else if (ShiftAmt == HBitWidth) {
8235 Lo = Hi;
8236 Hi = DAG.getConstant(0, dl, HiLoVT);
8237 } else {
8238 Lo = DAG.getNode(
8239 ISD::SRL, dl, HiLoVT, Hi,
8240 DAG.getShiftAmountConstant(ShiftAmt - HBitWidth, HiLoVT, dl));
8241 Hi = DAG.getConstant(0, dl, HiLoVT);
8242 }
8243 };
8244
8245 // Shift the input by the number of TrailingZeros in the divisor. The
8246 // shifted out bits will be added to the remainder later.
8247 SDValue PartialRemL, PartialRemH;
8248 if (TrailingZeros && Opcode != ISD::UDIV) {
8249 // Save the shifted off bits if we need the remainder.
8250 if (TrailingZeros < HBitWidth) {
8251 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
8252 PartialRemL = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
8253 DAG.getConstant(Mask, dl, HiLoVT));
8254 } else if (TrailingZeros == HBitWidth) {
8255 // All of LL is part of the remainder.
8256 PartialRemL = LL;
8257 } else {
8258 // TrailingZeros > HBitWidth: LL and part of LH are the remainder.
8259 PartialRemL = LL;
8260 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros - HBitWidth);
8261 PartialRemH = DAG.getNode(ISD::AND, dl, HiLoVT, LH,
8262 DAG.getConstant(Mask, dl, HiLoVT));
8263 }
8264 }
8265
8266 SDValue Sum;
8267 // If BestChunkWidth is HBitWidth add low and high half. If there is a carry
8268 // out, add that to the final sum.
8269 if (BestChunkWidth == HBitWidth) {
 // The alternate strategy never selects HBitWidth (see the search loop).
8270 assert(!Alternate);
8271 // Shift LH:LL right if there were trailing zeros in the divisor.
8272 ShiftRight(LL, LH, TrailingZeros);
8273
8274 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8275 EVT SetCCType =
8276 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
 // NOTE(review): the condition selecting between the two branches below
 // (listing line 8277) is missing from this extract.
8278 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
8279 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
 // Fold the carry-out of LL + LH back into the sum.
8280 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
8281 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
8282 } else {
8283 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
 // An unsigned add wrapped iff the result is smaller than an operand.
8284 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
8285 // If the boolean for the target is 0 or 1, we can add the setcc result
8286 // directly.
8287 if (getBooleanContents(HiLoVT) ==
8289 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
8290 else
8291 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
8292 DAG.getConstant(0, dl, HiLoVT));
8293 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
8294 }
8295 } else {
8296 // Otherwise split into multiple chunks and add them together. We chose
8297 // BestChunkWidth so that the sum will not overflow.
8298 SDValue Mask = DAG.getConstant(
8299 APInt::getLowBitsSet(HBitWidth, BestChunkWidth), dl, HiLoVT);
8300
8301 for (unsigned I = 0; I < BitWidth - TrailingZeros; I += BestChunkWidth) {
8302 // If there were trailing zeros in the divisor, increase the shift amount.
8303 unsigned Shift = I + TrailingZeros;
8304 SDValue Chunk;
8305 if (Shift == 0)
8306 Chunk = LL;
8307 else if (Shift >= HBitWidth)
8308 Chunk = DAG.getNode(
8309 ISD::SRL, dl, HiLoVT, LH,
8310 DAG.getShiftAmountConstant(Shift - HBitWidth, HiLoVT, dl));
8311 else
8312 Chunk = GetFSHR(LL, LH, Shift);
8313 // If we're on the last chunk, we don't need an AND.
8314 if (I + BestChunkWidth < BitWidth - TrailingZeros)
8315 Chunk = DAG.getNode(ISD::AND, dl, HiLoVT, Chunk, Mask);
8316 if (!Sum) {
8317 Sum = Chunk;
8318 } else {
8319 // For Alternate, we need to subtract odd chunks.
8320 unsigned ChunkNum = I / BestChunkWidth;
8321 unsigned Opc = (Alternate && (ChunkNum % 2) != 0) ? ISD::SUB : ISD::ADD;
8322 Sum = DAG.getNode(Opc, dl, HiLoVT, Sum, Chunk);
8323 }
8324 }
8325
8326 // For Alternate, the sum may be negative, but we need a positive sum. We
8327 // can increase it by a multiple of the divisor to make it positive. For 3
8328 // chunks the largest negative value is -(2^BestChunkWidth - 1). For 4
8329 // chunks, it's 2*-(2^BestChunkWidth - 1). We know that 2^BestChunkWidth + 1
8330 // is a multiple of the divisor. Add that 1 or 2 times to make the sum
8331 // positive.
8332 if (Alternate) {
8333 unsigned NumChunks = divideCeil(BitWidth - TrailingZeros, BestChunkWidth);
8334 assert(NumChunks <= 4);
8335
 // Adjust = 2^BestChunkWidth + 1.
8336 APInt Adjust = APInt::getOneBitSet(HBitWidth, BestChunkWidth);
8337 Adjust.setBit(0);
8338 // If there are 4 chunks, we need to adjust twice.
8339 if (NumChunks == 4)
8340 Adjust <<= 1;
8341 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum,
8342 DAG.getConstant(Adjust, dl, HiLoVT));
8343 }
8344 }
8345
8346 // Perform a HiLoVT urem on the Sum using truncated divisor.
8347 SDValue RemL =
8348 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
8349 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
8350 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
8351
8352 if (Opcode != ISD::UREM) {
8353 // If we didn't shift LL/LH earlier, do it now.
8354 if (BestChunkWidth != HBitWidth)
8355 ShiftRight(LL, LH, TrailingZeros);
8356
8357 // Subtract the remainder from the shifted dividend.
8358 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
8359 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
8360
8361 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
8362
8363 // Multiply by the multiplicative inverse of the divisor modulo
8364 // (1 << BitWidth).
8365 APInt MulFactor = Divisor.multiplicativeInverse();
8366
8367 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
8368 DAG.getConstant(MulFactor, dl, VT));
8369
8370 // Split the quotient into low and high parts.
8371 SDValue QuotL, QuotH;
8372 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
8373 Result.push_back(QuotL);
8374 Result.push_back(QuotH);
8375 }
8376
8377 if (Opcode != ISD::UDIV) {
8378 // If we shifted the input, shift the remainder left and add the bits we
8379 // shifted off the input.
8380 if (TrailingZeros) {
8381 if (TrailingZeros < HBitWidth) {
8382 // Shift RemH:RemL left by TrailingZeros.
8383 // RemH gets the high bits shifted out of RemL.
8384 RemH = DAG.getNode(
8385 ISD::SRL, dl, HiLoVT, RemL,
8386 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros, HiLoVT, dl));
8387 RemL =
8388 DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
8389 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8390 // OR in the partial remainder.
8391 RemL = DAG.getNode(ISD::OR, dl, HiLoVT, RemL, PartialRemL,
8393 } else if (TrailingZeros == HBitWidth) {
8394 // Shift left by exactly HBitWidth: RemH becomes RemL, RemL becomes
8395 // PartialRemL.
8396 RemH = RemL;
8397 RemL = PartialRemL;
8398 } else {
8399 // Shift left by more than HBitWidth.
8400 RemH = DAG.getNode(
8401 ISD::SHL, dl, HiLoVT, RemL,
8402 DAG.getShiftAmountConstant(TrailingZeros - HBitWidth, HiLoVT, dl));
8403 RemH = DAG.getNode(ISD::OR, dl, HiLoVT, RemH, PartialRemH,
8405 RemL = PartialRemL;
8406 }
8407 }
8408 Result.push_back(RemL);
8409 Result.push_back(RemH);
8410 }
8411
8412 return true;
8413}
8414
// Expands an unsigned division/remainder of a double-width value by a constant
// using a magic-number multiply on HiLoVT halves: an optional pre-shift, a
// UMUL_LOHI by Magics.Magic, an optional add fix-up, and a post-shift produce
// the quotient; the remainder is the dividend minus quotient * Divisor.
// On success pushes QL, QH (unless the opcode is UREM) followed by RemL, RemH
// (unless the opcode is UDIV). Returns false if a required multiply expansion
// fails. If LL/LH are null they are obtained by splitting operand 0 of N.
// NOTE(review): listing lines 8433, 8441 and 8446 are absent from this extract,
// so three expressions below appear truncated.
8415bool TargetLowering::expandUDIVREMByConstantViaUMulHiMagic(
8416 SDNode *N, const APInt &Divisor, SmallVectorImpl<SDValue> &Result,
8417 EVT HiLoVT, SelectionDAG &DAG, SDValue LL, SDValue LH) const {
8418
8419 SDValue N0 = N->getOperand(0);
8420 EVT VT = N0->getValueType(0);
8421 SDLoc DL{N};
8422
8423 assert(!Divisor.isOne() && "Magic algorithm does not work for division by 1");
8424
8425 // This helper creates a MUL_LOHI of the pair (LL, LH) by a constant.
 // It returns the bool produced by expandMUL_LOHI and fills Result.
8426 auto MakeMUL_LOHIByConst = [&](unsigned Opc, SDValue LL, SDValue LH,
8427 const APInt &Const,
8428 SmallVectorImpl<SDValue> &Result) {
8429 SDValue LHS = DAG.getNode(ISD::BUILD_PAIR, DL, VT, LL, LH);
8430 SDValue RHS = DAG.getConstant(Const, DL, VT);
8431 auto [RL, RH] = DAG.SplitScalar(RHS, DL, HiLoVT, HiLoVT);
8432 return expandMUL_LOHI(Opc, VT, DL, LHS, RHS, Result, HiLoVT, DAG,
8434 LL, LH, RL, RH);
8435 };
8436
8437 // This helper creates an ADD/SUB of the pairs (LL, LH) and (RL, RH).
 // The second result of the low-half node is fed into the high-half node.
8438 auto MakeAddSubLong = [&](unsigned Opc, SDValue LL, SDValue LH, SDValue RL,
8439 SDValue RH) {
8440 SDValue AddSubNode =
8442 DAG.getVTList(HiLoVT, MVT::i1), LL, RL);
8443 SDValue OutL = AddSubNode.getValue(0);
8444 SDValue Overflow = AddSubNode.getValue(1);
8445 SDValue AddSubWithOverflow =
8447 DAG.getVTList(HiLoVT, MVT::i1), LH, RH, Overflow);
8448 SDValue OutH = AddSubWithOverflow.getValue(0);
8449 return std::make_pair(OutL, OutH);
8450 };
8451
8452 // This helper creates a SRL of the pair (LL, LH) by Shift.
 // Returns the shifted (low, high) pair; the high half is zero once Shift
 // reaches HBitWidth.
8453 auto MakeSRLLong = [&](SDValue LL, SDValue LH, unsigned Shift) {
8454 unsigned HBitWidth = HiLoVT.getScalarSizeInBits();
8455 if (Shift < HBitWidth) {
8456 SDValue ShAmt = DAG.getShiftAmountConstant(Shift, HiLoVT, DL);
8457 SDValue ResL = DAG.getNode(ISD::FSHR, DL, HiLoVT, LH, LL, ShAmt);
8458 SDValue ResH = DAG.getNode(ISD::SRL, DL, HiLoVT, LH, ShAmt);
8459 return std::make_pair(ResL, ResH);
8460 }
8461 SDValue Zero = DAG.getConstant(0, DL, HiLoVT);
8462 if (Shift == HBitWidth)
8463 return std::make_pair(LH, Zero);
8464 assert(Shift - HBitWidth < HBitWidth &&
8465 "We shouldn't generate an undefined shift");
8466 SDValue ShAmt = DAG.getShiftAmountConstant(Shift - HBitWidth, HiLoVT, DL);
8467 return std::make_pair(DAG.getNode(ISD::SRL, DL, HiLoVT, LH, ShAmt), Zero);
8468 };
8469
8470 // Knowledge of leading zeros may help to reduce the multiplier.
8471 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
8472
8473 UnsignedDivisionByConstantInfo Magics = UnsignedDivisionByConstantInfo::get(
8474 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
8475
8476 assert(!LL == !LH && "Expected both input halves or no input halves!");
8477 if (!LL)
8478 std::tie(LL, LH) = DAG.SplitScalar(N0, DL, HiLoVT, HiLoVT);
 // QL/QH track the quotient as it is refined; LL/LH keep the original
 // dividend for the fix-up and remainder steps.
8479 SDValue QL = LL;
8480 SDValue QH = LH;
8481 if (Magics.PreShift != 0)
8482 std::tie(QL, QH) = MakeSRLLong(QL, QH, Magics.PreShift);
8483
8484 SmallVector<SDValue, 4> UMulResult;
8485 if (!MakeMUL_LOHIByConst(ISD::UMUL_LOHI, QL, QH, Magics.Magic, UMulResult))
8486 return false;
8487
 // Presumably entries 2 and 3 are the upper two HiLoVT parts of the product,
 // i.e. its high half -- TODO confirm against expandMUL_LOHI's push order.
8488 QL = UMulResult[2];
8489 QH = UMulResult[3];
8490
8491 if (Magics.IsAdd) {
 // Fix-up: Q = ((N - Q) >> 1) + Q.
8492 auto [NPQL, NPQH] = MakeAddSubLong(ISD::SUB, LL, LH, QL, QH);
8493 std::tie(NPQL, NPQH) = MakeSRLLong(NPQL, NPQH, 1);
8494 std::tie(QL, QH) = MakeAddSubLong(ISD::ADD, NPQL, NPQH, QL, QH);
8495 }
8496
8497 if (Magics.PostShift != 0)
8498 std::tie(QL, QH) = MakeSRLLong(QL, QH, Magics.PostShift);
8499
8500 unsigned Opcode = N->getOpcode();
8501 if (Opcode != ISD::UREM) {
8502 Result.push_back(QL);
8503 Result.push_back(QH);
8504 }
8505
8506 if (Opcode != ISD::UDIV) {
 // Remainder = dividend - quotient * Divisor.
8507 SmallVector<SDValue, 2> MulResult;
8508 if (!MakeMUL_LOHIByConst(ISD::MUL, QL, QH, Divisor, MulResult))
8509 return false;
8510
8511 assert(MulResult.size() == 2);
8512
8513 auto [RemL, RemH] =
8514 MakeAddSubLong(ISD::SUB, LL, LH, MulResult[0], MulResult[1]);
8515
8516 Result.push_back(RemL);
8517 Result.push_back(RemH);
8518 }
8519
8520 return true;
8521}
8522
// Entry point for expanding a division/remainder by a constant on HiLoVT
// halves. Bails out (returns false) for signed opcodes, a non-constant second
// operand, divisors of 0 or 1, and when optimizing for size. Otherwise tries
// the UREM-decomposition strategy first and the UMulHi magic-number strategy
// second, returning true as soon as one of them succeeds.
// NOTE(review): the opening lines of this signature (listing lines 8523-8524)
// and part of the MULHU legality check (listing line 8545) are absent from this
// extract.
8525 EVT HiLoVT, SelectionDAG &DAG,
8526 SDValue LL, SDValue LH) const {
8527 unsigned Opcode = N->getOpcode();
8528
8529 // TODO: Support signed division/remainder.
8530 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
8531 return false;
8532 assert(
8533 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
8534 "Unexpected opcode");
8535
 // Only a constant divisor (operand 1) is handled.
8536 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
8537 if (!CN)
8538 return false;
8539
8540 APInt Divisor = CN->getAPIntValue();
8541
8542 // We depend on the UREM by constant optimization in DAGCombiner that requires
8543 // high multiply.
8544 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
8546 return false;
8547
8548 // Don't expand if optimizing for size.
8549 if (DAG.shouldOptForSize())
8550 return false;
8551
8552 // Early out for 0 or 1 divisors.
8553 if (Divisor.ule(1))
8554 return false;
8555
8556 if (expandUDIVREMByConstantViaUREMDecomposition(N, Divisor, Result, HiLoVT,
8557 DAG, LL, LH))
8558 return true;
8559
8560 if (expandUDIVREMByConstantViaUMulHiMagic(N, Divisor, Result, HiLoVT, DAG, LL,
8561 LH))
8562 return true;
8563
8564 return false;
8565}
8566
8567// Check that (every element of) Z is undef or not an exact multiple of BW.
// The predicate accepts an element when its constant pointer is null or when
// its value modulo BW is non-zero; undefs and truncation are both allowed.
// NOTE(review): the line naming the matcher being called (listing line 8569)
// is absent from this extract.
8568static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8570 Z,
8571 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
8572 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8573}
8574
// Expands a vector-predicated funnel shift (VP_FSHL, otherwise treated as the
// right-shift form) into VP_SHL / VP_SRL / VP_OR nodes. Operands 0-2 are X, Y
// and the shift amount Z; operands 3 and 4 (Mask, VL) are threaded through
// every VP node that is created. Returns the final VP_OR.
// NOTE(review): the signature line (listing line 8575) is absent from this
// extract.
8576 EVT VT = Node->getValueType(0);
8577 SDValue ShX, ShY;
8578 SDValue ShAmt, InvShAmt;
8579 SDValue X = Node->getOperand(0);
8580 SDValue Y = Node->getOperand(1);
8581 SDValue Z = Node->getOperand(2);
8582 SDValue Mask = Node->getOperand(3);
8583 SDValue VL = Node->getOperand(4);
8584
8585 unsigned BW = VT.getScalarSizeInBits();
8586 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8587 SDLoc DL(SDValue(Node, 0));
8588
8589 EVT ShVT = Z.getValueType();
 // When Z % BW is known non-zero, BW - (Z % BW) is a valid shift amount and a
 // single shift per operand suffices.
8590 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8591 // fshl: X << C | Y >> (BW - C)
8592 // fshr: X << (BW - C) | Y >> C
8593 // where C = Z % BW is not zero
8594 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8595 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8596 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
8597 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8598 VL);
8599 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8600 VL);
8601 } else {
 // Z % BW may be zero here, so the opposite-direction shift is split into a
 // shift by 1 followed by a shift by BW - 1 - (Z % BW).
8602 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8603 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8604 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8605 if (isPowerOf2_32(BW)) {
8606 // Z % BW -> Z & (BW - 1)
8607 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8608 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8609 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8610 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8611 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8612 } else {
8613 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8614 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8615 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8616 }
8617
8618 SDValue One = DAG.getConstant(1, DL, ShVT);
8619 if (IsFSHL) {
8620 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8621 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8622 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8623 } else {
8624 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8625 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8626 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8627 }
8628 }
8629 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8630}
8631
// Expands a funnel shift node. VP opcodes are delegated to
// expandVPFunnelShift. Otherwise, if the node's own opcode is not legal/custom
// but the opposite-direction funnel shift is (and BW is a power of two), the
// node is rewritten in terms of that opcode; failing that it is lowered to
// SHL / SRL / OR. Returns an empty SDValue when the vector legality check near
// the top rejects VT.
// NOTE(review): the opening line of this signature (listing line 8632) and
// most of the vector legality condition (listing lines 8640-8642) are absent
// from this extract.
8633 SelectionDAG &DAG) const {
8634 if (Node->isVPOpcode())
8635 return expandVPFunnelShift(Node, DAG);
8636
8637 EVT VT = Node->getValueType(0);
8638
8639 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8643 return SDValue();
8644
8645 SDValue X = Node->getOperand(0);
8646 SDValue Y = Node->getOperand(1);
8647 SDValue Z = Node->getOperand(2);
8648
8649 unsigned BW = VT.getScalarSizeInBits();
8650 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8651 SDLoc DL(SDValue(Node, 0));
8652
8653 EVT ShVT = Z.getValueType();
8654
8655 // If a funnel shift in the other direction is more supported, use it.
8656 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8657 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8658 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8659 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8660 // fshl X, Y, Z -> fshr X, Y, -Z
8661 // fshr X, Y, Z -> fshl X, Y, -Z
8662 Z = DAG.getNegative(Z, DL, ShVT);
8663 } else {
 // Z % BW may be zero, so pre-shift the operands by one and use ~Z as the
 // amount instead of -Z.
8664 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8665 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8666 SDValue One = DAG.getConstant(1, DL, ShVT);
8667 if (IsFSHL) {
 // Y must be computed first: it reads the original X.
8668 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8669 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8670 } else {
 // X must be computed first: it reads the original Y.
8671 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8672 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8673 }
8674 Z = DAG.getNOT(DL, Z, ShVT);
8675 }
8676 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8677 }
8678
8679 SDValue ShX, ShY;
8680 SDValue ShAmt, InvShAmt;
8681 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8682 // fshl: X << C | Y >> (BW - C)
8683 // fshr: X << (BW - C) | Y >> C
8684 // where C = Z % BW is not zero
8685 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8686 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8687 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8688 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8689 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8690 } else {
8691 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8692 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8693 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8694 if (isPowerOf2_32(BW)) {
8695 // Z % BW -> Z & (BW - 1)
8696 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8697 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8698 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8699 } else {
8700 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8701 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8702 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8703 }
8704
8705 SDValue One = DAG.getConstant(1, DL, ShVT);
8706 if (IsFSHL) {
8707 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8708 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8709 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8710 } else {
8711 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8712 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8713 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8714 }
8715 }
8716 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8717}
8718
8719// TODO: Merge with expandFunnelShift.
// Expands a rotate node (ROTL, otherwise treated as a right rotate). If the
// node's own opcode is not legal/custom but the opposite rotate is (and the
// element width is a power of two), the rotate is emitted in the other
// direction with a negated amount. Otherwise it is lowered to a pair of shifts
// combined with OR. Returns an empty SDValue when vector ops are not allowed
// and the vector legality check rejects VT.
// NOTE(review): the opening line of this signature (listing line 8720), which
// presumably declares Node and AllowVectorOps, and most of the vector legality
// condition (listing lines 8741-8745) are absent from this extract.
8721 SelectionDAG &DAG) const {
8722 EVT VT = Node->getValueType(0);
8723 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8724 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8725 SDValue Op0 = Node->getOperand(0);
8726 SDValue Op1 = Node->getOperand(1);
8727 SDLoc DL(SDValue(Node, 0));
8728
8729 EVT ShVT = Op1.getValueType();
8730 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8731
8732 // If a rotate in the other direction is more supported, use it.
8733 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8734 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8735 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
 // Rotating by C one way equals rotating by -C the other way.
8736 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8737 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8738 }
8739
8740 if (!AllowVectorOps && VT.isVector() &&
8746 return SDValue();
8747
 // ShOpc shifts in the rotate direction; HsOpc shifts the opposite way to
 // recover the bits that wrapped around.
8748 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8749 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8750 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8751 SDValue ShVal;
8752 SDValue HsVal;
8753 if (isPowerOf2_32(EltSizeInBits)) {
8754 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8755 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8756 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8757 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8758 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8759 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8760 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8761 } else {
8762 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8763 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8764 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8765 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8766 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8767 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8768 SDValue One = DAG.getConstant(1, DL, ShVT);
 // The opposite shift is done as a shift by 1 followed by a shift by
 // w - 1 - (c % w), so neither amount reaches w.
8769 HsVal =
8770 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8771 }
8772 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8773}
8774
8775/// Check if CLMUL on VT can eventually reach a type with legal CLMUL through
8776/// a chain of halving decompositions (halving element width) and/or vector
8777/// widening (doubling element count). This guides expansion strategy selection:
8778/// if true, the halving/widening path produces better code than bit-by-bit.
8779///
8780/// HalveDepth tracks halving steps only (each creates ~4x more operations).
8781/// Widening steps are cheap (O(1) pad/extract) and don't count.
8782/// Limiting halvings to 2 prevents exponential blowup:
8783/// 1 halving: ~4 sub-CLMULs (good, e.g. v8i16 -> v8i8)
8784/// 2 halvings: ~16 sub-CLMULs (acceptable, e.g. v4i32 -> v4i16 -> v8i8)
8785/// 3 halvings: ~64 sub-CLMULs (worse than bit-by-bit expansion)
///
/// TotalDepth counts halving and widening steps together and is capped at 8,
/// which bounds the recursion. Only fixed-length vector types are considered;
/// anything else returns false.
/// NOTE(review): the opening line of this signature (listing line 8786) and
/// the condition guarding the first "return true" (listing line 8791) are
/// absent from this extract.
8787 EVT VT, unsigned HalveDepth = 0,
8788 unsigned TotalDepth = 0) {
8789 if (HalveDepth > 2 || TotalDepth > 8 || !VT.isFixedLengthVector())
8790 return false;
8792 return true;
 // An illegal type cannot be the starting point of either step below.
8793 if (!TLI.isTypeLegal(VT))
8794 return false;
8795
8796 unsigned BW = VT.getScalarSizeInBits();
8797
8798 // Halve: halve element width, same element count.
8799 // This is the expensive step -- each halving creates ~4x more operations.
8800 if (BW % 2 == 0) {
8801 EVT HalfEltVT = EVT::getIntegerVT(Ctx, BW / 2);
8802 EVT HalfVT = VT.changeVectorElementType(Ctx, HalfEltVT);
8803 if (TLI.isTypeLegal(HalfVT) &&
8804 canNarrowCLMULToLegal(TLI, Ctx, HalfVT, HalveDepth + 1, TotalDepth + 1))
8805 return true;
8806 }
8807
8808 // Widen: double element count (fixed-width vectors only).
8809 // This is cheap -- just INSERT_SUBVECTOR + EXTRACT_SUBVECTOR.
8810 EVT WideVT = VT.getDoubleNumVectorElementsVT(Ctx);
8811 if (TLI.isTypeLegal(WideVT) &&
8812 canNarrowCLMULToLegal(TLI, Ctx, WideVT, HalveDepth, TotalDepth + 1))
8813 return true;
8814
8815 return false;
8816}
8817
8819 SDLoc DL(Node);
8820 EVT VT = Node->getValueType(0);
8821 SDValue X = Node->getOperand(0);
8822 SDValue Y = Node->getOperand(1);
8823 unsigned BW = VT.getScalarSizeInBits();
8824 unsigned Opcode = Node->getOpcode();
8825 LLVMContext &Ctx = *DAG.getContext();
8826
8827 switch (Opcode) {
8828 case ISD::CLMUL: {
8829 // For vector types, try decomposition strategies that leverage legal
8830 // CLMUL on narrower or wider element types, avoiding the expensive
8831 // bit-by-bit expansion.
8832 if (VT.isVector()) {
8833 // Strategy 1: Halving decomposition to half-element-width CLMUL.
8834 // Applies ExpandIntRes_CLMUL's identity element-wise:
8835 // CLMUL(X, Y) = (Hi << HalfBW) | Lo
8836 // where:
8837 // Lo = CLMUL(XLo, YLo)
8838 // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8839 unsigned HalfBW = BW / 2;
8840 if (BW % 2 == 0) {
8841 EVT HalfEltVT = EVT::getIntegerVT(Ctx, HalfBW);
8842 EVT HalfVT =
8843 EVT::getVectorVT(Ctx, HalfEltVT, VT.getVectorElementCount());
8844 if (isTypeLegal(HalfVT) && canNarrowCLMULToLegal(*this, Ctx, HalfVT,
8845 /*HalveDepth=*/1)) {
8846 SDValue ShAmt = DAG.getShiftAmountConstant(HalfBW, VT, DL);
8847
8848 // Extract low and high halves of each element.
8849 SDValue XLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, X);
8850 SDValue XHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
8851 DAG.getNode(ISD::SRL, DL, VT, X, ShAmt));
8852 SDValue YLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, Y);
8853 SDValue YHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
8854 DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt));
8855
8856 // Lo = CLMUL(XLo, YLo)
8857 SDValue Lo = DAG.getNode(ISD::CLMUL, DL, HalfVT, XLo, YLo);
8858
8859 // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8860 SDValue LoH = DAG.getNode(ISD::CLMULH, DL, HalfVT, XLo, YLo);
8861 SDValue Cross1 = DAG.getNode(ISD::CLMUL, DL, HalfVT, XLo, YHi);
8862 SDValue Cross2 = DAG.getNode(ISD::CLMUL, DL, HalfVT, XHi, YLo);
8863 SDValue Cross = DAG.getNode(ISD::XOR, DL, HalfVT, Cross1, Cross2);
8864 SDValue Hi = DAG.getNode(ISD::XOR, DL, HalfVT, LoH, Cross);
8865
8866 // Reassemble: Result = ZExt(Lo) | (AnyExt(Hi) << HalfBW)
8867 SDValue LoExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo);
8868 SDValue HiExt = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Hi);
8869 SDValue HiShifted = DAG.getNode(ISD::SHL, DL, VT, HiExt, ShAmt);
8870 return DAG.getNode(ISD::OR, DL, VT, LoExt, HiShifted);
8871 }
8872 }
8873
8874 // Strategy 2: Promote to double-element-width CLMUL.
8875 // CLMUL(X, Y) = Trunc(CLMUL(AnyExt(X), AnyExt(Y)))
8876 {
8877 EVT ExtVT = VT.widenIntegerElementType(Ctx);
8878 if (isTypeLegal(ExtVT) && isOperationLegalOrCustom(ISD::CLMUL, ExtVT)) {
8879 // If CLMUL on ExtVT is Custom (not Legal), the target may
8880 // scalarize it, costing O(NumElements) scalar ops. The bit-by-bit
8881 // fallback costs O(BW) vectorized iterations. Only widen when
8882 // element count is small enough that scalarization is cheaper.
8883 unsigned NumElts = VT.getVectorMinNumElements();
8884 if (isOperationLegal(ISD::CLMUL, ExtVT) || NumElts < BW) {
8885 SDValue XExt = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, X);
8886 SDValue YExt = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, Y);
8887 SDValue Mul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
8888 return DAG.getNode(ISD::TRUNCATE, DL, VT, Mul);
8889 }
8890 }
8891 }
8892
8893 // Strategy 3: Widen element count (pad with undef, do CLMUL on wider
8894 // vector, extract lower result). CLMUL is element-wise, so upper
8895 // (undef) lanes don't affect the lower results.
8896 // e.g. v4i16 => pad to v8i16 => halve to v8i8 PMUL => extract v4i16.
8897 if (auto EC = VT.getVectorElementCount(); EC.isFixed()) {
8898 EVT WideVT = EVT::getVectorVT(Ctx, VT.getVectorElementType(), EC * 2);
8899 if (isTypeLegal(WideVT) && canNarrowCLMULToLegal(*this, Ctx, WideVT)) {
8900 SDValue Undef = DAG.getUNDEF(WideVT);
8901 SDValue XWide = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, Undef,
8902 X, DAG.getVectorIdxConstant(0, DL));
8903 SDValue YWide = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, Undef,
8904 Y, DAG.getVectorIdxConstant(0, DL));
8905 SDValue WideRes = DAG.getNode(ISD::CLMUL, DL, WideVT, XWide, YWide);
8906 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, WideRes,
8907 DAG.getVectorIdxConstant(0, DL));
8908 }
8909 }
8910 }
8911
8912 // Special case: clmul(X, ~0) is equivalent to a "parallel prefix XOR" or
8913 // "bitwise parity" operation.
8915 SDValue R = X;
8916 for (unsigned I = 1; I < BW; I <<= 1) {
8917 SDValue ShAmt = DAG.getShiftAmountConstant(I, VT, DL);
8918 SDValue Shifted = DAG.getNode(ISD::SHL, DL, VT, R, ShAmt);
8919 R = DAG.getNode(ISD::XOR, DL, VT, R, Shifted);
8920 }
8921 return R;
8922 }
8923
8924 // NOTE: If you change this expansion, please update the cost model
8925 // calculation in BasicTTIImpl::getTypeBasedIntrinsicInstrCost for
8926 // Intrinsic::clmul.
8927
8928 // Strategy 4: multiplication with holes.
8929 //
8930 // Uses "holes" (sequences of zeroes) to avoid carry spilling. When carries
8931 // do occur, they wind up in a "hole" and are subsequently masked out of the
8932 // result.
8933 //
8934 // A hole of 3 bits is optimal for 32-bit and 64-bit inputs. 128-bit
8935 // integers need a larger hole, and for smaller integers the fallback below
8936 // is more efficient.
8937 //
8938 // Based on bmul64 in bearssl and bmul in the rust polyval crate.
8939 if (BW >= 32 && BW <= 64 &&
8941
8942 // Set every fourth bit of each nibble, equivalent to 0b00010001...0001.
8943 APInt MaskVal = APInt::getSplat(BW, APInt(4, 0b0001));
8944
8945 // Create versions of X and Y that keep only the I-th bit of
8946 // each nibble.
8947 SDValue M[4], Xp[4], Yp[4];
8948 for (unsigned I = 0; I < 4; ++I) {
8949 M[I] = DAG.getConstant(MaskVal.shl(I), DL, VT);
8950 Xp[I] = DAG.getNode(ISD::AND, DL, VT, X, M[I]);
8951 Yp[I] = DAG.getNode(ISD::AND, DL, VT, Y, M[I]);
8952 }
8953
8954 // Codegens these expressions (16 multiplications):
8955 //
8956 // z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1);
8957 // z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2);
8958 // z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3);
8959 // z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0);
8960 SDValue Res = DAG.getConstant(0, DL, VT);
8961 for (unsigned I = 0; I < 4; ++I) {
8962 SDValue Zi = DAG.getConstant(0, DL, VT);
8963 for (unsigned J = 0; J < 4; ++J) {
8964 unsigned K = (I + 4 - J) % 4;
8965 SDValue P = DAG.getNode(ISD::MUL, DL, VT, Xp[J], Yp[K]);
8966 Zi = DAG.getNode(ISD::XOR, DL, VT, Zi, P);
8967 }
8968
8969 // Keep only the bits belonging to this iteration, and bitwise or it all
8970 // together.
8971 Zi = DAG.getNode(ISD::AND, DL, VT, Zi, M[I]);
8972 Res = DAG.getNode(ISD::OR, DL, VT, Res, Zi, SDNodeFlags::Disjoint);
8973 }
8974 return Res;
8975 }
8976
8977 // Strategy 5: the naive fallback.
8978 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
8979
8980 SDValue Res = DAG.getConstant(0, DL, VT);
8981 for (unsigned I = 0; I < BW; ++I) {
8982 SDValue ShiftAmt = DAG.getShiftAmountConstant(I, VT, DL);
8983 SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT);
8984 SDValue YMasked = DAG.getNode(ISD::AND, DL, VT, Y, Mask);
8985
8986 // For targets with a fast bit test instruction (e.g., x86 BT) or without
8987 // multiply, use a shift-based expansion to avoid expensive MUL
8988 // instructions.
8989 SDValue Part;
8990 if (!hasBitTest(Y, ShiftAmt) &&
8993 Part = DAG.getNode(ISD::MUL, DL, VT, X, YMasked);
8994 } else {
8995 // Canonical bit test: (Y & (1 << I)) != 0
8996 SDValue Zero = DAG.getConstant(0, DL, VT);
8997 SDValue Cond = DAG.getSetCC(DL, SetCCVT, YMasked, Zero, ISD::SETEQ);
8998 SDValue XShifted = DAG.getNode(ISD::SHL, DL, VT, X, ShiftAmt);
8999 Part = DAG.getSelect(DL, VT, Cond, Zero, XShifted);
9000 }
9001 Res = DAG.getNode(ISD::XOR, DL, VT, Res, Part);
9002 }
9003 return Res;
9004 }
9005 case ISD::CLMULR:
9006 // If we have CLMUL/CLMULH, merge the shifted results to form CLMULR.
9009 SDValue Lo = DAG.getNode(ISD::CLMUL, DL, VT, X, Y);
9010 SDValue Hi = DAG.getNode(ISD::CLMULH, DL, VT, X, Y);
9011 Lo = DAG.getNode(ISD::SRL, DL, VT, Lo,
9012 DAG.getShiftAmountConstant(BW - 1, VT, DL));
9013 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
9014 DAG.getShiftAmountConstant(1, VT, DL));
9015 return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
9016 }
9017 [[fallthrough]];
9018 case ISD::CLMULH: {
9019 EVT ExtVT = VT.widenIntegerElementType(Ctx);
9020 // Use bitreverse-based lowering (CLMULR/H = rev(CLMUL(rev,rev)) >> S)
9021 // when any of these hold:
9022 // (a) ZERO_EXTEND to ExtVT or SRL on ExtVT isn't legal.
9023 // (b) CLMUL is legal on VT but not on ExtVT (e.g. v8i8 on AArch64).
9024 // (c) CLMUL on ExtVT isn't legal, but CLMUL on VT can be efficiently
9025 // expanded via halving/widening to reach legal CLMUL. The bitreverse
9026 // path creates CLMUL(VT) which will be expanded efficiently. The
9027 // promote path would create CLMUL(ExtVT) => halving => CLMULH(VT),
9028 // causing a cycle.
9029 // Note: when CLMUL is legal on ExtVT, the zext => CLMUL(ExtVT) => shift
9030 // => trunc path is preferred over the bitreverse path, as it avoids the
9031 // cost of 3 bitreverse operations.
9036 canNarrowCLMULToLegal(*this, Ctx, VT)))) {
9037 SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X);
9038 SDValue YRev = DAG.getNode(ISD::BITREVERSE, DL, VT, Y);
9039 SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, VT, XRev, YRev);
9040 SDValue Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ClMul);
9041 if (Opcode == ISD::CLMULH)
9042 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
9043 DAG.getShiftAmountConstant(1, VT, DL));
9044 return Res;
9045 }
9046 SDValue XExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, X);
9047 SDValue YExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Y);
9048 SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
9049 unsigned ShAmt = Opcode == ISD::CLMULR ? BW - 1 : BW;
9050 SDValue HiBits = DAG.getNode(ISD::SRL, DL, ExtVT, ClMul,
9051 DAG.getShiftAmountConstant(ShAmt, ExtVT, DL));
9052 return DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits);
9053 }
9054 }
9055 llvm_unreachable("Expected CLMUL, CLMULR, or CLMULH");
9056}
9057
// Builds a DAG expansion of a bit "compress": the bits of operand 0 (Val)
// selected by operand 1 (Msk) are moved to the right in log2(BW) rounds,
// following the Hacker's Delight algorithm cited below. Returns the final
// value X.
9059 SDLoc DL(Node);
9060 EVT VT = Node->getValueType(0);
9061 SDValue Val = Node->getOperand(0);
9062 SDValue Msk = Node->getOperand(1);
9063 unsigned BW = VT.getScalarSizeInBits();
9064
9065 // Hacker's Delight §7-4: Compress, or Generalized Extract
// X starts as the value with every unselected bit cleared; M is the working
// copy of the mask, which is compressed alongside X.
9066 SDValue X = DAG.getNode(ISD::AND, DL, VT, Val, Msk);
9067 SDValue M = Msk;
9068 SDValue One = DAG.getShiftAmountConstant(1, VT, DL);
// Mk = ~M << 1: bit i is set when mask bit i-1 is clear.
9069 SDValue Mk = DAG.getNode(ISD::SHL, DL, VT, DAG.getNOT(DL, M, VT), One);
9070
9071 // Repeatedly compute which bits would shift to the right by an odd amount,
9072 // shift all such bits in parallel using a mask, and double the shift amount.
9073 for (unsigned I = 1; I < BW; I *= 2) {
9074 // This expands the "parallel prefix" operation to clmul(Mk, ~0).
9075 SDValue Mp =
9076 DAG.getNode(ISD::CLMUL, DL, VT, Mk, DAG.getAllOnesConstant(DL, VT));
// Mv: the mask bits that move right by I in this round.
9077 SDValue Mv = DAG.getNode(ISD::AND, DL, VT, Mp, M);
9078 SDValue ShiftI = DAG.getShiftAmountConstant(I, VT, DL);
9079 SDValue MvS = DAG.getNode(ISD::SRL, DL, VT, Mv, ShiftI);
// Compress the mask itself: remove the moving bits and re-insert them
// shifted right by I.
9080 M = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ISD::XOR, DL, VT, M, Mv), MvS,
9082 SDValue T = DAG.getNode(ISD::AND, DL, VT, X, Mv);
9083 SDValue TS = DAG.getNode(ISD::SRL, DL, VT, T, ShiftI);
// Apply the same move to the data bits.
9084 X = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ISD::XOR, DL, VT, X, T), TS,
// Mk is only read by the next round, so skip the update on the last one.
9086 if (I * 2 < BW)
9087 Mk = DAG.getNode(ISD::AND, DL, VT, Mk, DAG.getNOT(DL, Mp, VT));
9088 }
9089
9090 return X;
9091}
9092
// Builds a DAG expansion of a bit "expand": bits of operand 0 (Val) are moved
// to the left, round by round, under control of operand 1 (Msk), following
// the Hacker's Delight algorithm cited below. The result is ANDed with Msk
// before being returned.
9094 SDLoc DL(Node);
9095 EVT VT = Node->getValueType(0);
9096 SDValue Val = Node->getOperand(0);
9097 SDValue Msk = Node->getOperand(1);
9098 unsigned BW = VT.getScalarSizeInBits();
9099
9100 // Hacker's Delight §7-5: Expand, or Generalized Insert.
// LogBW is the number of rounds; MvArray[S] records the move mask for a
// shift of 2^S so that the second pass can replay the moves in reverse.
9101 unsigned LogBW = Log2_32_Ceil(BW);
9102 SmallVector<SDValue, 8> MvArray(LogBW);
9103 SDValue One = DAG.getShiftAmountConstant(1, VT, DL);
9104 SDValue Mc = Msk;
// Mk = ~Msk << 1: bit i is set when mask bit i-1 is clear.
9105 SDValue Mk = DAG.getNode(ISD::SHL, DL, VT, DAG.getNOT(DL, Msk, VT), One);
9106
9107 // First pass: compute move masks for each power of two that a bit moves by.
9108 for (unsigned S = 0; S < LogBW; ++S) {
9109 unsigned ShiftS = 1u << S;
9110 // This expands the "parallel prefix" operation to clmul(Mk, ~0).
9111 SDValue Mp =
9112 DAG.getNode(ISD::CLMUL, DL, VT, Mk, DAG.getAllOnesConstant(DL, VT));
9113 SDValue Mv = DAG.getNode(ISD::AND, DL, VT, Mp, Mc);
9114 MvArray[S] = Mv;
// Mc and Mk are only needed to compute the next round's mask, so the
// updates are skipped on the last round.
9115 if (S + 1 < LogBW) {
9116 SDValue McXorMv = DAG.getNode(ISD::XOR, DL, VT, Mc, Mv);
9117 SDValue MvShifted = DAG.getNode(
9118 ISD::SRL, DL, VT, Mv, DAG.getShiftAmountConstant(ShiftS, VT, DL));
9119 Mc = DAG.getNode(ISD::OR, DL, VT, McXorMv, MvShifted,
9121 Mk = DAG.getNode(ISD::AND, DL, VT, Mk, DAG.getNOT(DL, Mp, VT));
9122 }
9123 }
9124
9125 // Second pass: move bits by 2^(LogBW-1), ..., 4, 2, 1 (that is, 32, 16, 8,
// 4, 2, 1 for a 64-bit type), using the saved masks, in parallel.
9126 // Each pass handles half the shift amount of the previous pass.
9127 SDValue X = Val;
9128 for (int S = (int)LogBW - 1; S >= 0; --S) {
9129 SDValue ShiftSv = DAG.getShiftAmountConstant(1u << S, VT, DL);
9130 SDValue T = DAG.getNode(ISD::SHL, DL, VT, X, ShiftSv);
// Bits outside MvArray[S] stay in place; bits inside it take the value
// shifted left by 2^S.
9131 SDValue UnshiftedBits =
9132 DAG.getNode(ISD::AND, DL, VT, X, DAG.getNOT(DL, MvArray[S], VT));
9133 SDValue ShiftedBits = DAG.getNode(ISD::AND, DL, VT, T, MvArray[S]);
9134 X = DAG.getNode(ISD::OR, DL, VT, UnshiftedBits, ShiftedBits,
9136 }
9137
// Clear every bit position that is not selected by the original mask.
9138 return DAG.getNode(ISD::AND, DL, VT, X, Msk);
9139}
9140
9142 SelectionDAG &DAG) const {
// Expands a double-width shift whose value is given as two parts.
// Operands: 0 = low part, 1 = high part, 2 = shift amount. The opcode picks
// the kind of shift: SHL_PARTS shifts left, SRA_PARTS shifts right
// arithmetically, and any other opcode is handled as a logical right shift.
// The two result halves are stored into Lo and Hi.
// NOTE(review): Lo and Hi are presumably by-reference output parameters;
// confirm against the declaration.
9143 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
9144 EVT VT = Node->getValueType(0);
9145 unsigned VTBits = VT.getScalarSizeInBits();
9146 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
9147
9148 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
9149 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
9150 SDValue ShOpLo = Node->getOperand(0);
9151 SDValue ShOpHi = Node->getOperand(1);
9152 SDValue ShAmt = Node->getOperand(2);
9153 EVT ShAmtVT = ShAmt.getValueType();
9154 EVT ShAmtCCVT =
9155 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
9156 SDLoc dl(Node);
9157
9158 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
9159 // ISD::SRA/SRL nodes do not. Insert an AND to be safe; it is usually
9160 // optimized away during isel.
9161 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
9162 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
// Tmp1 is the fill value for the half that is shifted out completely when the
// shift amount is large: copies of the sign bit for SRA, zero otherwise.
9163 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
9164 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
9165 : DAG.getConstant(0, dl, VT);
9166
// Tmp2 is the funnel shift across both parts (the half that receives bits
// from both inputs for small shift amounts). Tmp3 is a plain shift of one
// part by the masked shift amount.
9167 SDValue Tmp2, Tmp3;
9168 if (IsSHL) {
9169 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
9170 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
9171 } else {
9172 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
9173 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
9174 }
9175
9176 // If the shift amount is greater than or equal to the width of a part we
9177 // don't use the result from the FSHL/FSHR. Insert a test and select the
9178 // appropriate values for large shift amounts.
// VTBits is a power of two (asserted above), so ANDing with it tests the
// single bit of the shift amount that has the value VTBits.
9179 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
9180 DAG.getConstant(VTBits, dl, ShAmtVT));
9181 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
9182 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
9183
// When Cond is true (large shift), the single-part shift Tmp3 moves into the
// other half and the vacated half receives the fill value Tmp1.
9184 if (IsSHL) {
9185 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
9186 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
9187 } else {
9188 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
9189 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
9190 }
9191}
9192
9194 SelectionDAG &DAG) const {
9195 // This implements llvm.canonicalize.f* by multiplication with 1.0, as
9196 // suggested in
9197 // https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
9198 // It uses strict_fp operations even outside a strict_fp context in order
9199 // to guarantee that the canonicalization is not optimized away by later
9200 // passes. The result chain introduced by that is intentionally ignored
9201 // since no ordering requirement is intended here.
9202 EVT VT = Node->getValueType(0);
9203 SDLoc DL(Node);
// Start from the flags of the original node and additionally mark the
// multiply as not raising FP exceptions.
9204 SDNodeFlags Flags = Node->getFlags();
9205 Flags.setNoFPExcept(true);
9206 SDValue One = DAG.getConstantFP(1.0, DL, VT);
// The node produces {VT, MVT::Other}; its first operand is the DAG entry
// node, followed by the value to canonicalize and the constant 1.0.
9207 SDValue Mul =
9208 DAG.getNode(ISD::STRICT_FMUL, DL, {VT, MVT::Other},
9209 {DAG.getEntryNode(), Node->getOperand(0), One}, Flags);
9210 return Mul;
9211}
9212
9214 SelectionDAG &DAG) const {
9215 // Expand conversion from a native IEEE float type to an arbitrary FP format
9216 // returning the result as an integer using bit manipulation.
// Operands read below: 0 = source FP value, 1 = destination semantics enum,
// 2 = rounding mode, 3 = saturate flag (nonzero means saturate).
// An error is emitted and an empty SDValue is returned when the destination
// format or the rounding mode is not one of the handled cases.
9217 EVT ResVT = Node->getValueType(0);
9218 SDLoc dl(Node);
9219
9220 SDValue FloatVal = Node->getOperand(0);
9221 const uint64_t SemEnum = Node->getConstantOperandVal(1);
9222 const auto Sem = static_cast<APFloatBase::Semantics>(SemEnum);
9223 const auto RoundMode =
9224 static_cast<RoundingMode>(Node->getConstantOperandVal(2));
9225 const bool Saturate = Node->getConstantOperandVal(3) != 0;
9226
9227 // Supported destination formats.
9228 switch (Sem) {
9234 break;
9235 default:
9236 DAG.getContext()->emitError("CONVERT_TO_ARBITRARY_FP: not implemented "
9237 "destination format (semantics enum " +
9238 Twine(SemEnum) + ")");
9239 return SDValue();
9240 }
9241
9242 // Supported rounding modes.
9243 switch (RoundMode) {
9249 break;
9250 default:
9251 DAG.getContext()->emitError(
9252 "CONVERT_TO_ARBITRARY_FP: unsupported rounding mode (enum " +
9253 Twine(static_cast<int>(RoundMode)) + ")");
9254 return SDValue();
9255 }
9256
9257 // Destination format parameters.
9258 const fltSemantics &DstSem = APFloatBase::EnumToSemantics(Sem);
9259 const unsigned DstBits = APFloat::getSizeInBits(DstSem);
9260 const unsigned DstPrecision = APFloat::semanticsPrecision(DstSem);
// Stored mantissa bits: the precision minus one.
9261 const unsigned DstMant = DstPrecision - 1;
// Exponent field width: what is left after the mantissa and one sign bit.
9262 const unsigned DstExpBits = DstBits - DstMant - 1;
// Exponent bias, derived as 1 - (minimum exponent of the format).
9263 const int DstBias = 1 - APFloat::semanticsMinExponent(DstSem);
9264 const unsigned DstExpMax = (1U << DstExpBits) - 1;
9265 const uint64_t DstMantMask = (DstMant > 0) ? ((1ULL << DstMant) - 1) : 0;
9266 const fltNonfiniteBehavior DstNFBehavior = DstSem.nonFiniteBehavior;
9267 const fltNanEncoding DstNanEnc = DstSem.nanEncoding;
9268
9269 // Compute the maximum normal exponent for the destination format.
9270 const unsigned DstExpMaxNormal =
9271 DstNFBehavior == fltNonfiniteBehavior::IEEE754 ? DstExpMax - 1
9272 : DstExpMax;
9273
9274 // For NanOnly formats the max exponent field for finite values
9275 // is DstExpMax, but the encoding with exp = DstExpMax and
9276 // mant = all-ones is NaN. So DstExpMaxNormal = DstExpMax, but max
9277 // mantissa at that exponent is DstMantMask - 1 (if NanEnc == AllOnes) to
9278 // avoid the NaN encoding.
9279 uint64_t DstMaxMantAtMaxExp = DstMantMask;
9280 if (DstNFBehavior == fltNonfiniteBehavior::NanOnly &&
9281 DstNanEnc == fltNanEncoding::AllOnes)
9282 DstMaxMantAtMaxExp = DstMantMask - 1;
9283
9284 // Source format parameters.
9285 EVT SrcVT = FloatVal.getValueType();
9286 const fltSemantics &SrcSem = SrcVT.getScalarType().getFltSemantics();
9287 const unsigned SrcBits = APFloat::getSizeInBits(SrcSem);
9288 const unsigned SrcPrecision = APFloat::semanticsPrecision(SrcSem);
9289 const unsigned SrcMant = SrcPrecision - 1;
9290 const uint64_t SrcMantMask = (1ULL << SrcMant) - 1;
9291
9292 // Work in the source integer type. Match the destination shape so the
9293 // expansion stays vector when ResVT is a vector.
9294 EVT IntScalarVT = EVT::getIntegerVT(*DAG.getContext(), SrcBits);
9295 EVT IntVT = ResVT.changeElementType(*DAG.getContext(), IntScalarVT);
9296 EVT SetCCVT =
9297 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), IntVT);
9298 EVT FPSetCCVT =
9299 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
9300
9301 SDValue Zero = DAG.getConstant(0, dl, IntVT);
9302 SDValue One = DAG.getConstant(1, dl, IntVT);
9303
9304 // Bitcast source float to integer to extract the sign bit.
9305 SDValue Src = DAG.getNode(ISD::BITCAST, dl, IntVT, FloatVal);
9306 SDValue SignBit =
9307 DAG.getNode(ISD::SRL, dl, IntVT, Src,
9308 DAG.getShiftAmountConstant(SrcBits - 1, IntVT, dl));
9309
9310 // Classify the input.
9311 SDValue FPZero = DAG.getConstantFP(0.0, dl, SrcVT);
9312 SDValue FPInf = DAG.getConstantFP(APFloat::getInf(SrcSem), dl, SrcVT);
9313 SDValue AbsVal = DAG.getNode(ISD::FABS, dl, SrcVT, FloatVal);
9314 SDValue IsNaN = DAG.getSetCC(dl, FPSetCCVT, FloatVal, FPZero, ISD::SETUO);
9315 SDValue IsInf = DAG.getSetCC(dl, FPSetCCVT, AbsVal, FPInf, ISD::SETOEQ);
9316 SDValue IsZero = DAG.getSetCC(dl, FPSetCCVT, FloatVal, FPZero, ISD::SETOEQ);
9317
9318 // Split into a normalized fraction and unbiased exponent. FFREXP normalizes
9319 // source denormals automatically. The result is unspecified for Inf/NaN, but
9320 // those inputs are detected above and override the final result.
9321 EVT FrexpExpScalarVT =
9323 EVT FrexpExpVT = SrcVT.changeElementType(*DAG.getContext(), FrexpExpScalarVT);
9324 SDValue Frexp =
9325 DAG.getNode(ISD::FFREXP, dl, DAG.getVTList(SrcVT, FrexpExpVT), FloatVal);
9326 SDValue FrexpFrac = Frexp.getValue(0);
9327 SDValue FrexpExp = Frexp.getValue(1);
9328
// Mantissa field of the normalized fraction, taken from its bit pattern.
9329 SDValue FrexpFracInt = DAG.getNode(ISD::BITCAST, dl, IntVT, FrexpFrac);
9330 SDValue EffSrcMant = DAG.getNode(ISD::AND, dl, IntVT, FrexpFracInt,
9331 DAG.getConstant(SrcMantMask, dl, IntVT));
9332
// Biased destination exponent: frexp exponent + DstBias - 1.
// NOTE(review): the -1 presumably accounts for FFREXP returning a fraction
// in [0.5, 1) rather than [1, 2) -- confirm against the FFREXP definition.
9333 SDValue FrexpExpExt = DAG.getSExtOrTrunc(FrexpExp, dl, IntVT);
9334 SDValue NewExp = DAG.getNode(ISD::ADD, dl, IntVT, FrexpExpExt,
9335 DAG.getConstant(DstBias - 1, dl, IntVT));
9336
9337 // Compute rounding increment given the round bit, sticky bits, and LSB
9338 // of the truncated mantissa.
9339 auto ComputeRoundUp = [&](SDValue RoundBit, SDValue StickyBits,
9340 SDValue LSB) -> SDValue {
9341 switch (RoundMode) {
9343 // Round up if round_bit && (sticky || lsb)
9344 SDValue StickyOrLSB = DAG.getNode(ISD::OR, dl, IntVT, StickyBits, LSB);
9345 return DAG.getNode(ISD::AND, dl, IntVT, RoundBit, StickyOrLSB);
9346 }
9348 return Zero;
9350 // Round up if positive and any truncated bits are set.
9351 SDValue AnyTruncBits =
9352 DAG.getNode(ISD::OR, dl, IntVT, RoundBit, StickyBits);
9353 SDValue HasTruncBits =
9354 DAG.getSetCC(dl, SetCCVT, AnyTruncBits, Zero, ISD::SETNE);
9355 SDValue IsPositive = DAG.getSetCC(dl, SetCCVT, SignBit, Zero, ISD::SETEQ);
9356 SDValue DoRound =
9357 DAG.getNode(ISD::AND, dl, SetCCVT, HasTruncBits, IsPositive);
9358 return DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, DoRound);
9359 }
9361 // Round up if negative and any truncated bits are set (to -Inf).
9362 SDValue AnyTruncBits =
9363 DAG.getNode(ISD::OR, dl, IntVT, RoundBit, StickyBits);
9364 SDValue HasTruncBits =
9365 DAG.getSetCC(dl, SetCCVT, AnyTruncBits, Zero, ISD::SETNE);
9366 SDValue IsNegative = DAG.getSetCC(dl, SetCCVT, SignBit, Zero, ISD::SETNE);
9367 SDValue DoRound =
9368 DAG.getNode(ISD::AND, dl, SetCCVT, HasTruncBits, IsNegative);
9369 return DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, DoRound);
9370 }
9372 return RoundBit;
9373 default:
9374 llvm_unreachable("unsupported rounding mode");
9375 }
9376 };
9377
9378 // Round mantissa from SrcMant bits to DstMant bits.
9379 SDValue TruncMant;
9380 SDValue RoundUp;
9381 if (SrcMant > DstMant) {
9382 const unsigned Shift = SrcMant - DstMant;
9383 SDValue ShiftConst = DAG.getShiftAmountConstant(Shift, IntVT, dl);
9384 TruncMant = DAG.getNode(ISD::SRL, dl, IntVT, EffSrcMant, ShiftConst);
9385
9386 // Check bit at position Shift - 1 aka the round bit.
// Shift is always >= 1 here because SrcMant > DstMant, so the else branch
// below is purely defensive.
9387 SDValue RoundBit;
9388 if (Shift >= 1) {
9389 SDValue RoundBitShift = DAG.getShiftAmountConstant(Shift - 1, IntVT, dl);
9390 SDValue ShiftedMant =
9391 DAG.getNode(ISD::SRL, dl, IntVT, EffSrcMant, RoundBitShift);
9392 RoundBit = DAG.getNode(ISD::AND, dl, IntVT, ShiftedMant, One);
9393 } else {
9394 RoundBit = Zero;
9395 }
9396
9397 // OR of all bits below the round bit to get sticky bits.
// The result is normalized to 0 or 1 through the compare and zero-extend.
9398 SDValue StickyBits;
9399 if (Shift >= 2) {
9400 uint64_t StickyMask = maskTrailingOnes<uint64_t>(Shift - 1);
9401 StickyBits = DAG.getNode(ISD::AND, dl, IntVT, EffSrcMant,
9402 DAG.getConstant(StickyMask, dl, IntVT));
9403 StickyBits = DAG.getSetCC(dl, SetCCVT, StickyBits, Zero, ISD::SETNE);
9404 StickyBits = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, StickyBits);
9405 } else {
9406 StickyBits = Zero;
9407 }
9408
9409 // LSB of truncated mantissa.
9410 SDValue LSB = DAG.getNode(ISD::AND, dl, IntVT, TruncMant, One);
9411
9412 RoundUp = ComputeRoundUp(RoundBit, StickyBits, LSB);
9413 } else {
9414 // If DstMant >= SrcMant, then no rounding needed, just shift left.
9415 SDValue MantShift =
9416 DAG.getShiftAmountConstant(DstMant - SrcMant, IntVT, dl);
9417 TruncMant = DAG.getNode(ISD::SHL, dl, IntVT, EffSrcMant, MantShift);
9418 RoundUp = Zero;
9419 }
9420
9421 // Apply rounding.
9422 SDValue RoundedMant = DAG.getNode(ISD::ADD, dl, IntVT, TruncMant, RoundUp);
9423
9424 // Handle mantissa overflow from rounding.
9425 // If rounded_mant > DstMantMask, carry into exponent.
9426 SDValue MantOverflow =
9427 DAG.getSetCC(dl, SetCCVT, RoundedMant,
9428 DAG.getConstant(DstMantMask, dl, IntVT), ISD::SETGT);
9429 // On overflow: mant = 0, exp += 1.
9430 SDValue AdjMant = DAG.getSelect(dl, IntVT, MantOverflow, Zero, RoundedMant);
9431 SDValue AdjExp =
9432 DAG.getNode(ISD::ADD, dl, IntVT, NewExp,
9433 DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, MantOverflow));
9434
9435 // Precompute sign shifted to MSB of destination.
9436 SDValue SignShifted =
9437 DAG.getNode(ISD::SHL, dl, IntVT, SignBit,
9438 DAG.getShiftAmountConstant(DstBits - 1, IntVT, dl));
9439
9440 // Destination denormal conversion (when new_exp <= 0).
9441 // Shift the mantissa right by 1 - new_exp additional bits and set the
9442 // exponent field to 0.
// ExpIsNeg is true when the adjusted biased exponent is below 1 (signed).
9443 SDValue ExpIsNeg = DAG.getSetCC(dl, SetCCVT, AdjExp,
9444 DAG.getConstant(1, dl, IntVT), ISD::SETLT);
9445
9446 SDValue DenormResult;
9447 {
9448 // denorm_shift = 1 - NewExp.
// Note that this uses NewExp (the exponent before the rounding carry), not
// AdjExp; the denormal path redoes the rounding from the full mantissa.
9449 SDValue DenormShift = DAG.getNode(ISD::SUB, dl, IntVT, One, NewExp);
9450
9451 // full_src_mant = (1 << SrcMant) | EffSrcMant.
9452 SDValue ImplicitOne =
9453 DAG.getNode(ISD::SHL, dl, IntVT, One,
9454 DAG.getShiftAmountConstant(SrcMant, IntVT, dl));
9455 SDValue FullSrcMant =
9456 DAG.getNode(ISD::OR, dl, IntVT, EffSrcMant, ImplicitOne);
9457
9458 // Total right shift = DenormShift + (SrcMant - DstMant).
9459 int64_t MantDelta = static_cast<int64_t>(SrcMant) - DstMant;
9460 SDValue TotalShift =
9461 DAG.getNode(ISD::ADD, dl, IntVT, DenormShift,
9462 DAG.getSignedConstant(MantDelta, dl, IntVT));
9463
9464 // Clamp total shift to avoid UB, then truncate denorm mantissa.
// The clamp is an unsigned minimum against SrcBits - 1.
9465 EVT ShiftVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
9466 SDValue MaxShift = DAG.getConstant(SrcBits - 1, dl, IntVT);
9467 SDValue ClampedShift =
9468 DAG.getNode(ISD::UMIN, dl, IntVT, TotalShift, MaxShift);
9469 SDValue DenormTruncMant =
9470 DAG.getNode(ISD::SRL, dl, IntVT, FullSrcMant,
9471 DAG.getZExtOrTrunc(ClampedShift, dl, ShiftVT));
9472
9473 // Rounding for denorm path.
9474 SDValue DenormRoundUp;
9475 {
9476 // Round bit is at position TotalShift - 1 of FullSrcMant.
9477 // Clamp to at least 1 so the subtraction doesn't underflow and create
9478 // shift nodes with invalid shift amounts.
9479 SDValue SafeShift = DAG.getNode(ISD::UMAX, dl, IntVT, ClampedShift, One);
9480 SDValue RoundBitPos = DAG.getNode(ISD::SUB, dl, IntVT, SafeShift, One);
9481 SDValue RoundBitPosAmt = DAG.getZExtOrTrunc(RoundBitPos, dl, ShiftVT);
9482 SDValue DenormRoundBit = DAG.getNode(
9483 ISD::AND, dl, IntVT,
9484 DAG.getNode(ISD::SRL, dl, IntVT, FullSrcMant, RoundBitPosAmt), One);
9485
9486 // Sticky: all bits below round bit.
9487 // sticky_mask = (1 << RoundBitPos) - 1
9488 SDValue StickyMask = DAG.getNode(
9489 ISD::SUB, dl, IntVT,
9490 DAG.getNode(ISD::SHL, dl, IntVT, One, RoundBitPosAmt), One);
9491 SDValue DenormStickyBits =
9492 DAG.getNode(ISD::AND, dl, IntVT, FullSrcMant, StickyMask);
9493 SDValue HasSticky = DAG.getNode(
9494 ISD::ZERO_EXTEND, dl, IntVT,
9495 DAG.getSetCC(dl, SetCCVT, DenormStickyBits, Zero, ISD::SETNE));
9496
9497 SDValue DenormLSB =
9498 DAG.getNode(ISD::AND, dl, IntVT, DenormTruncMant, One);
9499
9500 DenormRoundUp = ComputeRoundUp(DenormRoundBit, HasSticky, DenormLSB);
9501
9502 // Only apply rounding if TotalShift >= 1 (i.e., there are bits to round).
9503 SDValue ShiftGEOne =
9504 DAG.getSetCC(dl, SetCCVT, ClampedShift, One, ISD::SETUGE);
9505 DenormRoundUp = DAG.getSelect(dl, IntVT, ShiftGEOne, DenormRoundUp, Zero);
9506 }
9507
9508 SDValue DenormRoundedMant =
9509 DAG.getNode(ISD::ADD, dl, IntVT, DenormTruncMant, DenormRoundUp);
9510
9511 // If rounding caused overflow into the normal range, then we get the
9512 // smallest normal number.
9513 SDValue DenormMantOF =
9514 DAG.getSetCC(dl, SetCCVT, DenormRoundedMant,
9515 DAG.getConstant(DstMantMask, dl, IntVT), ISD::SETGT);
9516 SDValue DenormFinalMant =
9517 DAG.getSelect(dl, IntVT, DenormMantOF, Zero, DenormRoundedMant);
9518 SDValue DenormFinalExp = DAG.getSelect(dl, IntVT, DenormMantOF, One, Zero);
9519
9520 // Assemble: sign | (exp << DstMant) | mant
9521 SDValue DenormExpShifted =
9522 DAG.getNode(ISD::SHL, dl, IntVT, DenormFinalExp,
9523 DAG.getShiftAmountConstant(DstMant, IntVT, dl));
9524 DenormResult = DAG.getNode(
9525 ISD::OR, dl, IntVT,
9526 DAG.getNode(ISD::OR, dl, IntVT, SignShifted, DenormExpShifted),
9527 DenormFinalMant);
9528 }
9529
9530 // Exponent overflow detection.
9531 SDValue ExpOF =
9532 DAG.getSetCC(dl, SetCCVT, AdjExp,
9533 DAG.getConstant(DstExpMaxNormal, dl, IntVT), ISD::SETGT);
9534
9535 // Also check if AdjExp == DstExpMaxNormal and mantissa overflow into
9536 // a value that exceeds the max allowed mantissa at that exponent.
9537 SDValue ExpAtMax =
9538 DAG.getSetCC(dl, SetCCVT, AdjExp,
9539 DAG.getConstant(DstExpMaxNormal, dl, IntVT), ISD::SETEQ);
9540 SDValue MantExceedsMax =
9541 DAG.getSetCC(dl, SetCCVT, AdjMant,
9542 DAG.getConstant(DstMaxMantAtMaxExp, dl, IntVT), ISD::SETGT);
9543 SDValue ExpMantOF =
9544 DAG.getNode(ISD::AND, dl, SetCCVT, ExpAtMax, MantExceedsMax);
9545 SDValue IsOverflow = DAG.getNode(ISD::OR, dl, SetCCVT, ExpOF, ExpMantOF);
9546
9547 // Build overflow result.
9549
9550 if (Saturate) {
9551 // Clamp to max finite value:
9552 // sign | (DstExpMaxNormal << DstMant) | DstMaxMantAtMaxExp
9553 uint64_t MaxFinite =
9554 ((uint64_t)DstExpMaxNormal << DstMant) | DstMaxMantAtMaxExp;
9555 OverflowResult = DAG.getNode(ISD::OR, dl, IntVT, SignShifted,
9556 DAG.getConstant(MaxFinite, dl, IntVT));
9557 } else if (DstNFBehavior == fltNonfiniteBehavior::IEEE754) {
9558 // Produce infinity.
9559 uint64_t InfBits = (uint64_t)DstExpMax << DstMant;
9560 OverflowResult = DAG.getNode(ISD::OR, dl, IntVT, SignShifted,
9561 DAG.getConstant(InfBits, dl, IntVT));
9562 } else {
9563 // Emit poison if no Inf in format and not saturating.
9564 OverflowResult = DAG.getPOISON(IntVT);
9565 }
9566
9567 // Assemble normal result: sign | (AdjExp << DstMant) | AdjMant
9568 SDValue NormExpShifted =
9569 DAG.getNode(ISD::SHL, dl, IntVT, AdjExp,
9570 DAG.getShiftAmountConstant(DstMant, IntVT, dl));
9571 SDValue NormResult = DAG.getNode(
9572 ISD::OR, dl, IntVT,
9573 DAG.getNode(ISD::OR, dl, IntVT, SignShifted, NormExpShifted), AdjMant);
9574
9575 // Build special-value results.
9576 SDValue NaNResult;
9577 if (DstNFBehavior == fltNonfiniteBehavior::IEEE754) {
9578 // Produce canonical NaN.
9579 const uint64_t QNaNBit = (DstMant > 0) ? (1ULL << (DstMant - 1)) : 0;
9580 NaNResult =
9581 DAG.getConstant(((uint64_t)DstExpMax << DstMant) | QNaNBit, dl, IntVT);
9582 } else if (DstNFBehavior == fltNonfiniteBehavior::NanOnly &&
9583 DstNanEnc == fltNanEncoding::AllOnes) {
9584 // E4M3FN-style: NaN is exp=all-ones, mant=all-ones.
9585 NaNResult = DAG.getConstant(((uint64_t)DstExpMax << DstMant) | DstMantMask,
9586 dl, IntVT);
9587 } else {
9588 // NaN -> poison for finite only values.
9589 NaNResult = DAG.getPOISON(IntVT);
9590 }
9591
9592 // Inf handling.
9593 SDValue InfResult;
9594 if (DstNFBehavior == fltNonfiniteBehavior::IEEE754) {
9595 // Produce signed infinity.
9596 uint64_t InfBits = (uint64_t)DstExpMax << DstMant;
9597 InfResult = DAG.getNode(ISD::OR, dl, IntVT, SignShifted,
9598 DAG.getConstant(InfBits, dl, IntVT));
9599 } else if (Saturate) {
9600 // Inf saturates to max finite.
9601 uint64_t MaxFinite =
9602 ((uint64_t)DstExpMaxNormal << DstMant) | DstMaxMantAtMaxExp;
9603 InfResult = DAG.getNode(ISD::OR, dl, IntVT, SignShifted,
9604 DAG.getConstant(MaxFinite, dl, IntVT));
9605 } else {
9606 // No Inf and not saturating -> poison.
9607 InfResult = DAG.getPOISON(IntVT);
9608 }
9609
// A zero input keeps only its sign bit, moved to the destination MSB.
9610 SDValue ZeroResult = SignShifted;
9611
9612 // Final selection. Among finite values, overflow overrides the
// denormal/normal choice. The special cases are then layered on top, each
// later select overriding the earlier ones, so the precedence is NaN first,
// then Inf, then Zero, then the finite result.
9613 SDValue FiniteResult =
9614 DAG.getSelect(dl, IntVT, ExpIsNeg, DenormResult, NormResult);
9615 FiniteResult =
9616 DAG.getSelect(dl, IntVT, IsOverflow, OverflowResult, FiniteResult);
9617
9618 SDValue Result = FiniteResult;
9619 Result = DAG.getSelect(dl, IntVT, IsZero, ZeroResult, Result);
9620 Result = DAG.getSelect(dl, IntVT, IsInf, InfResult, Result);
9621 Result = DAG.getSelect(dl, IntVT, IsNaN, NaNResult, Result);
9622
9623 // Truncate to destination integer type.
9624 return DAG.getZExtOrTrunc(Result, dl, ResVT);
9625}
9626
9627SDValue
9629 SelectionDAG &DAG) const {
9630 SDLoc dl(Node);
9631 EVT DstVT = Node->getValueType(0);
9632 EVT DstScalarVT = DstVT.getScalarType();
9633
9634 SDValue IntVal = Node->getOperand(0);
9635 const uint64_t SemEnum = Node->getConstantOperandVal(1);
9636 const auto Sem = static_cast<APFloatBase::Semantics>(SemEnum);
9637
9638 // Supported source formats.
9639 switch (Sem) {
9645 break;
9646 default:
9647 DAG.getContext()->emitError("CONVERT_FROM_ARBITRARY_FP: not implemented "
9648 "source format (semantics enum " +
9649 Twine(SemEnum) + ")");
9650 return SDValue();
9651 }
9652
9653 const fltSemantics &SrcSem = APFloatBase::EnumToSemantics(Sem);
9654 const unsigned SrcBits = APFloat::getSizeInBits(SrcSem);
9655 const unsigned SrcPrecision = APFloat::semanticsPrecision(SrcSem);
9656 const unsigned SrcMant = SrcPrecision - 1;
9657 const unsigned SrcExp = SrcBits - SrcMant - 1;
9658 const int SrcBias = 1 - APFloat::semanticsMinExponent(SrcSem);
9659 const fltNonfiniteBehavior NFBehavior = SrcSem.nonFiniteBehavior;
9660
9661 // Destination format parameters.
9662 const fltSemantics &DstSem = DstScalarVT.getFltSemantics();
9663 const unsigned DstBits = APFloat::getSizeInBits(DstSem);
9664 const unsigned DstMant = APFloat::semanticsPrecision(DstSem) - 1;
9665 const unsigned DstExpBits = DstBits - DstMant - 1;
9666 const int DstMinExp = APFloat::semanticsMinExponent(DstSem);
9667 const int DstBias = 1 - DstMinExp;
9668 const uint64_t DstExpAllOnes = (1ULL << DstExpBits) - 1;
9669
9670 // Work in an integer type matching the destination float width.
9671 EVT IntScalarVT = EVT::getIntegerVT(*DAG.getContext(), DstBits);
9672 EVT IntVT = DstVT.isVector()
9673 ? EVT::getVectorVT(*DAG.getContext(), IntScalarVT,
9674 DstVT.getVectorElementCount())
9675 : IntScalarVT;
9676
9677 SDValue Src = DAG.getZExtOrTrunc(IntVal, dl, IntVT);
9678
9679 EVT SetCCVT =
9680 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), IntVT);
9681
9682 SDValue Zero = DAG.getConstant(0, dl, IntVT);
9683 SDValue One = DAG.getConstant(1, dl, IntVT);
9684
9685 // Extract bit fields.
9686 const uint64_t MantMask = (SrcMant > 0) ? ((1ULL << SrcMant) - 1) : 0;
9687 const uint64_t ExpMask = (1ULL << SrcExp) - 1;
9688
9689 SDValue MantField = DAG.getNode(ISD::AND, dl, IntVT, Src,
9690 DAG.getConstant(MantMask, dl, IntVT));
9691
9692 SDValue ExpField =
9693 DAG.getNode(ISD::AND, dl, IntVT,
9694 DAG.getNode(ISD::SRL, dl, IntVT, Src,
9695 DAG.getShiftAmountConstant(SrcMant, IntVT, dl)),
9696 DAG.getConstant(ExpMask, dl, IntVT));
9697
9698 SDValue SignBit =
9699 DAG.getNode(ISD::SRL, dl, IntVT, Src,
9700 DAG.getShiftAmountConstant(SrcBits - 1, IntVT, dl));
9701
9702 SDValue SignShifted =
9703 DAG.getNode(ISD::SHL, dl, IntVT, SignBit,
9704 DAG.getShiftAmountConstant(DstBits - 1, IntVT, dl));
9705
9706 // Classify the input.
9707 SDValue ExpAllOnes = DAG.getConstant(ExpMask, dl, IntVT);
9708 SDValue IsExpAllOnes =
9709 DAG.getSetCC(dl, SetCCVT, ExpField, ExpAllOnes, ISD::SETEQ);
9710 SDValue IsExpZero = DAG.getSetCC(dl, SetCCVT, ExpField, Zero, ISD::SETEQ);
9711 SDValue IsMantZero = DAG.getSetCC(dl, SetCCVT, MantField, Zero, ISD::SETEQ);
9712 SDValue IsMantNonZero =
9713 DAG.getSetCC(dl, SetCCVT, MantField, Zero, ISD::SETNE);
9714
9715 SDValue IsNaN;
9716 if (NFBehavior == fltNonfiniteBehavior::FiniteOnly) {
9717 IsNaN = DAG.getBoolConstant(false, dl, SetCCVT, IntVT);
9718 } else if (NFBehavior == fltNonfiniteBehavior::IEEE754) {
9719 IsNaN = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantNonZero);
9720 } else {
9722 SDValue MantAllOnes = DAG.getConstant(MantMask, dl, IntVT);
9723 SDValue IsMantAllOnes =
9724 DAG.getSetCC(dl, SetCCVT, MantField, MantAllOnes, ISD::SETEQ);
9725 IsNaN = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantAllOnes);
9726 }
9727
9728 SDValue IsInf;
9729 if (NFBehavior == fltNonfiniteBehavior::IEEE754)
9730 IsInf = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantZero);
9731 else
9732 IsInf = DAG.getBoolConstant(false, dl, SetCCVT, IntVT);
9733
9734 SDValue IsZero = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpZero, IsMantZero);
9735 SDValue IsDenorm =
9736 DAG.getNode(ISD::AND, dl, SetCCVT, IsExpZero, IsMantNonZero);
9737
9738 // Normal value conversion.
9739 const int BiasAdjust = DstBias - SrcBias;
9740 SDValue NormDstExp =
9741 DAG.getNode(ISD::ADD, dl, IntVT, ExpField,
9742 DAG.getConstant(APInt(DstBits, BiasAdjust, true), dl, IntVT));
9743
9744 SDValue NormDstMant;
9745 if (DstMant > SrcMant) {
9746 SDValue NormDstMantShift =
9747 DAG.getShiftAmountConstant(DstMant - SrcMant, IntVT, dl);
9748 NormDstMant = DAG.getNode(ISD::SHL, dl, IntVT, MantField, NormDstMantShift);
9749 } else {
9750 NormDstMant = MantField;
9751 }
9752
9753 SDValue DstMantShift = DAG.getShiftAmountConstant(DstMant, IntVT, dl);
9754 SDValue NormExpShifted =
9755 DAG.getNode(ISD::SHL, dl, IntVT, NormDstExp, DstMantShift);
9756 SDValue NormResult =
9757 DAG.getNode(ISD::OR, dl, IntVT,
9758 DAG.getNode(ISD::OR, dl, IntVT, SignShifted, NormExpShifted),
9759 NormDstMant);
9760
9761 // Denormal value conversion.
9762 SDValue DenormResult;
9763 {
9764 const unsigned IntVTBits = DstBits;
9765 SDValue LeadingZeros =
9766 DAG.getNode(ISD::CTLZ_ZERO_POISON, dl, IntVT, MantField);
9767
9768 const int DenormExpConst =
9769 (int)IntVTBits + DstBias - SrcBias - (int)SrcMant;
9770 SDValue DenormDstExp = DAG.getNode(
9771 ISD::SUB, dl, IntVT,
9772 DAG.getConstant(APInt(DstBits, DenormExpConst, true), dl, IntVT),
9773 LeadingZeros);
9774
9775 SDValue MantMSB =
9776 DAG.getNode(ISD::SUB, dl, IntVT,
9777 DAG.getConstant(IntVTBits - 1, dl, IntVT), LeadingZeros);
9778
9779 SDValue LeadingOne = DAG.getNode(ISD::SHL, dl, IntVT, One, MantMSB);
9780 SDValue Frac = DAG.getNode(ISD::XOR, dl, IntVT, MantField, LeadingOne);
9781
9782 const unsigned ShiftSub = IntVTBits - 1 - DstMant;
9783 SDValue ShiftAmount = DAG.getNode(ISD::SUB, dl, IntVT, LeadingZeros,
9784 DAG.getConstant(ShiftSub, dl, IntVT));
9785
9786 SDValue DenormDstMant = DAG.getNode(ISD::SHL, dl, IntVT, Frac, ShiftAmount);
9787
9788 SDValue DenormExpShifted =
9789 DAG.getNode(ISD::SHL, dl, IntVT, DenormDstExp, DstMantShift);
9790 DenormResult = DAG.getNode(
9791 ISD::OR, dl, IntVT,
9792 DAG.getNode(ISD::OR, dl, IntVT, SignShifted, DenormExpShifted),
9793 DenormDstMant);
9794 }
9795
9796 SDValue FiniteResult =
9797 DAG.getSelect(dl, IntVT, IsDenorm, DenormResult, NormResult);
9798
9799 const uint64_t QNaNBit = (DstMant > 0) ? (1ULL << (DstMant - 1)) : 0;
9800 SDValue NaNResult =
9801 DAG.getConstant((DstExpAllOnes << DstMant) | QNaNBit, dl, IntVT);
9802
9803 SDValue InfResult =
9804 DAG.getNode(ISD::OR, dl, IntVT, SignShifted,
9805 DAG.getConstant(DstExpAllOnes << DstMant, dl, IntVT));
9806
9807 SDValue ZeroResult = SignShifted;
9808
9809 SDValue Result = FiniteResult;
9810 Result = DAG.getSelect(dl, IntVT, IsZero, ZeroResult, Result);
9811 Result = DAG.getSelect(dl, IntVT, IsInf, InfResult, Result);
9812 Result = DAG.getSelect(dl, IntVT, IsNaN, NaNResult, Result);
9813
9814 return DAG.getNode(ISD::BITCAST, dl, DstVT, Result);
9815}
9816
9818 SelectionDAG &DAG) const {
      // Expands a floating-point to signed-integer conversion node into integer
      // bit operations on the source's bit pattern. Only non-strict f32 -> i64
      // nodes are handled; every other case returns false without touching
      // Result. On success the expanded value is stored in Result and true is
      // returned.
      // Presumably strict nodes carry their chain as operand 0, which is why the
      // value operand is read from index 1 for them.
9819 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
9820 SDValue Src = Node->getOperand(OpNo);
9821 EVT SrcVT = Src.getValueType();
9822 EVT DstVT = Node->getValueType(0);
9823 SDLoc dl(SDValue(Node, 0));
9824
9825 // FIXME: Only f32 to i64 conversions are supported.
9826 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
9827 return false;
9828
9829 if (Node->isStrictFPOpcode())
9830 // When a NaN is converted to an integer a trap is allowed. We can't
9831 // use this expansion here because it would eliminate that trap. Other
9832 // traps are also allowed and cannot be eliminated. See
9833 // IEEE 754-2008 sec 5.8.
9834 return false;
9835
9836 // Expand f32 -> i64 conversion
9837 // This algorithm comes from compiler-rt's implementation of fixsfdi:
9838 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
9839 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
9840 EVT IntVT = SrcVT.changeTypeToInteger();
9841 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
9842
      // Field layout constants for the f32 bit pattern: exponent field mask and
      // its lowest bit position (23), the exponent bias (127), the sign bit and
      // its position, and the 23-bit mantissa mask.
9843 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
9844 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
9845 SDValue Bias = DAG.getConstant(127, dl, IntVT);
9846 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
9847 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
9848 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
9849
9850 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
9851
      // Unbiased exponent: (Bits & ExponentMask) >> 23, minus the bias.
9852 SDValue ExponentBits = DAG.getNode(
9853 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
9854 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
9855 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
9856
      // Arithmetic shift of the isolated sign bit by (width - 1) yields 0 for a
      // clear sign bit and all-ones for a set one; the value is then
      // sign-extended to the destination type.
9857 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
9858 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
9859 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
9860 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
9861
      // Significand: the mantissa field with bit 23 (0x00800000) set on top of
      // it, then widened to the destination type.
9862 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
9863 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
9864 DAG.getConstant(0x00800000, dl, IntVT));
9865
9866 R = DAG.getZExtOrTrunc(R, dl, DstVT);
9867
      // Scale the significand: if Exponent > 23 shift left by (Exponent - 23),
      // otherwise shift right by (23 - Exponent).
9868 R = DAG.getSelectCC(
9869 dl, Exponent, ExponentLoBit,
9870 DAG.getNode(ISD::SHL, dl, DstVT, R,
9871 DAG.getZExtOrTrunc(
9872 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
9873 dl, IntShVT)),
9874 DAG.getNode(ISD::SRL, dl, DstVT, R,
9875 DAG.getZExtOrTrunc(
9876 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
9877 dl, IntShVT)),
9878 ISD::SETGT);
9879
      // (R ^ Sign) - Sign leaves R unchanged when Sign is 0 and negates it
      // (two's complement) when Sign is all-ones.
9880 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
9881 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
9882
      // A negative unbiased exponent selects the constant 0 instead of Ret.
9883 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
9884 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
9885 return true;
9886}
9887
9889 SDValue &Chain,
9890 SelectionDAG &DAG) const {
      // Expands a floating-point to unsigned-integer conversion node in terms
      // of signed conversions (FP_TO_SINT / STRICT_FP_TO_SINT). Returns false
      // when the expansion is not attempted; on success stores the expanded
      // value in Result and returns true. For strict nodes Chain is updated to
      // the output chain of the last strict node created.
9891 SDLoc dl(SDValue(Node, 0));
      // For strict nodes operand 0 is used as the incoming chain below, so the
      // value operand is at index 1.
9892 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
9893 SDValue Src = Node->getOperand(OpNo);
9894
9895 EVT SrcVT = Src.getValueType();
9896 EVT DstVT = Node->getValueType(0);
9897 EVT SetCCVT =
9898 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
9899 EVT DstSetCCVT =
9900 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
9901
9902 // Only expand vector types if we have the appropriate vector bit operations.
9903 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
9905 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
9907 return false;
9908
9909 // If the maximum float value is smaller than the signed integer range,
9910 // the destination signmask can't be represented by the float, so we can
9911 // just use FP_TO_SINT directly.
      // APF starts as the all-zero bit pattern and is converted from the
      // destination sign mask, treated as unsigned, in the condition below.
      // NOTE(review): the first part of that condition is not visible in this
      // listing.
9912 const fltSemantics &APFSem = SrcVT.getFltSemantics();
9913 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
9914 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
9916 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
9917 if (Node->isStrictFPOpcode()) {
9918 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
9919 { Node->getOperand(0), Src });
9920 Chain = Result.getValue(1);
9921 } else
9922 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
9923 return true;
9924 }
9925
9926 // Don't expand it if there isn't cheap fsub instruction.
9928 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
9929 return false;
9930
      // Cst is the destination sign mask value as a constant of the source
      // floating-point type.
9931 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
9932 SDValue Sel;
9933
      // Sel = (Src < Cst). The strict form is a signaling compare threaded onto
      // the incoming chain.
9934 if (Node->isStrictFPOpcode()) {
9935 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
9936 Node->getOperand(0), /*IsSignaling*/ true);
9937 Chain = Sel.getValue(1);
9938 } else {
9939 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
9940 }
9941
      // The offset-based form is used for strict nodes and whenever the target
      // hook asks for it.
9942 bool Strict = Node->isStrictFPOpcode() ||
9943 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
9944
9945 if (Strict) {
9946 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
9947 // signmask then offset (the result of which should be fully representable).
9948 // Sel = Src < 0x8000000000000000
9949 // FltOfs = select Sel, 0, 0x8000000000000000
9950 // IntOfs = select Sel, 0, 0x8000000000000000
9951 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
9952
9953 // TODO: Should any fast-math-flags be set for the FSUB?
9954 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
9955 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
      // Re-type the condition so it can drive a select on the destination type.
9956 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
9957 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
9958 DAG.getConstant(0, dl, DstVT),
9959 DAG.getConstant(SignMask, dl, DstVT));
9960 SDValue SInt;
9961 if (Node->isStrictFPOpcode()) {
      // Strict nodes are chained: compare -> fsub -> fp_to_sint.
9962 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
9963 { Chain, Src, FltOfs });
9964 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
9965 { Val.getValue(1), Val });
9966 Chain = SInt.getValue(1);
9967 } else {
9968 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
9969 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
9970 }
9971 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
9972 } else {
9973 // Expand based on maximum range of FP_TO_SINT:
9974 // True = fp_to_sint(Src)
9975 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
9976 // Result = select (Src < 0x8000000000000000), True, False
9977
9978 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
9979 // TODO: Should any fast-math-flags be set for the FSUB?
9980 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
9981 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
      // The "+ signmask" of the formula above is emitted as an XOR: adding the
      // sign mask modulo 2^N only toggles the top bit.
9982 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
9983 DAG.getConstant(SignMask, dl, DstVT));
9984 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
9985 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
9986 }
9987 return true;
9988}
9989
9991 SDValue &Chain, SelectionDAG &DAG) const {
      // Expands an unsigned-integer to floating-point conversion node. Returns
      // false when no expansion is produced; otherwise stores the expansion in
      // Result and returns true. No visible statement writes to Chain, which is
      // consistent with strict nodes being rejected up front.
9992 // This transform is not correct for converting 0 when rounding mode is set
9993 // to round toward negative infinity which will produce -0.0. So disable
9994 // under strictfp.
9995 if (Node->isStrictFPOpcode())
9996 return false;
9997
9998 SDValue Src = Node->getOperand(0);
9999 EVT SrcVT = Src.getValueType();
10000 EVT DstVT = Node->getValueType(0);
10001
10002 // If the input is known to be non-negative and SINT_TO_FP is legal then use
10003 // it.
10004 if (Node->getFlags().hasNonNeg() &&
10006 Result =
10007 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
10008 return true;
10009 }
10010
      // The bit-trick expansion below is only written for i64 -> f64 (scalar
      // or per element).
10011 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
10012 return false;
10013
10014 // Only expand vector types if we have the appropriate vector bit
10015 // operations.
10016 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
10021 return false;
10022
10023 SDLoc dl(SDValue(Node, 0));
10024
10025 // Implementation of unsigned i64 to f64 following the algorithm in
10026 // __floatundidf in compiler_rt. This implementation performs rounding
10027 // correctly in all rounding modes with the exception of converting 0
10028 // when rounding toward negative infinity. In that case the fsub will
10029 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
10030 // incorrect.
      // Constants are f64 bit patterns: 0x4330000000000000 is 2^52,
      // 0x4530000000000000 is 2^84 and 0x4530000000100000 is 2^84 + 2^52.
10031 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
10032 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
10033 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
10034 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
10035 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
10036 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
10037
      // Split the input into 32-bit halves and OR each half into the low
      // mantissa bits of a constant: LoFlt == 2^52 + Lo and
      // HiFlt == 2^84 + Hi * 2^32 once reinterpreted as f64.
10038 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
10039 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
10040 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
10041 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
10042 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
10043 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
      // HiSub == Hi * 2^32 - 2^52, so the final add cancels the 2^52 offset and
      // yields Lo + Hi * 2^32.
10044 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
10045 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
10046 return true;
10047}
10048
10049SDValue
10051 SelectionDAG &DAG) const {
      // Builds a select_cc implementing an fminnum/fmaxnum node when the node
      // carries the no-NaNs flag. Returns an empty SDValue when the flag is
      // absent or when the vector legality check below rejects the node.
10052 unsigned Opcode = Node->getOpcode();
10053 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
10054 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
10055 "Wrong opcode");
10056
10057 if (Node->getFlags().hasNoNaNs()) {
      // NOTE(review): only ISD::FMINNUM maps to SETLT here. STRICT_FMINNUM,
      // which the assert above admits, would get SETGT, and operands 0 and 1
      // are read regardless of strictness. Confirm that strict nodes never
      // reach this path.
10058 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
10059 EVT VT = Node->getValueType(0);
10060 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
10062 VT.isVector())
10063 return SDValue();
10064 SDValue Op1 = Node->getOperand(0);
10065 SDValue Op2 = Node->getOperand(1);
      // select_cc(Op1 Pred Op2) ? Op1 : Op2, carrying the node's flags.
10066 return DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred,
10067 Node->getFlags());
10068 }
10069
10070 return SDValue();
10071}
10072
10074 SelectionDAG &DAG) const {
      // Expands an fminnum/fmaxnum node. Strategies are tried in order: vector
      // splitting, a replacement opcode (NewOp) on quieted operands,
      // FMINIMUM/FMAXIMUM when NaNs are excluded, and finally SelCC. Returns an
      // empty SDValue when none applies.
10075 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
10076 return Expanded;
10077
10078 EVT VT = Node->getValueType(0);
10079 if (VT.isScalableVector())
10081 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
10082
10083 SDLoc dl(Node);
      // NOTE(review): the initializer of NewOp is not visible in this listing.
10084 unsigned NewOp =
10086
10087 if (isOperationLegalOrCustom(NewOp, VT)) {
10088 SDValue Quiet0 = Node->getOperand(0);
10089 SDValue Quiet1 = Node->getOperand(1);
10090
10091 if (!Node->getFlags().hasNoNaNs()) {
10092 // Insert canonicalizes if it's possible we need to quiet to get correct
10093 // sNaN behavior.
10094 if (!DAG.isKnownNeverSNaN(Quiet0)) {
10095 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
10096 Node->getFlags());
10097 }
10098 if (!DAG.isKnownNeverSNaN(Quiet1)) {
10099 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
10100 Node->getFlags());
10101 }
10102 }
10103
10104 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
10105 }
10106
10107 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
10108 // instead if there are no NaNs.
10109 if (Node->getFlags().hasNoNaNs() ||
10110 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
10111 DAG.isKnownNeverNaN(Node->getOperand(1)))) {
10112 unsigned IEEE2018Op =
10113 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
10114 if (isOperationLegalOrCustom(IEEE2018Op, VT))
10115 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
10116 Node->getOperand(1), Node->getFlags());
10117 }
10118
      // NOTE(review): SelCC is defined on a line that is not visible in this
      // listing; presumably it is the select-based fallback.
10120 return SelCC;
10121
10122 return SDValue();
10123}
10124
10126 SelectionDAG &DAG) const {
      // Expands an fminimum/fmaximum node (IsMax is true for ISD::FMAXIMUM).
      // The result is built in three stages: a min/max that need not propagate
      // NaN, an explicit NaN-propagation select, and a signed-zero fixup.
10127 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
10128 return Expanded;
10129
10130 SDLoc DL(N);
10131 SDValue LHS = N->getOperand(0);
10132 SDValue RHS = N->getOperand(1);
10133 unsigned Opc = N->getOpcode();
10134 EVT VT = N->getValueType(0);
10135 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10136 bool IsMax = Opc == ISD::FMAXIMUM;
10137 SDNodeFlags Flags = N->getFlags();
10138
10139 // First, implement comparison not propagating NaN. If no native fmin or fmax
10140 // available, use plain select with setcc instead.
      // NOTE(review): the declaration of MinMax is not visible in this listing.
10142 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
10143 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
10144
10145 // FIXME: We should probably define fminnum/fmaxnum variants with correct
10146 // signed zero behavior.
10147 bool MinMaxMustRespectOrderedZero = false;
10148
      // Preference order: the *_IEEE opcode (which also lets the zero fixup
      // below be skipped), then plain FMINNUM/FMAXNUM, then setcc + select.
10149 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
10150 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
10151 MinMaxMustRespectOrderedZero = true;
10152 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
10153 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
10154 } else {
10156 return DAG.UnrollVectorOp(N);
10157
10158 // NaN (if exists) will be propagated later, so orderness doesn't matter.
10159 SDValue Compare =
10160 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
10161 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
10162 }
10163
10164 // Propagate any NaN of both operands
      // If LHS and RHS compare unordered, the result is replaced by a NaN
      // constant.
10165 if (!N->getFlags().hasNoNaNs() &&
10166 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
10167 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
10169 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
10170 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
10171 }
10172
10173 // fminimum/fmaximum requires -0.0 less than +0.0
      // When MinMax compares equal to 0.0, pick the operand that is the
      // preferred zero (+0.0 for max, -0.0 for min): RHS is checked first, then
      // LHS, and MinMax is kept if neither matches.
10174 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
10175 !DAG.isKnownNeverLogicalZero(RHS) && !DAG.isKnownNeverLogicalZero(LHS)) {
10176 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
10177 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
10178 SDValue TestZero =
10179 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
10180 SDValue LCmp = DAG.getSelect(
10181 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
10182 MinMax, Flags);
10183 SDValue RCmp = DAG.getSelect(
10184 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
10185 LCmp, Flags);
10186 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
10187 }
10188
10189 return MinMax;
10190}
10191
10193 SelectionDAG &DAG) const {
      // Expands an fminimumnum/fmaximumnum node (IsMax is true for
      // ISD::FMAXIMUMNUM). Cheaper equivalent opcodes are tried first; the
      // fallback is a select sequence that replaces a NaN operand with the
      // other operand, picks the min/max, and then fixes up signed zeros.
10194 SDLoc DL(Node);
10195 SDValue LHS = Node->getOperand(0);
10196 SDValue RHS = Node->getOperand(1);
10197 unsigned Opc = Node->getOpcode();
10198 EVT VT = Node->getValueType(0);
10199 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10200 bool IsMax = Opc == ISD::FMAXIMUMNUM;
10201 SDNodeFlags Flags = Node->getFlags();
10202
      // NOTE(review): the initializer of NewOp is not visible in this listing.
10203 unsigned NewOp =
10205
10206 if (isOperationLegalOrCustom(NewOp, VT)) {
10207 if (!Flags.hasNoNaNs()) {
10208 // Insert canonicalizes if it's possible we need to quiet to get correct
10209 // sNaN behavior.
10210 if (!DAG.isKnownNeverSNaN(LHS)) {
10211 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
10212 }
10213 if (!DAG.isKnownNeverSNaN(RHS)) {
10214 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
10215 }
10216 }
10217
10218 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
10219 }
10220
10221 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
10222 // same behaviors for all of other cases: +0.0 vs -0.0 included.
10223 if (Flags.hasNoNaNs() ||
10224 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
      // NOTE(review): the initializer of IEEE2019Op is not visible in this
      // listing.
10225 unsigned IEEE2019Op =
10227 if (isOperationLegalOrCustom(IEEE2019Op, VT))
10228 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
10229 }
10230
10231 // FMINNUM/FMAXNUM returns qNaN if either operand is sNaN, and it may return
10232 // either one for +0.0 vs -0.0.
10233 if ((Flags.hasNoNaNs() ||
10234 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
10235 (Flags.hasNoSignedZeros() || DAG.isKnownNeverLogicalZero(LHS) ||
10236 DAG.isKnownNeverLogicalZero(RHS))) {
10237 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
10238 if (isOperationLegalOrCustom(IEEE2008Op, VT))
10239 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
10240 }
10241
      // NOTE(review): the rest of this vector condition is not visible in this
      // listing.
10242 if (VT.isVector() &&
10245 return DAG.UnrollVectorOp(Node);
10246
10247 // If only one operand is NaN, override it with another operand.
      // An operand that compares unordered with itself is replaced by the other
      // operand. RHS is patched after LHS, so it sees the already-patched LHS.
10248 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
10249 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
10250 }
10251 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
10252 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
10253 }
10254
10255 // Always prefer RHS if equal.
10256 SDValue MinMax =
10257 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
10258
10259 // TODO: We need quiet sNaN if strictfp.
10260
10261 // Fixup signed zero behavior.
10262 if (Flags.hasNoSignedZeros() || DAG.isKnownNeverLogicalZero(LHS) ||
10263 DAG.isKnownNeverLogicalZero(RHS)) {
10264 return MinMax;
10265 }
      // TestZero is the zero class that must win: +0.0 for max, -0.0 for min.
10266 SDValue TestZero =
10267 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
10268 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
10269 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
10270 EVT IntVT = VT.changeTypeToInteger();
10271 EVT FloatVT = VT.changeElementType(*DAG.getContext(), MVT::f32);
      // NOTE(review): the condition guarding the FP_ROUND below is not visible
      // in this listing; presumably it also uses IntVT, which has no other
      // visible use.
10272 SDValue LHSTrunc = LHS;
10274 LHSTrunc = DAG.getNode(ISD::FP_ROUND, DL, FloatVT, LHS,
10275 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
10276 }
10277 // It's OK to select from LHS and MinMax, with only one ISD::IS_FPCLASS, as
10278 // we preferred RHS when generate MinMax, if the operands are equal.
10279 SDValue RetZero = DAG.getSelect(
10280 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHSTrunc, TestZero), LHS,
10281 MinMax, Flags);
      // The zero-specific choice is only used when MinMax compared equal to 0.0.
10282 return DAG.getSelect(DL, VT, IsZero, RetZero, MinMax, Flags);
10283}
10284
10285/// Returns a true value if if this FPClassTest can be performed with an ordered
10286/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
10287/// std::nullopt if it cannot be performed as a compare with 0.
10288static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
10289 const fltSemantics &Semantics,
10290 const MachineFunction &MF) {
10291 FPClassTest OrderedMask = Test & ~fcNan;
10292 FPClassTest NanTest = Test & fcNan;
10293 bool IsOrdered = NanTest == fcNone;
10294 bool IsUnordered = NanTest == fcNan;
10295
10296 // Skip cases that are testing for only a qnan or snan.
10297 if (!IsOrdered && !IsUnordered)
10298 return std::nullopt;
10299
10300 if (OrderedMask == fcZero &&
10301 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
10302 return IsOrdered;
10303 if (OrderedMask == (fcZero | fcSubnormal) &&
10304 MF.getDenormalMode(Semantics).inputsAreZero())
10305 return IsOrdered;
10306 return std::nullopt;
10307}
10308
10310 const FPClassTest OrigTestMask,
10311 SDNodeFlags Flags, const SDLoc &DL,
10312 SelectionDAG &DAG) const {
10313 EVT OperandVT = Op.getValueType();
10314 assert(OperandVT.isFloatingPoint());
10315 FPClassTest Test = OrigTestMask;
10316
10317 // Degenerated cases.
10318 if (Test == fcNone)
10319 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
10320 if (Test == fcAllFlags)
10321 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
10322
10323 // PPC double double is a pair of doubles, of which the higher part determines
10324 // the value class.
10325 if (OperandVT == MVT::ppcf128) {
10326 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
10327 DAG.getConstant(1, DL, MVT::i32));
10328 OperandVT = MVT::f64;
10329 }
10330
10331 // Floating-point type properties.
10332 EVT ScalarFloatVT = OperandVT.getScalarType();
10333 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
10334 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
10335 bool IsF80 = (ScalarFloatVT == MVT::f80);
10336
10337 // Some checks can be implemented using float comparisons, if floating point
10338 // exceptions are ignored.
10339 if (Flags.hasNoFPExcept() &&
10341 FPClassTest FPTestMask = Test;
10342 bool IsInvertedFP = false;
10343
10344 if (FPClassTest InvertedFPCheck =
10345 invertFPClassTestIfSimpler(FPTestMask, true)) {
10346 FPTestMask = InvertedFPCheck;
10347 IsInvertedFP = true;
10348 }
10349
10350 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
10351 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
10352
10353 // See if we can fold an | fcNan into an unordered compare.
10354 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
10355
10356 // Can't fold the ordered check if we're only testing for snan or qnan
10357 // individually.
10358 if ((FPTestMask & fcNan) != fcNan)
10359 OrderedFPTestMask = FPTestMask;
10360
10361 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
10362
10363 if (std::optional<bool> IsCmp0 =
10364 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
10365 IsCmp0 && (isCondCodeLegalOrCustom(
10366 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
10367 OperandVT.getScalarType().getSimpleVT()))) {
10368
10369 // If denormals could be implicitly treated as 0, this is not equivalent
10370 // to a compare with 0 since it will also be true for denormals.
10371 return DAG.getSetCC(DL, ResultVT, Op,
10372 DAG.getConstantFP(0.0, DL, OperandVT),
10373 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
10374 }
10375
10376 if (FPTestMask == fcNan &&
10378 OperandVT.getScalarType().getSimpleVT()))
10379 return DAG.getSetCC(DL, ResultVT, Op, Op,
10380 IsInvertedFP ? ISD::SETO : ISD::SETUO);
10381
10382 bool IsOrderedInf = FPTestMask == fcInf;
10383 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
10384 isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
10385 : UnorderedCmpOpcode,
10386 OperandVT.getScalarType().getSimpleVT()) &&
10389 (OperandVT.isVector() &&
10391 // isinf(x) --> fabs(x) == inf
10392 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
10393 SDValue Inf =
10394 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
10395 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
10396 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
10397 }
10398
10399 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
10400 isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
10401 : UnorderedCmpOpcode,
10402 OperandVT.getSimpleVT())) {
10403 // isposinf(x) --> x == inf
10404 // isneginf(x) --> x == -inf
10405 // isposinf(x) || nan --> x u== inf
10406 // isneginf(x) || nan --> x u== -inf
10407
10408 SDValue Inf = DAG.getConstantFP(
10409 APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
10410 OperandVT);
10411 return DAG.getSetCC(DL, ResultVT, Op, Inf,
10412 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
10413 }
10414
10415 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
10416 // TODO: Could handle ordered case, but it produces worse code for
10417 // x86. Maybe handle ordered if fabs is free?
10418
10419 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
10420 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
10421
10422 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
10423 OperandVT.getScalarType().getSimpleVT())) {
10424 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
10425
10426 // TODO: Maybe only makes sense if fabs is free. Integer test of
10427 // exponent bits seems better for x86.
10428 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
10429 SDValue SmallestNormal = DAG.getConstantFP(
10430 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
10431 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
10432 IsOrdered ? OrderedOp : UnorderedOp);
10433 }
10434 }
10435
10436 if (FPTestMask == fcNormal) {
10437 // TODO: Handle unordered
10438 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
10439 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
10440
10441 if (isCondCodeLegalOrCustom(IsFiniteOp,
10442 OperandVT.getScalarType().getSimpleVT()) &&
10443 isCondCodeLegalOrCustom(IsNormalOp,
10444 OperandVT.getScalarType().getSimpleVT()) &&
10445 isFAbsFree(OperandVT)) {
10446 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
10447 SDValue Inf =
10448 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
10449 SDValue SmallestNormal = DAG.getConstantFP(
10450 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
10451
10452 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
10453 SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
10454 SDValue IsNormal =
10455 DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
10456 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
10457 return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
10458 }
10459 }
10460 }
10461
10462 // Some checks may be represented as inversion of simpler check, for example
10463 // "inf|normal|subnormal|zero" => !"nan".
10464 bool IsInverted = false;
10465
10466 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
10467 Test = InvertedCheck;
10468 IsInverted = true;
10469 }
10470
10471 // In the general case use integer operations.
10472 unsigned BitSize = OperandVT.getScalarSizeInBits();
10473 EVT IntVT = OperandVT.changeElementType(
10474 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), BitSize));
10475 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
10476
10477 // Various masks.
10478 APInt SignBit = APInt::getSignMask(BitSize);
10479 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
10480 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
10481 const unsigned ExplicitIntBitInF80 = 63;
10482 APInt ExpMask = Inf;
10483 if (IsF80)
10484 ExpMask.clearBit(ExplicitIntBitInF80);
10485 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
10486 APInt QNaNBitMask =
10487 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
10488 APInt InversionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
10489
10490 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
10491 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
10492 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
10493 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
10494 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
10495 SDValue ResultInversionMask = DAG.getConstant(InversionMask, DL, ResultVT);
10496
10497 SDValue Res;
10498 const auto appendResult = [&](SDValue PartialRes) {
10499 if (PartialRes) {
10500 if (Res)
10501 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
10502 else
10503 Res = PartialRes;
10504 }
10505 };
10506
10507 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
10508 const auto getIntBitIsSet = [&]() -> SDValue {
10509 if (!IntBitIsSetV) {
10510 APInt IntBitMask(BitSize, 0);
10511 IntBitMask.setBit(ExplicitIntBitInF80);
10512 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
10513 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
10514 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
10515 }
10516 return IntBitIsSetV;
10517 };
10518
10519 // Split the value into sign bit and absolute value.
10520 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
10521 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
10522 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
10523
10524 // Tests that involve more than one class should be processed first.
10525 SDValue PartialRes;
10526
10527 if (IsF80)
10528 ; // Detect finite numbers of f80 by checking individual classes because
10529 // they have different settings of the explicit integer bit.
10530 else if ((Test & fcFinite) == fcFinite) {
10531 // finite(V) ==> (a << 1) < (inf << 1)
10532 //
10533 // See https://github.com/llvm/llvm-project/issues/169270, this is slightly
10534 // shorter than the `finite(V) ==> abs(V) < exp_mask` formula used before.
10535
10537 "finite check requires IEEE-like FP");
10538
10539 SDValue One = DAG.getShiftAmountConstant(1, IntVT, DL);
10540 SDValue TwiceOp = DAG.getNode(ISD::SHL, DL, IntVT, OpAsInt, One);
10541 SDValue TwiceInf = DAG.getNode(ISD::SHL, DL, IntVT, ExpMaskV, One);
10542
10543 PartialRes = DAG.getSetCC(DL, ResultVT, TwiceOp, TwiceInf, ISD::SETULT);
10544 Test &= ~fcFinite;
10545 } else if ((Test & fcFinite) == fcPosFinite) {
10546 // finite(V) && V > 0 ==> V < exp_mask
10547 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
10548 Test &= ~fcPosFinite;
10549 } else if ((Test & fcFinite) == fcNegFinite) {
10550 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
10551 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
10552 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
10553 Test &= ~fcNegFinite;
10554 }
10555 appendResult(PartialRes);
10556
10557 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
10558 // fcZero | fcSubnormal => test all exponent bits are 0
10559 // TODO: Handle sign bit specific cases
10560 if (PartialCheck == (fcZero | fcSubnormal)) {
10561 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
10562 SDValue ExpIsZero =
10563 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
10564 appendResult(ExpIsZero);
10565 Test &= ~PartialCheck & fcAllFlags;
10566 }
10567 }
10568
10569 // Check for individual classes.
10570
10571 if (unsigned PartialCheck = Test & fcZero) {
10572 if (PartialCheck == fcPosZero)
10573 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
10574 else if (PartialCheck == fcZero)
10575 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
10576 else // ISD::fcNegZero
10577 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
10578 appendResult(PartialRes);
10579 }
10580
10581 if (unsigned PartialCheck = Test & fcSubnormal) {
10582 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
10583 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
10584 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
10585 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
10586 SDValue VMinusOneV =
10587 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
10588 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
10589 if (PartialCheck == fcNegSubnormal)
10590 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
10591 appendResult(PartialRes);
10592 }
10593
10594 if (unsigned PartialCheck = Test & fcInf) {
10595 if (PartialCheck == fcPosInf)
10596 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
10597 else if (PartialCheck == fcInf)
10598 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
10599 else { // ISD::fcNegInf
10600 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
10601 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
10602 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
10603 }
10604 appendResult(PartialRes);
10605 }
10606
10607 if (unsigned PartialCheck = Test & fcNan) {
10608 APInt InfWithQnanBit = Inf | QNaNBitMask;
10609 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
10610 if (PartialCheck == fcNan) {
10611 // isnan(V) ==> abs(V) > int(inf)
10612 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
10613 if (IsF80) {
10614 // Recognize unsupported values as NaNs for compatibility with glibc.
10615 // In them (exp(V)==0) == int_bit.
10616 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
10617 SDValue ExpIsZero =
10618 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
10619 SDValue IsPseudo =
10620 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
10621 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
10622 }
10623 } else if (PartialCheck == fcQNan) {
10624 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
10625 PartialRes =
10626 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
10627 } else { // ISD::fcSNan
10628 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
10629 // abs(V) < (unsigned(Inf) | quiet_bit)
10630 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
10631 SDValue IsNotQnan =
10632 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
10633 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
10634 }
10635 appendResult(PartialRes);
10636 }
10637
10638 if (unsigned PartialCheck = Test & fcNormal) {
10639 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
10640 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
10641 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
10642 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
10643 APInt ExpLimit = ExpMask - ExpLSB;
10644 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
10645 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
10646 if (PartialCheck == fcNegNormal)
10647 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
10648 else if (PartialCheck == fcPosNormal) {
10649 SDValue PosSignV =
10650 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInversionMask);
10651 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
10652 }
10653 if (IsF80)
10654 PartialRes =
10655 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
10656 appendResult(PartialRes);
10657 }
10658
10659 if (!Res)
10660 return DAG.getConstant(IsInverted, DL, ResultVT);
10661 if (IsInverted)
10662 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInversionMask);
10663 return Res;
10664}
10665
10666// Only expand vector types if we have the appropriate vector bit operations.
// Helper used by the CTPOP/CTLZ/CTTZ expansions below: asserts that VT is a
// vector type and reports whether the operations checked in the return
// expression are legal or custom for VT on this target.
// NOTE(review): listing lines 10671, 10672 and 10674 are absent from this
// extract, so only part of the returned condition is visible here.
10667static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
10668 assert(VT.isVector() && "Expected vector type");
10669 unsigned Len = VT.getScalarSizeInBits();
10670 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
// MUL is only required when the elements are wider than 8 bits.
10673 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
10675}
10676
// Expands a population-count node into shifts, masks, adds and (optionally) a
// multiply, using the parallel bit-counting scheme referenced below.
// Returns an empty SDValue when the element width is unsupported or when a
// vector type lacks the operations checked by canExpandVectorCTPOP.
// NOTE(review): the signature line (listing line 10677) is not present in this
// extract; presumably this is the CTPOP expansion entry point - confirm.
10678 SDLoc dl(Node);
10679 EVT VT = Node->getValueType(0);
10680 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10681 SDValue Op = Node->getOperand(0);
10682 unsigned Len = VT.getScalarSizeInBits();
10683 assert(VT.isInteger() && "CTPOP not implemented for this type.");
10684
10685 // TODO: Add support for irregular type lengths.
// Only element widths that are a multiple of 8 and at most 128 bits are
// handled; everything else bails out.
10686 if (!(Len <= 128 && Len % 8 == 0))
10687 return SDValue();
10688
10689 // Only expand vector types if we have the appropriate vector bit operations.
10690 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
10691 return SDValue();
10692
10693 // This is the "best" algorithm from
10694 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// Each mask is the given byte pattern splatted across the Len-bit element.
10695 SDValue Mask55 =
10696 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
10697 SDValue Mask33 =
10698 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
10699 SDValue Mask0F =
10700 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
10701
10702 // v = v - ((v >> 1) & 0x55555555...)
10703 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
10704 DAG.getNode(ISD::AND, dl, VT,
10705 DAG.getNode(ISD::SRL, dl, VT, Op,
10706 DAG.getConstant(1, dl, ShVT)),
10707 Mask55));
10708 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
10709 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
10710 DAG.getNode(ISD::AND, dl, VT,
10711 DAG.getNode(ISD::SRL, dl, VT, Op,
10712 DAG.getConstant(2, dl, ShVT)),
10713 Mask33));
10714 // v = (v + (v >> 4)) & 0x0F0F0F0F...
10715 Op = DAG.getNode(ISD::AND, dl, VT,
10716 DAG.getNode(ISD::ADD, dl, VT, Op,
10717 DAG.getNode(ISD::SRL, dl, VT, Op,
10718 DAG.getConstant(4, dl, ShVT))),
10719 Mask0F);
10720
// For elements of at most 8 bits the per-byte count is already the result.
10721 if (Len <= 8)
10722 return Op;
10723
10724 // Avoid the multiply if we only have 2 bytes to add.
10725 // TODO: Only doing this for scalars because vectors weren't as obviously
10726 // improved.
10727 if (Len == 16 && !VT.isVector()) {
10728 // v = (v + (v >> 8)) & 0x00FF;
10729 return DAG.getNode(ISD::AND, dl, VT,
10730 DAG.getNode(ISD::ADD, dl, VT, Op,
10731 DAG.getNode(ISD::SRL, dl, VT, Op,
10732 DAG.getConstant(8, dl, ShVT))),
10733 DAG.getConstant(0xFF, dl, VT));
10734 }
10735
10736 // v = (v * 0x01010101...) >> (Len - 8)
// Two ways to sum the per-byte counts into the top byte: a multiply by
// 0x0101..., or a chain of shift-left-and-add steps.
// NOTE(review): the condition selecting between them (listing lines
// 10738-10739) is not present in this extract.
10737 SDValue V;
10740 SDValue Mask01 =
10741 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
10742 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
10743 } else {
10744 V = Op;
10745 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
10746 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
10747 V = DAG.getNode(ISD::ADD, dl, VT, V,
10748 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
10749 }
10750 }
// The total ends up in the most significant byte; shift it down.
10751 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
10752}
10753
// Vector-predicated population count: the same parallel bit-counting scheme
// as the non-VP expansion, built from VP_* nodes that each carry the mask
// (operand 1) and explicit vector length (operand 2) of the original node.
// Returns an empty SDValue when the element width is not a multiple of 8 or
// exceeds 128 bits.
// NOTE(review): the signature line (listing line 10754) is not present in this
// extract.
10755 SDLoc dl(Node);
10756 EVT VT = Node->getValueType(0);
10757 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10758 SDValue Op = Node->getOperand(0);
10759 SDValue Mask = Node->getOperand(1);
10760 SDValue VL = Node->getOperand(2);
10761 unsigned Len = VT.getScalarSizeInBits();
10762 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
10763
10764 // TODO: Add support for irregular type lengths.
10765 if (!(Len <= 128 && Len % 8 == 0))
10766 return SDValue();
10767
10768 // This is same algorithm of expandCTPOP from
10769 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// Each mask is the given byte pattern splatted across the Len-bit element.
10770 SDValue Mask55 =
10771 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
10772 SDValue Mask33 =
10773 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
10774 SDValue Mask0F =
10775 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
10776
10777 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
10778
10779 // v = v - ((v >> 1) & 0x55555555...)
10780 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
10781 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
10782 DAG.getConstant(1, dl, ShVT), Mask, VL),
10783 Mask55, Mask, VL);
10784 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
10785
10786 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
10787 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
10788 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
10789 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
10790 DAG.getConstant(2, dl, ShVT), Mask, VL),
10791 Mask33, Mask, VL);
10792 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
10793
10794 // v = (v + (v >> 4)) & 0x0F0F0F0F...
// Each assignment is its own statement, terminated with ';' rather than being
// chained to the next one through the comma operator.
10795 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
10796 Mask, VL);
10797 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
10798 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
10799
// For elements of at most 8 bits the per-byte count is already the result.
10800 if (Len <= 8)
10801 return Op;
10802
10803 // v = (v * 0x01010101...) >> (Len - 8)
// Sum the per-byte counts into the top byte either with a VP_MUL by
// 0x0101... or with a chain of VP_SHL/VP_ADD steps.
// NOTE(review): the first line of the selecting condition (listing line
// 10805) is not present in this extract.
10804 SDValue V;
10806 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
10807 SDValue Mask01 =
10808 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
10809 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
10810 } else {
10811 V = Op;
10812 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
10813 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
10814 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
10815 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
10816 Mask, VL);
10817 }
10818 }
// The total ends up in the most significant byte; shift it down.
10819 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
10820 Mask, VL);
10821}
10822
// Expands a count-leading-zeros node (CTLZ or CTLZ_ZERO_POISON). Tries, in
// order: the plain CTLZ node, CTLZ_ZERO_POISON plus an explicit zero check,
// and finally a bit-smearing sequence followed by CTPOP. Returns an empty
// SDValue when a vector type cannot be expanded.
// NOTE(review): the signature (listing line 10823) and several condition lines
// (10832, 10836, 10849, 10851-10852) are not present in this extract.
10824 SDLoc dl(Node);
10825 EVT VT = Node->getValueType(0);
10826 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10827 SDValue Op = Node->getOperand(0);
10828 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10829
10830 // If the non-ZERO_POISON version is supported we can use that instead.
10831 if (Node->getOpcode() == ISD::CTLZ_ZERO_POISON &&
10833 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
10834
10835 // If the ZERO_POISON version is supported use that and handle the zero case.
10837 EVT SetCCVT =
10838 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10839 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_POISON, dl, VT, Op);
10840 SDValue Zero = DAG.getConstant(0, dl, VT);
10841 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
// A zero input selects the element bit width as the result.
10842 return DAG.getSelect(dl, VT, SrcIsZero,
10843 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
10844 }
10845
10846 // Only expand vector types if we have the appropriate vector bit operations.
10847 // This includes the operations needed to expand CTPOP if it isn't supported.
10848 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
10850 !canExpandVectorCTPOP(*this, VT)) ||
10853 return SDValue();
10854
10855 // for now, we do this:
10856 // x = x | (x >> 1);
10857 // x = x | (x >> 2);
10858 // ...
10859 // x = x | (x >>16);
10860 // x = x | (x >>32); // for 64-bit input
10861 // return popcount(~x);
10862 //
10863 // Ref: "Hacker's Delight" by Henry Warren
// The shift amount doubles each iteration (1, 2, 4, ...) while it is below
// the element width.
10864 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
10865 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
10866 Op = DAG.getNode(ISD::OR, dl, VT, Op,
10867 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
10868 }
10869 Op = DAG.getNOT(dl, Op, VT);
10870 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
10871}
10872
// Vector-predicated count-leading-zeros expansion: ORs the value with itself
// shifted right by 1, 2, 4, ... bits, inverts the result and counts the set
// bits. Every emitted node carries the mask (operand 1) and explicit vector
// length (operand 2) of the original node.
// NOTE(review): the signature line (listing line 10873) is not present in this
// extract.
10874 SDLoc dl(Node);
10875 EVT VT = Node->getValueType(0);
10876 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10877 SDValue Op = Node->getOperand(0);
10878 SDValue Mask = Node->getOperand(1);
10879 SDValue VL = Node->getOperand(2);
10880 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10881
10882 // do this:
10883 // x = x | (x >> 1);
10884 // x = x | (x >> 2);
10885 // ...
10886 // x = x | (x >>16);
10887 // x = x | (x >>32); // for 64-bit input
10888 // return popcount(~x);
10889 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
10890 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
10891 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
10892 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
10893 VL);
10894 }
// Bitwise NOT is expressed as a VP_XOR with an all-ones constant.
10895 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
10896 Mask, VL);
10897 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
10898}
10899
// Expands a count-leading-sign-bits style node in terms of CTLZ_ZERO_POISON,
// using the identity given in the comment below. The operand is frozen first
// because it is used more than once in the emitted sequence.
// NOTE(review): the signature line (listing line 10900) is not present in this
// extract.
10901 SDLoc dl(Node);
10902 EVT VT = Node->getValueType(0);
10903 SDValue Op = DAG.getFreeze(Node->getOperand(0));
10904 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10905
10906 // CTLS(x) = CTLZ(OR(SHL(XOR(x, SRA(x, BW-1)), 1), 1))
10907 // This transforms the sign bits into leading zeros that can be counted.
10908 SDValue ShiftAmt = DAG.getShiftAmountConstant(NumBitsPerElt - 1, VT, dl);
10909 SDValue SignBit = DAG.getNode(ISD::SRA, dl, VT, Op, ShiftAmt);
10910 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, SignBit);
10911 SDValue Shl =
10912 DAG.getNode(ISD::SHL, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
// OR-ing in bit 0 guarantees the CTLZ_ZERO_POISON operand is never zero.
10913 SDValue Or = DAG.getNode(ISD::OR, dl, VT, Shl, DAG.getConstant(1, dl, VT));
10914 return DAG.getNode(ISD::CTLZ_ZERO_POISON, dl, VT, Or);
10915}
10916
// Lowers a count-trailing-zeros node to a De Bruijn multiply plus a lookup in
// a constant-pool byte table. Only 32- and 64-bit widths are handled; for
// anything else an empty SDValue is returned. For the non-ZERO_POISON opcode
// a select on "Op == 0" yields BitWidth for a zero input.
// NOTE(review): the first signature line (listing line 10917) and lines 10924,
// 10930, 10938 and 10940 are not present in this extract.
10918 const SDLoc &DL, EVT VT, SDValue Op,
10919 unsigned BitWidth) const {
10920 if (BitWidth != 32 && BitWidth != 64)
10921 return SDValue();
10922
10923 const DataLayout &TD = DAG.getDataLayout();
10925 return SDValue();
10926
10927 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
10928 : APInt(64, 0x0218A392CD3D5DBFULL);
10929 MachinePointerInfo PtrInfo =
10931 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
// (Op & -Op) isolates the lowest set bit; multiplying by the De Bruijn
// constant and keeping the top Log2(BitWidth) bits gives the table index.
10932 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
10933 SDValue Lookup = DAG.getNode(
10934 ISD::SRL, DL, VT,
10935 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
10936 DAG.getConstant(DeBruijn, DL, VT)),
10937 DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
10939
// Fill the table so the index produced for a value with lowest set bit i
// maps back to i.
10941 for (unsigned i = 0; i < BitWidth; i++) {
10942 APInt Shl = DeBruijn.shl(i);
10943 APInt Lshr = Shl.lshr(ShiftAmt);
10944 Table[Lshr.getZExtValue()] = i;
10945 }
10946
10947 // Create a ConstantArray in Constant Pool
10948 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
10949 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
10950 TD.getPrefTypeAlign(CA->getType()));
// Load the i8 table entry and zero-extend it to VT.
10951 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
10952 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
10953 PtrInfo, MVT::i8);
10954 if (Node->getOpcode() == ISD::CTTZ_ZERO_POISON)
10955 return ExtLoad;
10956
10957 EVT SetCCVT =
10958 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10959 SDValue Zero = DAG.getConstant(0, DL, VT);
10960 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
10961 return DAG.getSelect(DL, VT, SrcIsZero,
10962 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
10963}
10964
// Expands a count-trailing-zeros node (CTTZ or CTTZ_ZERO_POISON). Tries, in
// order: the plain CTTZ node, CTTZ_ZERO_POISON plus an explicit zero check,
// the table lookup from CTTZTableLookup, and finally a CTLZ- or CTPOP-based
// computation on (~x & (x - 1)). Returns an empty SDValue when a vector type
// cannot be expanded.
// NOTE(review): the signature (listing line 10965) and several condition lines
// (10973, 10977, 10990-10991, 10993-10995, 11000-11001, 11014) are not present
// in this extract.
10966 SDLoc dl(Node);
10967 EVT VT = Node->getValueType(0);
10968 SDValue Op = Node->getOperand(0);
10969 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10970
10971 // If the non-ZERO_POISON version is supported we can use that instead.
10972 if (Node->getOpcode() == ISD::CTTZ_ZERO_POISON &&
10974 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
10975
10976 // If the ZERO_POISON version is supported use that and handle the zero case.
10978 EVT SetCCVT =
10979 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10980 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_POISON, dl, VT, Op);
10981 SDValue Zero = DAG.getConstant(0, dl, VT);
10982 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
// A zero input selects the element bit width as the result.
10983 return DAG.getSelect(dl, VT, SrcIsZero,
10984 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
10985 }
10986
10987 // Only expand vector types if we have the appropriate vector bit operations.
10988 // This includes the operations needed to expand CTPOP if it isn't supported.
10989 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
10992 !canExpandVectorCTPOP(*this, VT)) ||
10996 return SDValue();
10997
10998 // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
10999 // to be expanded or converted to a libcall.
11002 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
11003 return V;
11004
11005 // for now, we use: { return popcount(~x & (x - 1)); }
11006 // unless the target has ctlz but not ctpop, in which case we use:
11007 // { return 32 - nlz(~x & (x-1)); }
11008 // Ref: "Hacker's Delight" by Henry Warren
// Tmp = ~x & (x - 1).
11009 SDValue Tmp = DAG.getNode(
11010 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
11011 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
11012
11013 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
11015 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
11016 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
11017 }
11018
11019 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
11020}
11021
// Vector-predicated count-trailing-zeros expansion, computed as
// VP_CTPOP(~x & (x - 1)). Every emitted node carries the mask (operand 1) and
// explicit vector length (operand 2) of the original node.
// NOTE(review): the signature line (listing line 11022) is not present in this
// extract.
11023 SDValue Op = Node->getOperand(0);
11024 SDValue Mask = Node->getOperand(1);
11025 SDValue VL = Node->getOperand(2);
11026 SDLoc dl(Node);
11027 EVT VT = Node->getValueType(0);
11028
11029 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
// Bitwise NOT is expressed as a VP_XOR with an all-ones constant.
11030 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
11031 DAG.getAllOnesConstant(dl, VT), Mask, VL);
11032 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
11033 DAG.getConstant(1, dl, VT), Mask, VL);
11034 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
11035 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
11036}
11037
// Expands a vector-predicated "count trailing zero elements" node: lanes whose
// source element is non-zero contribute their lane index, all other lanes
// contribute EVL, and the result is the unsigned minimum over the lanes.
// NOTE(review): the first signature line (listing line 11038) is not present
// in this extract.
11039 SelectionDAG &DAG) const {
11040 // %cond = to_bool_vec %source
11041 // %splat = splat /*val=*/VL
11042 // %tz = step_vector
11043 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
11044 // %r = vp.reduce.umin %v
11045 SDLoc DL(N);
11046 SDValue Source = N->getOperand(0);
11047 SDValue Mask = N->getOperand(1);
11048 SDValue EVL = N->getOperand(2);
11049 EVT SrcVT = Source.getValueType();
11050 EVT ResVT = N->getValueType(0);
// Vector of the scalar result type with the same element count as the source.
11051 EVT ResVecVT =
11052 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
11053
11054 // Convert to boolean vector.
// Non-i1 sources are compared against zero (SETNE) to produce an i1 vector.
11055 if (SrcVT.getScalarType() != MVT::i1) {
11056 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
11057 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
11058 SrcVT.getVectorElementCount());
11059 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
11060 DAG.getCondCode(ISD::SETNE), Mask, EVL);
11061 }
11062
// EVL, converted to the result type, is both the value used for false lanes
// and the start value of the reduction.
11063 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
11064 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
11065 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
11066 SDValue Select =
11067 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
11068 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
11069}
11070
11071/// Returns a type-legalized version of \p Mask as the first item in the
11072/// pair. The second item contains a type-legalized step vector that's
11073/// guaranteed to fit the number of elements in \p Mask.
11074/// If the stepvector would require splitting, returns an empty SDValue
11075/// as the second item to signal that the operation should be split instead.
// NOTE(review): the line carrying the function name and leading parameters
// (listing line 11077) and line 11101 are not present in this extract.
11076static std::pair<SDValue, SDValue>
11078 SelectionDAG &DAG) {
11079 EVT MaskVT = Mask.getValueType();
11080 EVT BoolVT = MaskVT.getScalarType();
11081
11082 // Find a suitable type for a stepvector.
11083 // If zero is poison, we can assume the upper limit of the result is VF-1.
11084 ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
11085 if (MaskVT.isScalableVector())
11086 VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
11087 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11088 uint64_t EltWidth = TLI.getBitWidthForCttzElements(
11089 EVT(TLI.getVectorIdxTy(DAG.getDataLayout())),
11090 MaskVT.getVectorElementCount(), ZeroIsPoison, &VScaleRange);
11091 // If the step vector element type is smaller than the mask element type,
11092 // use the mask type directly to avoid widening issues.
11093 EltWidth = std::max(EltWidth, BoolVT.getFixedSizeInBits());
11094 EVT StepVT = MVT::getIntegerVT(EltWidth);
11095 EVT StepVecVT = MaskVT.changeVectorElementType(*DAG.getContext(), StepVT);
11096
11097 // If promotion or widening is required to make the type legal, do it here.
11098 // Promotion of integers within LegalizeVectorOps is looking for types of
11099 // the same size but with a smaller number of larger elements, not the usual
11100 // larger size with the same number of larger elements.
11102 TLI.getTypeAction(*DAG.getContext(), StepVecVT);
11103 SDValue StepVec;
11104 if (TypeAction == TargetLowering::TypePromoteInteger) {
11105 StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
11106 StepVec = DAG.getStepVector(DL, StepVecVT);
11107 } else if (TypeAction == TargetLowering::TypeWidenVector) {
11108 // For widening, the element count changes. Create a step vector with only
11109 // the original elements valid and poison for padding. Also widen the mask.
11110 EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
11111 unsigned WideNumElts = WideVecVT.getVectorNumElements();
11112
11113 // Build widened step vector: <0, 1, ..., OrigNumElts-1, poison, poison, ..>
11114 SDValue OrigStepVec = DAG.getStepVector(DL, StepVecVT);
11115 SDValue UndefStep = DAG.getPOISON(WideVecVT);
11116 StepVec = DAG.getInsertSubvector(DL, UndefStep, OrigStepVec, 0);
11117
11118 // Widen mask: pad with zeros.
11119 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), BoolVT, WideNumElts);
11120 SDValue ZeroMask = DAG.getConstant(0, DL, WideMaskVT);
11121 Mask = DAG.getInsertSubvector(DL, ZeroMask, Mask, 0);
11122 } else if (TypeAction == TargetLowering::TypeSplitVector) {
11123 // The stepvector type would require splitting. Signal to the caller
11124 // that the operation should be split instead of expanded.
11125 return {Mask, SDValue()};
11126 } else {
// Any other type action: build the step vector with the computed type.
11127 StepVec = DAG.getStepVector(DL, StepVecVT);
11128 }
11129
11130 return {Mask, StepVec};
11131}
11132
// Expands VECTOR_FIND_LAST_ACTIVE. If a legal step vector is available, the
// inactive lanes of the step vector are zeroed and the unsigned maximum of the
// remaining lane indices is returned. Otherwise the mask is split in two, the
// node is emitted for each half, and the high-half result (offset by the low
// half's element count) is selected whenever any high lane is active.
// NOTE(review): the first signature line (listing line 11133) and line 11157,
// which defines Cond, are not present in this extract.
11134 SelectionDAG &DAG) const {
11135 SDLoc DL(N);
11136 auto [Mask, StepVec] = getLegalMaskAndStepVector(
11137 N->getOperand(0), /*ZeroIsPoison=*/true, DL, DAG);
11138
11139 // If StepVec is empty, the stepvector would require splitting.
11140 // Split the operation instead and let it be recursively legalized.
11141 if (!StepVec) {
11142 EVT MaskVT = N->getOperand(0).getValueType();
11143 EVT ResVT = N->getValueType(0);
11144
11145 // Split the mask
11146 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(MaskVT);
11147 auto [MaskLo, MaskHi] = DAG.SplitVector(N->getOperand(0), DL);
11148
11149 // Create split VECTOR_FIND_LAST_ACTIVE operations
11150 SDValue LoResult =
11151 DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, ResVT, MaskLo);
11152 SDValue HiResult =
11153 DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, ResVT, MaskHi);
11154
11155 // Check if any lane is active in the high mask.
11156 SDValue AnyHiActive = DAG.getNode(ISD::VECREDUCE_OR, DL, MVT::i1, MaskHi);
11158 AnyHiActive, DL,
11159 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i1),
11160 MVT::i1);
11161
11162 // Adjust HiResult by adding the number of elements in Lo
11163 SDValue LoNumElts =
11164 DAG.getElementCount(DL, ResVT, LoVT.getVectorElementCount());
11165 SDValue AdjustedHiResult =
11166 DAG.getNode(ISD::ADD, DL, ResVT, HiResult, LoNumElts);
11167
11168 // Return: AnyHiActive ? AdjustedHiResult : LoResult;
11169 return DAG.getNode(ISD::SELECT, DL, ResVT, Cond, AdjustedHiResult,
11170 LoResult);
11171 }
11172
11173 EVT StepVecVT = StepVec.getValueType();
11174 EVT StepVT = StepVec.getValueType().getVectorElementType();
11175
11176 // Zero out lanes with inactive elements, then find the highest remaining
11177 // value from the stepvector.
11178 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
11179 SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
11180 SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
// Convert the reduced index to the node's result type.
11181 return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
11182}
11183
// Expands a loop-dependence mask node (the RAW variant is detected via
// LOOP_DEPENDENCE_RAW_MASK; the other variant is referred to as WAR_MASK in
// the comments below) into a GET_ACTIVE_LANE_MASK over the lane offset and the
// number of lanes separating the two pointers.
// Operands read: 0 = source pointer, 1 = sink pointer, 2 = element size in
// bytes, 3 = constant lane offset.
// NOTE(review): the first signature line (listing line 11184) and line 11220
// (the start of the select's true operand) are not present in this extract.
11185 SelectionDAG &DAG) const {
11186 SDLoc DL(N);
11187 EVT VT = N->getValueType(0);
11188 SDValue SourceValue = N->getOperand(0);
11189 SDValue SinkValue = N->getOperand(1);
11190 SDValue EltSizeInBytes = N->getOperand(2);
11191
11192 // Note: The lane offset is scalable if the mask is scalable.
11193 ElementCount LaneOffsetEC =
11194 ElementCount::get(N->getConstantOperandVal(3), VT.isScalableVT());
11195
11196 EVT AddrVT = SourceValue->getValueType(0);
11197 bool IsReadAfterWrite = N->getOpcode() == ISD::LOOP_DEPENDENCE_RAW_MASK;
11198
11199 // Take the difference between the pointers and divide it by the element
11200 // size, to see how many lanes separate them.
// For the RAW variant the absolute difference is used.
11201 SDValue Diff = DAG.getNode(ISD::SUB, DL, AddrVT, SinkValue, SourceValue);
11202 if (IsReadAfterWrite)
11203 Diff = DAG.getNode(ISD::ABS, DL, AddrVT, Diff);
11204 Diff = DAG.getNode(ISD::SDIV, DL, AddrVT, Diff, EltSizeInBytes);
11205
11206 // The pointers do not alias if:
11207 // * Diff <= 0 (WAR_MASK)
11208 // * Diff == 0 (RAW_MASK)
11209 EVT CmpVT =
11210 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), AddrVT);
11211 SDValue Zero = DAG.getConstant(0, DL, AddrVT);
11212 SDValue Cmp = DAG.getSetCC(DL, CmpVT, Diff, Zero,
11213 IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);
11214
11215 // The pointers do not alias if:
11216 // Lane + LaneOffset < Diff (WAR/RAW_MASK)
11217 SDValue LaneOffset = DAG.getElementCount(DL, AddrVT, LaneOffsetEC);
11218 SDValue MaskN = DAG.getSelect(
11219 DL, AddrVT, Cmp,
11221 AddrVT),
11222 Diff);
11223
11224 return DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, VT, LaneOffset, MaskN);
11225}
11226
// Expands an integer absolute-value node; when IsNegative is set the emitted
// value is 0 - abs(x) instead. Tries ABS (for ABS_MIN_POISON), then
// SMAX/UMIN/SMIN forms with sub(0, x), and finally the shift/xor/sub sequence.
// Returns an empty SDValue when a vector type lacks the required operations.
// NOTE(review): the first signature line (listing line 11227) and several
// condition lines (11235, 11244, 11253, 11262, 11271, 11274) are not present
// in this extract, so the full guard of each branch is not visible here.
11228 bool IsNegative) const {
11229 SDLoc dl(N);
11230 EVT VT = N->getValueType(0);
11231 SDValue Op = N->getOperand(0);
11232
11233 // If expanding ABS_MIN_POISON, fall back to ABS if the target supports it.
11234 if (N->getOpcode() == ISD::ABS_MIN_POISON &&
11236 SDValue AbsVal = DAG.getNode(ISD::ABS, dl, VT, Op);
11237 if (IsNegative)
11238 return DAG.getNegative(AbsVal, dl, VT);
11239 return AbsVal;
11240 }
11241
11242 // abs(x) -> smax(x,sub(0,x))
// Op is frozen in each of the following forms because it is used twice.
11243 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
11245 SDValue Zero = DAG.getConstant(0, dl, VT);
11246 Op = DAG.getFreeze(Op);
11247 return DAG.getNode(ISD::SMAX, dl, VT, Op,
11248 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
11249 }
11250
11251 // abs(x) -> umin(x,sub(0,x))
11252 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
11254 SDValue Zero = DAG.getConstant(0, dl, VT);
11255 Op = DAG.getFreeze(Op);
11256 return DAG.getNode(ISD::UMIN, dl, VT, Op,
11257 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
11258 }
11259
11260 // 0 - abs(x) -> smin(x, sub(0,x))
11261 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
11263 SDValue Zero = DAG.getConstant(0, dl, VT);
11264 Op = DAG.getFreeze(Op);
11265 return DAG.getNode(ISD::SMIN, dl, VT, Op,
11266 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
11267 }
11268
11269 // Only expand vector types if we have the appropriate vector operations.
11270 if (VT.isVector() &&
11272 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
11273 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
11275 return SDValue();
11276
// Shift is the arithmetic right shift by (width - 1): all-ones for negative
// inputs and zero otherwise.
11277 Op = DAG.getFreeze(Op);
11278 SDValue Shift = DAG.getNode(
11279 ISD::SRA, dl, VT, Op,
11280 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
11281 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
11282
11283 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
11284 if (!IsNegative)
11285 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
11286
11287 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
11288 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
11289}
11290
// Expands an absolute-difference node (ABDS when signed, otherwise the
// unsigned form). Tries, in order: max - min, the USUBSAT pair (unsigned
// only), abs(sub) when the subtraction is known not to overflow, a branchless
// compare/xor/sub form, a USUBO-based form, vector unrolling, and finally a
// select between the two subtractions.
// NOTE(review): the signature (listing line 11291), the definition of CC
// (11331) and two condition lines (11350, 11360) are not present in this
// extract.
11292 SDLoc dl(N);
11293 EVT VT = N->getValueType(0);
11294 SDValue LHS = N->getOperand(0);
11295 SDValue RHS = N->getOperand(1);
11296 bool IsSigned = N->getOpcode() == ISD::ABDS;
11297
11298 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
11299 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
11300 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
11301 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
11302 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
// Operands are frozen because each is used more than once.
11303 LHS = DAG.getFreeze(LHS);
11304 RHS = DAG.getFreeze(RHS);
11305 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
11306 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
11307 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
11308 }
11309
11310 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
11311 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) {
11312 LHS = DAG.getFreeze(LHS);
11313 RHS = DAG.getFreeze(RHS);
11314 return DAG.getNode(ISD::OR, dl, VT,
11315 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
11316 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
11317 }
11318
11319 // If the subtract doesn't overflow then just use abs(sub())
// When both sign bits are known zero, the unsigned case is also checked with
// the signed overflow query.
11320 bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
11321
11322 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
11323 return DAG.getNode(ISD::ABS, dl, VT,
11324 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
11325
11326 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
11327 return DAG.getNode(ISD::ABS, dl, VT,
11328 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
11329
11330 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11332 LHS = DAG.getFreeze(LHS);
11333 RHS = DAG.getFreeze(RHS);
11334 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
11335
11336 // Branchless expansion iff cmp result is allbits:
11337 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
11338 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
11339 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
11340 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
11341 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
11342 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
11343 }
11344
11345 // Similar to the branchless expansion, if we don't prefer selects, use the
11346 // (sign-extended) usubo overflow flag if the (scalar) type is illegal as this
11347 // is more likely to legalize cleanly: abdu(lhs, rhs) -> sub(xor(sub(lhs,
11348 // rhs), uof(lhs, rhs)), uof(lhs, rhs))
11349 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
11351 SDValue USubO =
11352 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
// Result 1 of USUBO is the overflow flag; sign-extending it gives an
// all-ones/zero mask of type VT.
11353 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
11354 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
11355 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
11356 }
11357
11358 // FIXME: Should really try to split the vector in case it's legal on a
11359 // subvector.
11361 return DAG.UnrollVectorOp(N);
11362
11363 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
11364 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
11365 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
11366 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
11367}
11368
// Expands an averaging node (AVGFLOORS / AVGFLOORU / AVGCEILS / AVGCEILU, per
// the opcode tests below) with operands LHS and RHS into simpler DAG nodes and
// returns the replacement value. Strategies are tried in order: direct
// add+shift when the operands have spare top bits, add+shift in a wider legal
// scalar type, a UADDO-based form for AVGFLOORU, and finally the bitwise
// identities listed before the last block.
// NOTE(review): this listing omits the function's signature line and the lines
// numbered 11382 and 11419-11420 (the embedded numbering skips them); restore
// them from the upstream file before compiling.
11370 SDLoc dl(N);
11371 EVT VT = N->getValueType(0);
11372 SDValue LHS = N->getOperand(0);
11373 SDValue RHS = N->getOperand(1);
11374
11375 unsigned Opc = N->getOpcode();
// Floor variants round down, signed variants use arithmetic shift/sign
// extension. SumOpc/SignOpc select the add(and)/sub(or) pair used by the final
// bitwise fallback.
11376 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
11377 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
11378 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
11379 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
11380 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
11381 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11383 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
11384 "Unknown AVG node");
11385
11386 // If the operands are already extended, we can add+shift.
// Signed: both operands have at least two sign bits. Unsigned: both operands
// have at least one known leading zero. The ceil variants add 1 before the
// shift to round up.
11387 bool IsExt =
11388 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
11389 DAG.ComputeNumSignBits(RHS) >= 2) ||
11390 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
11391 DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
11392 if (IsExt) {
11393 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
11394 if (!IsFloor)
11395 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
11396 return DAG.getNode(ShiftOpc, dl, VT, Sum,
11397 DAG.getShiftAmountConstant(1, VT, dl));
11398 }
11399
11400 // For scalars, see if we can efficiently extend/truncate to use add+shift.
// Only taken when the widened type is legal and truncating back to VT is
// reported as free by isTruncateFree.
11401 if (VT.isScalarInteger()) {
11402 EVT ExtVT = VT.widenIntegerElementType(*DAG.getContext());
11403 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
11404 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
11405 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
11406 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
11407 if (!IsFloor)
11408 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
11409 DAG.getConstant(1, dl, ExtVT));
11410 // Just use SRL as we will be truncating away the extended sign bits.
11411 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
11412 DAG.getShiftAmountConstant(1, ExtVT, dl));
11413 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
11414 }
11415 }
11416
11417 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
// NOTE(review): the remainder of this condition (lines numbered 11419-11420)
// is missing from the listing.
11418 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT) &&
11421 SDValue UAddWithOverflow =
11422 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
11423
11424 SDValue Sum = UAddWithOverflow.getValue(0);
11425 SDValue Overflow = UAddWithOverflow.getValue(1);
11426
11427 // Right shift the sum by 1
11428 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
11429 DAG.getShiftAmountConstant(1, VT, dl));
11430
// Despite the variable name this is an ANY_EXTEND: the following shift by
// (bit width - 1) keeps only bit 0 of the extended flag, so the upper bits of
// the extension are shifted out.
11431 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
11432 SDValue OverflowShl = DAG.getNode(
11433 ISD::SHL, dl, VT, ZeroExtOverflow,
11434 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
11435
11436 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
11437 }
11438
11439 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
11440 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
11441 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
11442 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
// Both operands are used twice below, so they are frozen first.
11443 LHS = DAG.getFreeze(LHS);
11444 RHS = DAG.getFreeze(RHS);
11445 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
11446 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
11447 SDValue Shift =
11448 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
11449 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
11450}
11451
// Expands a byte swap of operand 0 of N into rotates or shift/mask/OR
// sequences, dispatching on the scalar element type of the result type.
// Returns an empty SDValue when VT is not a simple type or the element type is
// not i16, i32 or i64.
// NOTE(review): this listing omits the function's signature line and the line
// numbered 11474, which opens the braced block that is closed at 11484;
// restore them from the upstream file before compiling.
11453 SDLoc dl(N);
11454 EVT VT = N->getValueType(0);
11455 SDValue Op = N->getOperand(0);
11456
11457 if (!VT.isSimple())
11458 return SDValue();
11459
11460 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
11461 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
11462 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
11463 default:
11464 return SDValue();
11465 case MVT::i16:
11466 // Use a rotate by 8. This can be further expanded if necessary.
11467 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
11468 case MVT::i32:
11469 // This is meant for ARM specifically, which has ROTR but no ROTL.
11470 // t = x ^ rotr(x, 16)
11471 // t = bic(t, 0x00ff0000)
11472 // t = lshr(t, 8)
11473 // x = t ^ rotr(x, 8)
// The "bic" step is expressed below as an AND with 0xFF00FFFF.
11475 SDValue Rotr16 =
11476 DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(16, dl, SHVT));
11477 SDValue Tmp = DAG.getNode(ISD::XOR, dl, VT, Op, Rotr16);
11478 Tmp = DAG.getNode(ISD::AND, dl, VT, Tmp,
11479 DAG.getConstant(0xFF00FFFF, dl, VT));
11480 Tmp = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(8, dl, SHVT));
11481 SDValue Rotr8 =
11482 DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
11483 return DAG.getNode(ISD::XOR, dl, VT, Tmp, Rotr8);
11484 }
// Generic i32 path: Tmp4/Tmp3 move bytes 0 and 1 up to positions 3 and 2,
// Tmp2/Tmp1 move bytes 2 and 3 down to positions 1 and 0; the pieces are then
// ORed together.
11485 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
11486 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
11487 DAG.getConstant(0xFF00, dl, VT));
11488 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
11489 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
11490 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
11491 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
11492 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
11493 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
11494 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
11495 case MVT::i64:
// Same scheme for eight bytes: Tmp8..Tmp5 move the low four bytes up to their
// mirrored positions, Tmp4..Tmp1 move the high four bytes down, and the eight
// pieces are combined with a tree of ORs.
11496 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
11497 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
11498 DAG.getConstant(255ULL<<8, dl, VT));
11499 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
11500 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
11501 DAG.getConstant(255ULL<<16, dl, VT));
11502 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
11503 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
11504 DAG.getConstant(255ULL<<24, dl, VT));
11505 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
11506 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
11507 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
11508 DAG.getConstant(255ULL<<24, dl, VT));
11509 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
11510 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
11511 DAG.getConstant(255ULL<<16, dl, VT));
11512 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
11513 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
11514 DAG.getConstant(255ULL<<8, dl, VT));
11515 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
11516 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
11517 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
11518 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
11519 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
11520 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
11521 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
11522 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
11523 }
11524}
11525
// Vector-predicated counterpart of the byte-swap expansion: builds the same
// shift/mask/OR byte shuffles out of VP_SHL / VP_SRL / VP_AND / VP_OR nodes,
// passing the Mask and EVL operands of N (operands 1 and 2) to every node.
// Returns an empty SDValue when VT is not a simple type or the element type is
// not i16, i32 or i64.
// NOTE(review): this listing omits the function's signature line (the embedded
// numbering skips 11526); restore it from the upstream file before compiling.
11527 SDLoc dl(N);
11528 EVT VT = N->getValueType(0);
11529 SDValue Op = N->getOperand(0);
11530 SDValue Mask = N->getOperand(1);
11531 SDValue EVL = N->getOperand(2);
11532
11533 if (!VT.isSimple())
11534 return SDValue();
11535
11536 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
11537 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
11538 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
11539 default:
11540 return SDValue();
11541 case MVT::i16:
// Two bytes: (Op << 8) | (Op >> 8).
11542 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
11543 Mask, EVL);
11544 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
11545 Mask, EVL);
11546 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
11547 case MVT::i32:
// Four bytes: Tmp4/Tmp3 move bytes 0 and 1 up, Tmp2/Tmp1 move bytes 2 and 3
// down, then everything is ORed together.
11548 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
11549 Mask, EVL);
11550 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
11551 Mask, EVL);
11552 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
11553 Mask, EVL);
11554 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
11555 Mask, EVL);
11556 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11557 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
11558 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
11559 Mask, EVL);
11560 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
11561 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
11562 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
11563 case MVT::i64:
// Eight bytes: Tmp8..Tmp5 move the low four bytes up to their mirrored
// positions, Tmp4..Tmp1 move the high four bytes down.
11564 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
11565 Mask, EVL);
11566 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
11567 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
11568 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
11569 Mask, EVL);
11570 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
11571 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
11572 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
11573 Mask, EVL);
11574 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
11575 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
11576 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
11577 Mask, EVL);
11578 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
11579 Mask, EVL);
11580 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
11581 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
11582 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
11583 Mask, EVL);
11584 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
11585 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
11586 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
11587 Mask, EVL);
11588 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11589 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
11590 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
11591 Mask, EVL);
11592 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
11593 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
11594 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
11595 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
11596 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
11597 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
11598 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
11599 }
11600}
11601
// Expands a bit reversal of operand 0 of N. When the scalar width Sz is a power
// of two of at least 8 bits, the value is byte-swapped (if wider than one byte)
// and then nibbles, bit pairs and single bits are exchanged within each byte
// using splatted masks. For any other width, each bit is moved individually by
// the loop at the end. Always returns a value.
// NOTE(review): this listing omits the function's signature line (the embedded
// numbering skips 11602); restore it from the upstream file before compiling.
11603 SDLoc dl(N);
11604 EVT VT = N->getValueType(0);
11605 SDValue Op = N->getOperand(0);
11606 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
11607 unsigned Sz = VT.getScalarSizeInBits();
11608
11609 SDValue Tmp, Tmp2, Tmp3;
11610
11611 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
11612 // and finally the i1 pairs.
11613 // TODO: We can easily support i4/i2 legal types if any target ever does.
11614 if (Sz >= 8 && isPowerOf2_32(Sz)) {
11615 // Create the masks - repeating the pattern every byte.
11616 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
11617 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
11618 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
11619
11620 // BSWAP if the type is wider than a single byte.
11621 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
11622
11623 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
11624 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
11625 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
11626 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
11627 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
11628 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
11629
11630 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
11631 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
11632 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
11633 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
11634 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
11635 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
11636
11637 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
11638 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
11639 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
11640 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
11641 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
11642 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
11643 return Tmp;
11644 }
11645
// Fallback for all other widths: on each iteration the input is shifted so
// that source bit I lands at position J (J == Sz-1-I), masked down to that
// single bit, and ORed into the accumulated result.
11646 Tmp = DAG.getConstant(0, dl, VT);
11647 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
11648 if (I < J)
11649 Tmp2 =
11650 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
11651 else
11652 Tmp2 =
11653 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
11654
11655 APInt Shift = APInt::getOneBitSet(Sz, J);
11656 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
11657 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
11658 }
11659
11660 return Tmp;
11661}
11662
// Vector-predicated counterpart of the bit-reversal expansion. N must be a
// VP_BITREVERSE node (asserted); its Mask and EVL operands (operands 1 and 2)
// are passed to every VP node created here. Only scalar widths that are a power
// of two of at least 8 bits are handled; for any other width an empty SDValue
// is returned (there is no per-bit fallback in this function).
// NOTE(review): this listing omits the function's signature line (the embedded
// numbering skips 11663); restore it from the upstream file before compiling.
11664 assert(N->getOpcode() == ISD::VP_BITREVERSE);
11665
11666 SDLoc dl(N);
11667 EVT VT = N->getValueType(0);
11668 SDValue Op = N->getOperand(0);
11669 SDValue Mask = N->getOperand(1);
11670 SDValue EVL = N->getOperand(2);
11671 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
11672 unsigned Sz = VT.getScalarSizeInBits();
11673
11674 SDValue Tmp, Tmp2, Tmp3;
11675
11676 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
11677 // and finally the i1 pairs.
11678 // TODO: We can easily support i4/i2 legal types if any target ever does.
11679 if (Sz >= 8 && isPowerOf2_32(Sz)) {
11680 // Create the masks - repeating the pattern every byte.
11681 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
11682 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
11683 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
11684
11685 // BSWAP if the type is wider than a single byte.
11686 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
11687
11688 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
11689 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
11690 Mask, EVL);
11691 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11692 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
11693 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
11694 Mask, EVL);
11695 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
11696 Mask, EVL);
11697 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
11698
11699 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
11700 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
11701 Mask, EVL);
11702 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11703 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
11704 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
11705 Mask, EVL);
11706 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
11707 Mask, EVL);
11708 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
11709
11710 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
11711 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
11712 Mask, EVL);
11713 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11714 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
11715 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
11716 Mask, EVL);
11717 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
11718 Mask, EVL);
11719 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
11720 return Tmp;
11721 }
// Unsupported width: signal "not expanded" with an empty value.
11722 return SDValue();
11723}
11724
// Replaces the vector load LD with scalar operations and returns the pair
// {vector value assembled with getBuildVector, output chain}.
//  - Elements that are not byte-sized: the whole vector is loaded as a single
//    integer and each element is extracted with shift + mask + truncate; the
//    returned chain is that of the single integer load.
//  - Byte-sized elements: one (extending) scalar load per element at
//    successive Stride-byte offsets; the returned chain is a TokenFactor of
//    all element-load chains.
// Calls report_fatal_error for scalable vector memory types.
// NOTE(review): this listing omits part of the signature (line numbered 11726)
// and the lines numbered 11767 and 11793; `Vals` is used below without a
// visible declaration, presumably declared on those lines. Restore them from
// the upstream file before compiling.
11725std::pair<SDValue, SDValue>
11727 SelectionDAG &DAG) const {
11728 SDLoc SL(LD);
11729 SDValue Chain = LD->getChain();
11730 SDValue BasePTR = LD->getBasePtr();
11731 EVT SrcVT = LD->getMemoryVT();
11732 EVT DstVT = LD->getValueType(0);
11733 ISD::LoadExtType ExtType = LD->getExtensionType();
11734
11735 if (SrcVT.isScalableVector())
11736 report_fatal_error("Cannot scalarize scalable vector loads");
11737
11738 unsigned NumElem = SrcVT.getVectorNumElements();
11739
11740 EVT SrcEltVT = SrcVT.getScalarType();
11741 EVT DstEltVT = DstVT.getScalarType();
11742
11743 // A vector must always be stored in memory as-is, i.e. without any padding
11744 // between the elements, since various code depend on it, e.g. in the
11745 // handling of a bitcast of a vector type to int, which may be done with a
11746 // vector store followed by an integer load. A vector that does not have
11747 // elements that are byte-sized must therefore be stored as an integer
11748 // built out of the extracted vector elements.
11749 if (!SrcEltVT.isByteSized()) {
// LoadVT is an integer as wide as the vector's store size; SrcIntVT is an
// integer with exactly the vector's bit count and is used as the memory type
// of the extending load below.
11750 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
11751 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
11752
11753 unsigned NumSrcBits = SrcVT.getSizeInBits();
11754 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
11755
11756 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
11757 SDValue SrcEltBitMask = DAG.getConstant(
11758 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
11759
11760 // Load the whole vector and avoid masking off the top bits as it makes
11761 // the codegen worse.
11762 SDValue Load =
11763 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
11764 LD->getPointerInfo(), SrcIntVT, LD->getBaseAlign(),
11765 LD->getMemOperand()->getFlags(), LD->getAAInfo());
11766
11768 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
// On big-endian layouts element 0 occupies the most significant bits, so the
// shift index is mirrored.
11769 unsigned ShiftIntoIdx =
11770 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
11771 SDValue ShiftAmount = DAG.getShiftAmountConstant(
11772 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
11773 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
11774 SDValue Elt =
11775 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
11776 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
11777
// For an extending load, widen each extracted element to the destination
// element type with the extension that matches ExtType.
11778 if (ExtType != ISD::NON_EXTLOAD) {
11779 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
11780 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
11781 }
11782
11783 Vals.push_back(Scalar);
11784 }
11785
11786 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
11787 return std::make_pair(Value, Load.getValue(1));
11788 }
11789
// Byte-sized elements: Stride is the element size in bytes.
11790 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
11791 assert(SrcEltVT.isByteSized());
11792
11794 SmallVector<SDValue, 8> LoadChains;
11795
11796 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
// Every element load hangs off the original input chain; BasePTR is advanced
// by one stride after each load.
11797 SDValue ScalarLoad = DAG.getExtLoad(
11798 ExtType, SL, DstEltVT, Chain, BasePTR,
11799 LD->getPointerInfo().getWithOffset(Idx * Stride), SrcEltVT,
11800 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
11801
11802 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
11803
11804 Vals.push_back(ScalarLoad.getValue(0));
11805 LoadChains.push_back(ScalarLoad.getValue(1));
11806 }
11807
11808 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
11809 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
11810
11811 return std::make_pair(Value, NewChain);
11812}
11813
// Replaces the vector store ST with scalar operations and returns the
// resulting chain.
//  - Elements that are not byte-sized: every element is truncated to the
//    memory element type, zero-extended, shifted into place and ORed into one
//    integer that is written with a single store.
//  - Byte-sized elements: one truncating store per element at successive
//    Stride-byte offsets, joined by a TokenFactor.
// Calls report_fatal_error for scalable vector memory types.
// NOTE(review): this listing omits the start of the signature (line numbered
// 11814) and the line numbered 11870; `Stores` is used below without a visible
// declaration, presumably declared on that line. Restore them from the
// upstream file before compiling.
11815 SelectionDAG &DAG) const {
11816 SDLoc SL(ST);
11817
11818 SDValue Chain = ST->getChain();
11819 SDValue BasePtr = ST->getBasePtr();
11820 SDValue Value = ST->getValue();
11821 EVT StVT = ST->getMemoryVT();
11822
11823 if (StVT.isScalableVector())
11824 report_fatal_error("Cannot scalarize scalable vector stores");
11825
11826 // The type of the data we want to save
11827 EVT RegVT = Value.getValueType();
11828 EVT RegSclVT = RegVT.getScalarType();
11829
11830 // The type of data as saved in memory.
11831 EVT MemSclVT = StVT.getScalarType();
11832
11833 unsigned NumElem = StVT.getVectorNumElements();
11834
11835 // A vector must always be stored in memory as-is, i.e. without any padding
11836 // between the elements, since various code depend on it, e.g. in the
11837 // handling of a bitcast of a vector type to int, which may be done with a
11838 // vector store followed by an integer load. A vector that does not have
11839 // elements that are byte-sized must therefore be stored as an integer
11840 // built out of the extracted vector elements.
11841 if (!MemSclVT.isByteSized()) {
11842 unsigned NumBits = StVT.getSizeInBits();
11843 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
11844
// Accumulator for the packed integer, starting at zero.
11845 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
11846
11847 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
11848 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
11849 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
11850 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
// On big-endian layouts element 0 goes into the most significant bits, so the
// shift index is mirrored.
11851 unsigned ShiftIntoIdx =
11852 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
11853 SDValue ShiftAmount =
11854 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
11855 SDValue ShiftedElt =
11856 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
11857 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
11858 }
11859
11860 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
11861 ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
11862 ST->getAAInfo());
11863 }
11864
11865 // Store Stride in bytes
11866 unsigned Stride = MemSclVT.getSizeInBits() / 8;
11867 assert(Stride && "Zero stride!");
11868 // Extract each of the elements from the original vector and save them into
11869 // memory individually.
11871 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
11872 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
11873
// Each element address is computed from the unmodified BasePtr, and every
// element store uses the original input chain.
11874 SDValue Ptr =
11875 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
11876
11877 // This scalar TruncStore may be illegal, but we legalize it later.
11878 SDValue Store = DAG.getTruncStore(
11879 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
11880 MemSclVT, ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
11881 ST->getAAInfo());
11882
11883 Stores.push_back(Store);
11884 }
11885
11886 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
11887}
11888
// Expands the unindexed load LD into other loads and returns the pair
// {loaded value, output chain}. Three strategies appear below:
//  1. Floating-point or vector result where both the same-sized integer type
//     and the memory type are legal: scalarize the vector load, or load the
//     integer and bitcast (plus an extension when LoadedVT != VT).
//  2. Floating-point or vector result otherwise: copy the bytes into a stack
//     temporary with register-sized integer loads/stores, then perform the
//     original load from that stack slot.
//  3. Scalar integer: split into two half-width loads and recombine them as
//     (Hi << NumBits) | Lo.
// Indexed addressing modes are rejected by the leading assert.
// NOTE(review): this listing omits part of the signature (line numbered 11890)
// and the line numbered 11931; `Stores` is used below without a visible
// declaration, presumably declared on that line. Restore them from the
// upstream file before compiling.
11889std::pair<SDValue, SDValue>
11891 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
11892 "unaligned indexed loads not implemented!");
11893 SDValue Chain = LD->getChain();
11894 SDValue Ptr = LD->getBasePtr();
11895 EVT VT = LD->getValueType(0);
11896 EVT LoadedVT = LD->getMemoryVT();
11897 SDLoc dl(LD);
11898 auto &MF = DAG.getMachineFunction();
11899
11900 if (VT.isFloatingPoint() || VT.isVector()) {
11901 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
11902 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
11903 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
11904 LoadedVT.isVector()) {
11905 // Scalarize the load and let the individual components be handled.
11906 return scalarizeVectorLoad(LD, DAG);
11907 }
11908
11909 // Expand to a (misaligned) integer load of the same size,
11910 // then bitconvert to floating point or vector.
11911 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
11912 LD->getMemOperand());
11913 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
// The original was an extending load when the memory type differs from the
// result type; reproduce the extension on the bitcast value.
11914 if (LoadedVT != VT)
11915 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
11916 ISD::ANY_EXTEND, dl, VT, Result);
11917
11918 return std::make_pair(Result, newLoad.getValue(1));
11919 }
11920
11921 // Copy the value to a (aligned) stack slot using (unaligned) integer
11922 // loads and stores, then do a (aligned) load from the stack slot.
11923 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
11924 unsigned LoadedBytes = LoadedVT.getStoreSize();
11925 unsigned RegBytes = RegVT.getSizeInBits() / 8;
// Number of register-sized pieces needed, rounded up.
11926 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
11927
11928 // Make sure the stack slot is also aligned for the register type.
11929 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
11930 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
11932 SDValue StackPtr = StackBase;
11933 unsigned Offset = 0;
11934
11935 EVT PtrVT = Ptr.getValueType();
11936 EVT StackPtrVT = StackPtr.getValueType();
11937
11938 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
11939 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
11940
11941 // Do all but one copies using the full register width.
11942 for (unsigned i = 1; i < NumRegs; i++) {
11943 // Load one integer register's worth from the original location.
11944 SDValue Load = DAG.getLoad(
11945 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
11946 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
11947 // Follow the load with a store to the stack slot. Remember the store.
11948 Stores.push_back(DAG.getStore(
11949 Load.getValue(1), dl, Load, StackPtr,
11950 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
11951 // Increment the pointers.
11952 Offset += RegBytes;
11953
11954 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
11955 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
11956 }
11957
11958 // The last copy may be partial. Do an extending load.
// MemVT covers exactly the bytes that remain after the full-width copies.
11959 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
11960 8 * (LoadedBytes - Offset));
11961 SDValue Load = DAG.getExtLoad(
11962 ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
11963 LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->getBaseAlign(),
11964 LD->getMemOperand()->getFlags(), LD->getAAInfo());
11965 // Follow the load with a store to the stack slot. Remember the store.
11966 // On big-endian machines this requires a truncating store to ensure
11967 // that the bits end up in the right place.
11968 Stores.push_back(DAG.getTruncStore(
11969 Load.getValue(1), dl, Load, StackPtr,
11970 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
11971
11972 // The order of the stores doesn't matter - say it with a TokenFactor.
11973 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
11974
11975 // Finally, perform the original load only redirected to the stack slot.
11976 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
11977 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
11978 LoadedVT);
11979
11980 // Return the reloaded value paired with the TokenFactor of the copy stores.
11981 return std::make_pair(Load, TF);
11982 }
11983
11984 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
11985 "Unaligned load of unsupported type.");
11986
11987 // Compute the new VT that is half the size of the old one. This is an
11988 // integer MVT.
11989 unsigned NumBits = LoadedVT.getSizeInBits();
11990 EVT NewLoadedVT;
11991 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
// From here on NumBits is the width of one half.
11992 NumBits >>= 1;
11993
11994 Align Alignment = LD->getBaseAlign();
11995 unsigned IncrementSize = NumBits / 8;
11996 ISD::LoadExtType HiExtType = LD->getExtensionType();
11997
11998 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
11999 if (HiExtType == ISD::NON_EXTLOAD)
12000 HiExtType = ISD::ZEXTLOAD;
12001
12002 // Load the value in two parts
// The low half is always zero-extended so it cannot disturb the bits of the
// shifted high half when the two are ORed together. On little-endian layouts
// the low half is at the lower address; on big-endian layouts the high half is.
12003 SDValue Lo, Hi;
12004 if (DAG.getDataLayout().isLittleEndian()) {
12005 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
12006 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
12007 LD->getAAInfo());
12008
12009 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
12010 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
12011 LD->getPointerInfo().getWithOffset(IncrementSize),
12012 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
12013 LD->getAAInfo());
12014 } else {
12015 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
12016 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
12017 LD->getAAInfo());
12018
12019 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
12020 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
12021 LD->getPointerInfo().getWithOffset(IncrementSize),
12022 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
12023 LD->getAAInfo());
12024 }
12025
12026 // aggregate the two parts
12027 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
12028 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
12029 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
12030
// The output chain joins the chains of both half loads.
12031 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
12032 Hi.getValue(1));
12033
12034 return std::make_pair(Result, TF);
12035}
12036
12038 SelectionDAG &DAG) const {
12039 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
12040 "unaligned indexed stores not implemented!");
12041 SDValue Chain = ST->getChain();
12042 SDValue Ptr = ST->getBasePtr();
12043 SDValue Val = ST->getValue();
12044 EVT VT = Val.getValueType();
12045 Align Alignment = ST->getBaseAlign();
12046 auto &MF = DAG.getMachineFunction();
12047 EVT StoreMemVT = ST->getMemoryVT();
12048
12049 SDLoc dl(ST);
12050 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
12051 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
12052 if (isTypeLegal(intVT)) {
12053 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
12054 StoreMemVT.isVector()) {
12055 // Scalarize the store and let the individual components be handled.
12056 SDValue Result = scalarizeVectorStore(ST, DAG);
12057 return Result;
12058 }
12059 // Expand to a bitconvert of the value to the integer type of the
12060 // same size, then a (misaligned) int store.
12061 // FIXME: Does not handle truncating floating point stores!
12062 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
12063 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
12064 Alignment, ST->getMemOperand()->getFlags());
12065 return Result;
12066 }
12067 // Do a (aligned) store to a stack slot, then copy from the stack slot
12068 // to the final destination using (unaligned) integer loads and stores.
12069 MVT RegVT = getRegisterType(
12070 *DAG.getContext(),
12071 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
12072 EVT PtrVT = Ptr.getValueType();
12073 unsigned StoredBytes = StoreMemVT.getStoreSize();
12074 unsigned RegBytes = RegVT.getSizeInBits() / 8;
12075 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
12076
12077 // Make sure the stack slot is also aligned for the register type.
12078 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
12079 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12080
12081 // Perform the original store, only redirected to the stack slot.
12082 SDValue Store = DAG.getTruncStore(
12083 Chain, dl, Val, StackPtr,
12084 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
12085
12086 EVT StackPtrVT = StackPtr.getValueType();
12087
12088 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
12089 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
12091 unsigned Offset = 0;
12092
12093 // Do all but one copies using the full register width.
12094 for (unsigned i = 1; i < NumRegs; i++) {
12095 // Load one integer register's worth from the stack slot.
12096 SDValue Load = DAG.getLoad(
12097 RegVT, dl, Store, StackPtr,
12098 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
12099 // Store it to the final location. Remember the store.
12100 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
12101 ST->getPointerInfo().getWithOffset(Offset),
12102 ST->getBaseAlign(),
12103 ST->getMemOperand()->getFlags()));
12104 // Increment the pointers.
12105 Offset += RegBytes;
12106 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
12107 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
12108 }
12109
12110 // The last store may be partial. Do a truncating store. On big-endian
12111 // machines this requires an extending load from the stack slot to ensure
12112 // that the bits are in the right place.
12113 EVT LoadMemVT =
12114 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
12115
12116 // Load from the stack slot.
12117 SDValue Load = DAG.getExtLoad(
12118 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
12119 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
12120
12121 Stores.push_back(DAG.getTruncStore(
12122 Load.getValue(1), dl, Load, Ptr,
12123 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
12124 ST->getBaseAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()));
12125 // The order of the stores doesn't matter - say it with a TokenFactor.
12126 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
12127 return Result;
12128 }
12129
12130 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
12131 "Unaligned store of unknown type.");
12132 // Get the half-size VT
12133 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
12134 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
12135 unsigned IncrementSize = NumBits / 8;
12136
12137 // Divide the stored value in two parts.
12138 SDValue ShiftAmount =
12139 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
12140 SDValue Lo = Val;
12141 // If Val is a constant, replace the upper bits with 0. The SRL will constant
12142 // fold and not use the upper bits. A smaller constant may be easier to
12143 // materialize.
12144 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
12145 Lo = DAG.getNode(
12146 ISD::AND, dl, VT, Lo,
12147 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
12148 VT));
12149 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
12150
12151 // Store the two parts
12152 SDValue Store1, Store2;
12153 Store1 = DAG.getTruncStore(Chain, dl,
12154 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
12155 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
12156 ST->getMemOperand()->getFlags());
12157
12158 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
12159 Store2 = DAG.getTruncStore(
12160 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
12161 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
12162 ST->getMemOperand()->getFlags(), ST->getAAInfo());
12163
12164 SDValue Result =
12165 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
12166 return Result;
12167}
12168
// Returns Addr advanced past the memory touched by a masked access of type
// DataVT (result is `Addr + Increment`, computed in Addr's value type).
//  * IsCompressedMemory: the increment is (number of set mask bits) *
//    (scalar element size of DataVT in bytes).
//  * Otherwise: the increment is the full store size of DataVT.
// NOTE(review): the line carrying the function name and its leading parameters
// (Addr and Mask are used below) and the declaration of `Increment` are absent
// from this listing -- confirm against the complete source.
12169SDValue
12171 const SDLoc &DL, EVT DataVT,
12172 SelectionDAG &DAG,
12173 bool IsCompressedMemory) const {
12175 EVT AddrVT = Addr.getValueType();
12176 EVT MaskVT = Mask.getValueType();
12177 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
12178 "Incompatible types of Data and Mask");
12179 if (IsCompressedMemory) {
12180 // Incrementing the pointer according to number of '1's in the mask.
12181 if (DataVT.isScalableVector()) {
// Scalable vectors: zero-extend every mask lane to i32 and add the lanes up.
12182 EVT MaskExtVT = MaskVT.changeElementType(*DAG.getContext(), MVT::i32);
12183 SDValue MaskExt = DAG.getNode(ISD::ZERO_EXTEND, DL, MaskExtVT, Mask);
12184 Increment = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, MaskExt);
12185 } else {
// Fixed-length vectors: reinterpret the whole mask as one integer (widened
// to at least 32 bits) and count its set bits.
// NOTE(review): this equals the active-lane count only if each mask lane is
// a single bit -- presumably the mask is a vector of i1; confirm.
12186 EVT MaskIntVT =
12187 EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
12188 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
12189 if (MaskIntVT.getSizeInBits() < 32) {
12190 MaskInIntReg =
12191 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
12192 MaskIntVT = MVT::i32;
12193 }
12194 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
12195 }
12196 // Scale is an element size in bytes.
12197 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
12198 AddrVT);
// Bring the lane count to the address type before scaling it to bytes.
12199 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
12200 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
12201 } else
12202 Increment = DAG.getTypeSize(DL, AddrVT, DataVT.getStoreSize());
12203
12204 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
12205}
12206
12208 EVT VecVT, const SDLoc &dl,
12209 ElementCount SubEC) {
// Clamps the (possibly dynamic) index Idx so that a sub-vector of SubEC
// elements starting at the returned index stays inside a vector of type VecVT.
// The result has the same value type as Idx.
// NOTE(review): the first signature line (function name plus the DAG and Idx
// parameters used below) is absent from this listing; the call site later in
// the file suggests the name clampDynamicVectorIndex -- confirm.
12210 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
12211 "Cannot index a scalable vector within a fixed-width vector");
12212
12213 unsigned NElts = VecVT.getVectorMinNumElements();
12214 unsigned NumSubElts = SubEC.getKnownMinValue();
12215 EVT IdxVT = Idx.getValueType();
12216
12217 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
12218 // If this is a constant index and we know the value plus the number of the
12219 // elements in the subvector minus one is less than the minimum number of
12220 // elements then it's safe to return Idx.
12221 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
12222 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
12223 return Idx;
// Otherwise clamp to (VS - NumSubElts), where VS is built from vscale and
// NElts (presumably vscale * NElts, the runtime element count -- confirm
// against getVScale). USUBSAT is used when NumSubElts may exceed the minimum
// element count so the bound cannot wrap below zero.
12224 SDValue VS =
12225 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
12226 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
12227 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
12228 DAG.getConstant(NumSubElts, dl, IdxVT));
12229 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
12230 }
// Single element in a power-of-two sized vector: keep only the low
// log2(NElts) bits of the index with an AND instead of a UMIN.
12231 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
12232 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
12233 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
12234 DAG.getConstant(Imm, dl, IdxVT));
12235 }
// General case: the largest valid start index is NElts - NumSubElts (0 when
// the sub-vector is at least as large as the vector).
12236 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
12237 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
12238 DAG.getConstant(MaxIndex, dl, IdxVT));
12239}
12240
// Thin wrapper that forwards DAG, VecPtr, VecVT, Index and PtrArithFlags to
// another helper and returns its result.
// NOTE(review): the lines carrying this function's name, the callee's name and
// one of the forwarded arguments are absent from this listing. Presumably this
// computes the address of a single element by delegating to the sub-vector
// pointer helper with a one-element sub-vector type -- confirm against the
// complete source.
12241SDValue
12243 EVT VecVT, SDValue Index,
12244 const SDNodeFlags PtrArithFlags) const {
12246 DAG, VecPtr, VecVT,
12248 Index, PtrArithFlags);
12249}
12250
// Returns a pointer to the sub-vector of type SubVecVT that starts at element
// Index of the in-memory vector of type VecVT located at VecPtr. Index is
// clamped first, then converted to a byte offset and added to VecPtr using
// PtrArithFlags.
// NOTE(review): the line carrying the function name and its leading parameters
// (DAG and VecPtr are used below) is absent from this listing.
12251SDValue
12253 EVT VecVT, EVT SubVecVT, SDValue Index,
12254 const SDNodeFlags PtrArithFlags) const {
12255 SDLoc dl(Index);
12256 // Make sure the index type is big enough to compute in.
12257 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
12258
12259 EVT EltVT = VecVT.getVectorElementType();
12260
12261 // Calculate the element offset and add it to the pointer.
12262 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
12263 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
12264 "Converting bits to bytes lost precision");
12265 assert(SubVecVT.getVectorElementType() == EltVT &&
12266 "Sub-vector must be a vector with matching element type");
// Keep the index in range so the computed address cannot leave the vector.
12267 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
12268 SubVecVT.getVectorElementCount());
12269
12270 EVT IdxVT = Index.getValueType();
// For a scalable sub-vector the index is additionally multiplied by vscale.
12271 if (SubVecVT.isScalableVector())
12272 Index =
12273 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
12274 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
12275
// Scale the element index to a byte offset.
12276 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
12277 DAG.getConstant(EltSize, dl, IdxVT));
12278 return DAG.getMemBasePlusOffset(VecPtr, Index, dl, PtrArithFlags);
12279}
12280
12281//===----------------------------------------------------------------------===//
12282// Implementation of Emulated TLS Model
12283//===----------------------------------------------------------------------===//
12284
12286 SelectionDAG &DAG) const {
// Lowers the address of an emulated-TLS variable to a C-calling-convention
// call to "__emutls_get_address", passing the address of the module-level
// global named "__emutls_v.<name>", and returns the call's result value.
// NOTE(review): several lines are absent from this listing: the first
// signature line (name and the GA parameter), the initializer of GV, and the
// declarations of CLI and MFI -- confirm against the complete source.
12287 // Access to address of TLS variable xyz is lowered to a function call:
12288 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
12289 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12290 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
12291 SDLoc dl(GA);
12292
12293 ArgListTy Args;
12294 const GlobalValue *GV =
// Build the control-variable name and look it up in GV's parent module; it
// must already exist (asserted below).
12296 SmallString<32> NameString("__emutls_v.");
12297 NameString += GV->getName();
12298 StringRef EmuTlsVarName(NameString);
12299 const GlobalVariable *EmuTlsVar =
12300 GV->getParent()->getNamedGlobal(EmuTlsVarName);
12301 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
12302 Args.emplace_back(DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT), VoidPtrType);
12303
12304 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
12305
// The call is chained to the DAG entry node rather than to a preceding
// memory operation.
12307 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
12308 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
12309 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
12310
12311 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
12312 // At least for X86 targets, maybe good for other targets too?
12314 MFI.setAdjustsStack(true); // Is this only for X86 target?
12315 MFI.setHasCalls(true);
12316
12317 assert((GA->getOffset() == 0) &&
12318 "Emulated TLS must have zero offset in GlobalAddressSDNode");
12319 return CallResult.first;
12320}
12321
12323 SelectionDAG &DAG) const {
// Rewrites (setcc X, 0, seteq) as
//   trunc_to_i32(srl(ctlz(X'), log2(bitwidth(X'))))
// where X' is X zero-extended to i32 when X is narrower than i32. Returns an
// empty SDValue when isCtlzFast() is false or the SETCC is not an equality
// comparison against the constant zero.
// NOTE(review): the first signature line (name and the Op parameter) is absent
// from this listing.
12324 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
12325 if (!isCtlzFast())
12326 return SDValue();
12327 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
12328 SDLoc dl(Op);
12329 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
12330 EVT VT = Op.getOperand(0).getValueType();
12331 SDValue Zext = Op.getOperand(0);
12332 if (VT.bitsLT(MVT::i32)) {
12333 VT = MVT::i32;
12334 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
12335 }
// For a power-of-two bit width, ctlz returns the width itself only for a
// zero input, so bit log2(width) of the count is set exactly when X == 0.
12336 unsigned Log2b = Log2_32(VT.getSizeInBits());
12337 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
12338 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
12339 DAG.getConstant(Log2b, dl, MVT::i32));
12340 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
12341 }
12342 return SDValue();
12343}
12344
// Expands an integer SMIN/SMAX/UMIN/UMAX node (Node, operands Op0 and Op1)
// into other nodes. Strategies are tried in order: flip signedness when both
// sign bits are known zero, special arithmetic forms built on SETCC/USUBSAT,
// unrolling, and finally a compare + select.
// NOTE(review): the signature line and several condition-continuation lines
// (after the `&&` at the end of three `if` lines, and the `if` guarding the
// UnrollVectorOp return) are absent from this listing -- confirm the exact
// legality checks against the complete source.
12346 SDValue Op0 = Node->getOperand(0);
12347 SDValue Op1 = Node->getOperand(1);
12348 EVT VT = Op0.getValueType();
12349 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12350 unsigned Opcode = Node->getOpcode();
12351 SDLoc DL(Node);
12352
12353 // If both sign bits are zero, flip UMIN/UMAX <-> SMIN/SMAX if legal.
12354 unsigned AltOpcode = ISD::getOppositeSignednessMinMaxOpcode(Opcode);
12355 if (isOperationLegal(AltOpcode, VT) && DAG.SignBitIsZero(Op0) &&
12356 DAG.SignBitIsZero(Op1))
12357 return DAG.getNode(AltOpcode, DL, VT, Op0, Op1);
12358
12359 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
12360 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
// Op0 is used twice below, so it is frozen first.
12362 Op0 = DAG.getFreeze(Op0);
12363 SDValue Zero = DAG.getConstant(0, DL, VT);
12364 return DAG.getNode(ISD::SUB, DL, VT, Op0,
12365 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
12366 }
12367
12368 // umin(x,y) -> sub(x,usubsat(x,y))
12369 // TODO: Missing freeze(Op0)?
12370 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
12372 return DAG.getNode(ISD::SUB, DL, VT, Op0,
12373 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
12374 }
12375
12376 // umax(x,y) -> add(x,usubsat(y,x))
12377 // TODO: Missing freeze(Op0)?
12378 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
12380 return DAG.getNode(ISD::ADD, DL, VT, Op0,
12381 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
12382 }
12383
12384 // FIXME: Should really try to split the vector in case it's legal on a
12385 // subvector.
12387 return DAG.UnrollVectorOp(Node);
12388
12389 // Attempt to find an existing SETCC node that we can reuse.
12390 // TODO: Do we need a generic doesSETCCNodeExist?
12391 // TODO: Missing freeze(Op0)/freeze(Op1)?
// buildMinMax emits select(setcc(Op0, Op1, CC), ...). It first looks for an
// already existing SETCC on (Op0, Op1) with PrefCC or AltCC (select picks
// Op0 when true), then with one of the two commuted codes (select picks Op1
// when true), and only creates a new SETCC with PrefCC if none exists.
12392 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
12393 ISD::CondCode PrefCommuteCC,
12394 ISD::CondCode AltCommuteCC) {
12395 SDVTList BoolVTList = DAG.getVTList(BoolVT);
12396 for (ISD::CondCode CC : {PrefCC, AltCC}) {
12397 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
12398 {Op0, Op1, DAG.getCondCode(CC)})) {
12399 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
12400 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
12401 }
12402 }
12403 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
12404 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
12405 {Op0, Op1, DAG.getCondCode(CC)})) {
12406 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
12407 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
12408 }
12409 }
12410 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
12411 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
12412 };
12413
12414 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
12415 // -> Y = (A < B) ? B : A
12416 // -> Y = (A >= B) ? A : B
12417 // -> Y = (A <= B) ? B : A
12418 switch (Opcode) {
12419 case ISD::SMAX:
12420 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
12421 case ISD::SMIN:
12422 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
12423 case ISD::UMAX:
12424 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
12425 case ISD::UMIN:
12426 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
12427 }
12428
12429 llvm_unreachable("How did we get here?");
12430}
12431
// Expands a saturating integer add/sub node (SADDSAT, UADDSAT, SSUBSAT or
// USUBSAT). Unsigned forms are first tried via UMAX/UMIN or a compare-based
// form; otherwise the matching overflow-reporting node ([SU]ADDO / [SU]SUBO)
// is emitted and the saturated value is selected when overflow is signalled.
// NOTE(review): the signature line and a few guard lines (the continuation of
// the `usub.sat(a, 1)` condition, the `if` guarding UnrollVectorOp, and the
// inner `if (...) {` in the UADDSAT/USUBSAT blocks that chooses the mask form
// over the select form) are absent from this listing.
12433 unsigned Opcode = Node->getOpcode();
12434 SDValue LHS = Node->getOperand(0);
12435 SDValue RHS = Node->getOperand(1);
12436 EVT VT = LHS.getValueType();
12437 SDLoc dl(Node);
12438
12439 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
12440 assert(VT.isInteger() && "Expected operands to be integers");
12441
12442 // usub.sat(a, b) -> umax(a, b) - b
12443 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
12444 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
12445 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
12446 }
12447
12448 // usub.sat(a, 1) -> sub(a, zext(a != 0))
12449 // Prefer this on targets without legal/cost-effective overflow-carry nodes.
12450 if (Opcode == ISD::USUBSAT && isOneOrOneSplat(RHS) &&
// LHS is used twice below, so it is frozen first.
12452 LHS = DAG.getFreeze(LHS);
12453 SDValue Zero = DAG.getConstant(0, dl, VT);
12454 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12455 SDValue IsNonZero = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETNE);
12456 SDValue Subtrahend = DAG.getBoolExtOrTrunc(IsNonZero, dl, VT, BoolVT);
// Mask to bit 0 so the subtrahend is 0 or 1 whatever the extended boolean
// looks like.
12457 Subtrahend =
12458 DAG.getNode(ISD::AND, dl, VT, Subtrahend, DAG.getConstant(1, dl, VT));
12459 return DAG.getNode(ISD::SUB, dl, VT, LHS, Subtrahend);
12460 }
12461
12462 // uadd.sat(a, b) -> umin(a, ~b) + b
12463 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
12464 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
12465 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
12466 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
12467 }
12468
// Map the saturating opcode to the node that yields {result, overflow}.
12469 unsigned OverflowOp;
12470 switch (Opcode) {
12471 case ISD::SADDSAT:
12472 OverflowOp = ISD::SADDO;
12473 break;
12474 case ISD::UADDSAT:
12475 OverflowOp = ISD::UADDO;
12476 break;
12477 case ISD::SSUBSAT:
12478 OverflowOp = ISD::SSUBO;
12479 break;
12480 case ISD::USUBSAT:
12481 OverflowOp = ISD::USUBO;
12482 break;
12483 default:
12484 llvm_unreachable("Expected method to receive signed or unsigned saturation "
12485 "addition or subtraction node.");
12486 }
12487
12488 // FIXME: Should really try to split the vector in case it's legal on a
12489 // subvector.
12491 return DAG.UnrollVectorOp(Node);
12492
12493 unsigned BitWidth = LHS.getScalarValueSizeInBits();
12494 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12495 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
12496 SDValue SumDiff = Result.getValue(0);
12497 SDValue Overflow = Result.getValue(1);
12498 SDValue Zero = DAG.getConstant(0, dl, VT);
12499 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
12500
12501 if (Opcode == ISD::UADDSAT) {
12503 // (LHS + RHS) | OverflowMask
12504 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
12505 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
12506 }
12507 // Overflow ? 0xffff.... : (LHS + RHS)
12508 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
12509 }
12510
12511 if (Opcode == ISD::USUBSAT) {
12513 // (LHS - RHS) & ~OverflowMask
12514 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
12515 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
12516 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
12517 }
12518 // Overflow ? 0 : (LHS - RHS)
12519 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
12520 }
12521
12522 assert((Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) &&
12523 "Expected signed saturating add/sub opcode");
12524
12525 const APInt MinVal = APInt::getSignedMinValue(BitWidth);
12526 const APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
12527
12528 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
12529 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
12530
12531 // If either of the operand signs are known, then they are guaranteed to
12532 // only saturate in one direction. If non-negative they will saturate
12533 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
12534 //
12535 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
12536 // sign of 'y' has to be flipped.
12537
12538 bool LHSIsNonNegative = KnownLHS.isNonNegative();
12539 bool RHSIsNonNegative =
12540 Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative() : KnownRHS.isNegative();
12541 if (LHSIsNonNegative || RHSIsNonNegative) {
12542 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
12543 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
12544 }
12545
12546 bool LHSIsNegative = KnownLHS.isNegative();
12547 bool RHSIsNegative =
12548 Opcode == ISD::SADDSAT ? KnownRHS.isNegative() : KnownRHS.isNonNegative();
12549 if (LHSIsNegative || RHSIsNegative) {
12550 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
12551 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
12552 }
12553
// Unknown signs: the arithmetic shift by BitWidth - 1 turns the wrapped
// result's sign into all-ones or zero, and XOR with MinVal maps that to
// SIGNED_MAX or SIGNED_MIN respectively.
12554 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
12555 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
12556 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
12557 DAG.getConstant(BitWidth - 1, dl, VT));
12558 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
12559 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
12560}
12561
// Expands a three-way comparison node (ISD::UCMP uses unsigned predicates, any
// other opcode reaching here uses signed ones) of LHS and RHS into a value of
// the node's result type: all-ones (-1) if LHS < RHS, 1 if LHS > RHS, else 0.
// Two forms are produced: a pair of selects, or a subtraction of the two
// comparison results followed by sign-extension/truncation to the result type.
// NOTE(review): the signature line, parts of the condition choosing the select
// form, and the `if` guarding the std::swap are absent from this listing --
// confirm the exact conditions against the complete source.
12563 unsigned Opcode = Node->getOpcode();
12564 SDValue LHS = Node->getOperand(0);
12565 SDValue RHS = Node->getOperand(1);
12566 EVT VT = LHS.getValueType();
12567 EVT ResVT = Node->getValueType(0);
12568 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12569 SDLoc dl(Node);
12570
12571 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
12572 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
12573 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
12574 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
12575
12576 // We can't perform arithmetic on i1 values. Extending them would
12577 // probably result in worse codegen, so let's just use two selects instead.
12578 // Some targets are also just better off using selects rather than subtraction
12579 // because one of the conditions can be merged with one of the selects.
12580 // And finally, if we don't know the contents of high bits of a boolean value
12581 // we can't perform any arithmetic either.
12583 BoolVT.getScalarSizeInBits() == 1 ||
// Select form: IsLT ? -1 : (IsGT ? 1 : 0).
12585 SDValue SelectZeroOrOne =
12586 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
12587 DAG.getConstant(0, dl, ResVT));
12588 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
12589 SelectZeroOrOne);
12590 }
12591
// Arithmetic form: IsGT - IsLT in the boolean type (operands swapped first
// under a condition not shown in this listing).
12593 std::swap(IsGT, IsLT);
12594 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
12595 ResVT);
12596}
12597
// Expands a saturating left shift (SSHLSAT or USHLSAT). The shift is performed
// normally and then undone with an arithmetic (signed) or logical (unsigned)
// right shift; if the round trip does not reproduce LHS, bits were lost and
// the saturation value is returned instead of the shifted result.
// NOTE(review): the signature line and the `if` guarding the UnrollVectorOp
// return are absent from this listing.
12599 unsigned Opcode = Node->getOpcode();
12600 bool IsSigned = Opcode == ISD::SSHLSAT;
12601 SDValue LHS = Node->getOperand(0);
12602 SDValue RHS = Node->getOperand(1);
12603 EVT VT = LHS.getValueType();
12604 SDLoc dl(Node);
12605
12606 assert((Node->getOpcode() == ISD::SSHLSAT ||
12607 Node->getOpcode() == ISD::USHLSAT) &&
12608 "Expected a SHLSAT opcode");
12609 assert(VT.isInteger() && "Expected operands to be integers");
12610
12612 return DAG.UnrollVectorOp(Node);
12613
12614 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
12615
12616 unsigned BW = VT.getScalarSizeInBits();
12617 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12618 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
12619 SDValue Orig =
12620 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
12621
// Saturation value: signed picks SIGNED_MIN for negative LHS and SIGNED_MAX
// otherwise; unsigned always uses the maximum value.
12622 SDValue SatVal;
12623 if (IsSigned) {
12624 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
12625 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
12626 SDValue Cond =
12627 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
12628 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
12629 } else {
12630 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
12631 }
12632 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
12633 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
12634}
12635
12637 bool Signed, SDValue &Lo, SDValue &Hi,
12638 SDValue LHS, SDValue RHS,
12639 SDValue HiLHS, SDValue HiRHS) const {
// Computes the double-width product of LHS and RHS using only operations on
// their own type VT (AND, MUL, ADD and shifts on half-width pieces) and
// returns the low and high halves through Lo and Hi.
//  * Signed: use arithmetic right shifts when forming the high pieces; only
//    allowed when HiLHS/HiRHS are null (asserted below).
//  * HiLHS/HiRHS: either both set or both null. When set, their products with
//    the opposite operand are added into Hi.
// NOTE(review): the first signature line (name and the DAG and dl parameters
// used below) is absent from this listing; a call later in the file suggests
// the name forceExpandMultiply. The second assert's message says "RiRHS",
// which looks like a typo for "HiRHS" (left untouched: it is a runtime string).
12640 EVT VT = LHS.getValueType();
12641 assert(RHS.getValueType() == VT && "Mismatching operand types");
12642
12643 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
12644 assert((!Signed || !HiLHS) &&
12645 "Signed flag should only be set when HiLHS and RiRHS are null");
12646
12647 // We'll expand the multiplication by brute force because we have no other
12648 // options. This is a trivially-generalized version of the code from
12649 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
12650 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
12651 // sign bits while calculating the Hi half.
12652 unsigned Bits = VT.getSizeInBits();
12653 unsigned HalfBits = Bits / 2;
// LL / RL: low halves of the operands; T: product of the low halves.
12654 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
12655 SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
12656 SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
12657
12658 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
12659 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
12660
12661 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
12662 // This is always an unsigned shift.
12663 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
12664
// LH / RH: high halves of the operands (sign-propagating when Signed).
12665 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
12666 SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
12667 SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
12668
// U and V accumulate the two cross products together with the carried-in
// upper half of the previous partial sum.
12669 SDValue U =
12670 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
12671 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
12672 SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
12673
12674 SDValue V =
12675 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
12676 SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
12677
12678 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
12679 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
12680
12681 Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
12682 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
12683
12684 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
12685 // the products to Hi.
12686 if (HiLHS) {
12687 SDValue RHLL = DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS);
12688 SDValue RLLH = DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS);
12689 Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
12690 DAG.getNode(ISD::ADD, dl, VT, RHLL, RLLH));
12691 }
12692}
12693
12695 bool Signed, const SDValue LHS,
12696 const SDValue RHS, SDValue &Lo,
12697 SDValue &Hi) const {
// Computes the double-width product of LHS and RHS and returns its halves in
// Lo and Hi. A multiply libcall for the widened type (i16/i32/i64/i128) is
// used when one is available; otherwise the work is delegated to
// forceExpandMultiply.
// NOTE(review): several lines are absent from this listing: the first
// signature line (name and the DAG and dl parameters used below), the
// declaration of CallOptions, the `if` that selects the argument order, and
// the first line of the assert on Ret -- confirm against the complete source.
12698 EVT VT = LHS.getValueType();
12699 assert(RHS.getValueType() == VT && "Mismatching operand types");
12700 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
12701 // We can fall back to a libcall with an illegal type for the MUL if we
12702 // have a libcall big enough.
12703 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
12704 if (WideVT == MVT::i16)
12705 LC = RTLIB::MUL_I16;
12706 else if (WideVT == MVT::i32)
12707 LC = RTLIB::MUL_I32;
12708 else if (WideVT == MVT::i64)
12709 LC = RTLIB::MUL_I64;
12710 else if (WideVT == MVT::i128)
12711 LC = RTLIB::MUL_I128;
12712
// No usable libcall implementation: expand inline instead.
12713 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
12714 if (LibcallImpl == RTLIB::Unsupported) {
12715 forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
12716 return;
12717 }
12718
// Build the upper halves of the widened operands: sign bits replicated for
// signed multiplication, zero for unsigned.
12719 SDValue HiLHS, HiRHS;
12720 if (Signed) {
12721 // The high part is obtained by SRA'ing all but one of the bits of low
12722 // part.
12723 unsigned LoSize = VT.getFixedSizeInBits();
12724 SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
12725 HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
12726 HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
12727 } else {
12728 HiLHS = DAG.getConstant(0, dl, VT);
12729 HiRHS = DAG.getConstant(0, dl, VT);
12730 }
12731
12732 // Attempt a libcall.
12733 SDValue Ret;
12735 CallOptions.setIsSigned(Signed);
12736 CallOptions.setIsPostTypeLegalization(true);
12738 // Halves of WideVT are packed into registers in different order
12739 // depending on platform endianness. This is usually handled by
12740 // the C calling convention, but we can't defer to it in
12741 // the legalizer.
12742 SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
12743 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
12744 } else {
12745 SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
12746 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
12747 }
12749 "Ret value is a collection of constituent nodes holding result.");
// Pick the halves out of the returned node's two operands; which operand
// holds the low half depends on the data layout's endianness.
12750 if (DAG.getDataLayout().isLittleEndian()) {
12751 // Same as above.
12752 Lo = Ret.getOperand(0);
12753 Hi = Ret.getOperand(1);
12754 } else {
12755 Lo = Ret.getOperand(1);
12756 Hi = Ret.getOperand(0);
12757 }
12758}
12759
// Expands a fixed-point multiplication node (SMULFIX, UMULFIX, SMULFIXSAT or
// UMULFIXSAT). Operand 2 of the node is the constant scale. The double-width
// product is obtained as a (Lo, Hi) pair, shifted right by the scale with a
// funnel shift, and, for the saturating opcodes, clamped when the discarded
// upper bits show that the result does not fit. Returns an empty SDValue for
// vector types when no suitable multiply operation is available.
// NOTE(review): the line carrying the function name and parameters (Node and
// DAG are used below) and the `if` guarding the plain MUL return for scale 0
// are absent from this listing.
12760SDValue
12762 assert((Node->getOpcode() == ISD::SMULFIX ||
12763 Node->getOpcode() == ISD::UMULFIX ||
12764 Node->getOpcode() == ISD::SMULFIXSAT ||
12765 Node->getOpcode() == ISD::UMULFIXSAT) &&
12766 "Expected a fixed point multiplication opcode");
12767
12768 SDLoc dl(Node);
12769 SDValue LHS = Node->getOperand(0);
12770 SDValue RHS = Node->getOperand(1);
12771 EVT VT = LHS.getValueType();
12772 unsigned Scale = Node->getConstantOperandVal(2);
12773 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
12774 Node->getOpcode() == ISD::UMULFIXSAT);
12775 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
12776 Node->getOpcode() == ISD::SMULFIXSAT);
12777 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12778 unsigned VTSize = VT.getScalarSizeInBits();
12779
// Scale 0 is an ordinary integer multiply; the saturating variants use the
// overflow-reporting multiply nodes when those are legal or custom.
12780 if (!Scale) {
12781 // [us]mul.fix(a, b, 0) -> mul(a, b)
12782 if (!Saturating) {
12784 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12785 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
12786 SDValue Result =
12787 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
12788 SDValue Product = Result.getValue(0);
12789 SDValue Overflow = Result.getValue(1);
12790 SDValue Zero = DAG.getConstant(0, dl, VT);
12791
12792 APInt MinVal = APInt::getSignedMinValue(VTSize);
12793 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
12794 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
12795 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
12796 // Xor the inputs, if resulting sign bit is 0 the product will be
12797 // positive, else negative.
12798 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
12799 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
12800 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
12801 return DAG.getSelect(dl, VT, Overflow, Result, Product);
12802 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
12803 SDValue Result =
12804 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
12805 SDValue Product = Result.getValue(0);
12806 SDValue Overflow = Result.getValue(1);
12807
12808 APInt MaxVal = APInt::getMaxValue(VTSize);
12809 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
12810 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
12811 }
12812 }
12813
12814 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
12815 "Expected scale to be less than the number of bits if signed or at "
12816 "most the number of bits if unsigned.");
12817 assert(LHS.getValueType() == RHS.getValueType() &&
12818 "Expected both operands to be the same type");
12819
// Obtain the double-width product as (Lo, Hi), trying in order: a combined
// *MUL_LOHI node, MUL + MULH[SU], a multiply in the widened type, and finally
// the forced expansion (scalars only).
12820 // Get the upper and lower bits of the result.
12821 SDValue Lo, Hi;
12822 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
12823 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
12824 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
12825 if (isOperationLegalOrCustom(LoHiOp, VT)) {
12826 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
12827 Lo = Result.getValue(0);
12828 Hi = Result.getValue(1);
12829 } else if (isOperationLegalOrCustom(HiOp, VT)) {
12830 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12831 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
12832 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
12833 // Try for a multiplication using a wider type.
12834 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12835 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
12836 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
12837 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
12838 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
12839 SDValue Shifted =
12840 DAG.getNode(ISD::SRA, dl, WideVT, Res,
12841 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
12842 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
12843 } else if (VT.isVector()) {
12844 return SDValue();
12845 } else {
12846 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
12847 }
12848
12849 if (Scale == VTSize)
12850 // Result is just the top half since we'd be shifting by the width of the
12851 // operand. Overflow impossible so this works for both UMULFIX and
12852 // UMULFIXSAT.
12853 return Hi;
12854
12855 // The result will need to be shifted right by the scale since both operands
12856 // are scaled. The result is given to us in 2 halves, so we only want part of
12857 // both in the result.
12858 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
12859 DAG.getShiftAmountConstant(Scale, VT, dl));
12860 if (!Saturating)
12861 return Result;
12862
12863 if (!Signed) {
12864 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
12865 // widened multiplication) aren't all zeroes.
12866
12867 // Saturate to max if ((Hi >> Scale) != 0),
12868 // which is the same as if (Hi > ((1 << Scale) - 1))
12869 APInt MaxVal = APInt::getMaxValue(VTSize);
12870 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
12871 dl, VT);
12872 Result = DAG.getSelectCC(dl, Hi, LowMask,
12873 DAG.getConstant(MaxVal, dl, VT), Result,
12874 ISD::SETUGT);
12875
12876 return Result;
12877 }
12878
12879 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
12880 // widened multiplication) aren't all ones or all zeroes.
12881
12882 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
12883 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
12884
// Reached for scale 0 only when the SMULO shortcut above was not taken:
// overflow means Hi differs from the sign-extension of Lo.
12885 if (Scale == 0) {
12886 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
12887 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
12888 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
12889 // Saturated to SatMin if wide product is negative, and SatMax if wide
12890 // product is positive ...
12891 SDValue Zero = DAG.getConstant(0, dl, VT);
12892 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
12893 ISD::SETLT);
12894 // ... but only if we overflowed.
12895 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
12896 }
12897
12898 // We handled Scale==0 above so all the bits to examine are in Hi.
12899
12900 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
12901 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
12902 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
12903 dl, VT);
12904 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
12905 // Saturate to min if (Hi >> (Scale - 1)) < -1),
12906 // which is the same as if (HI < (-1 << (Scale - 1))
12907 SDValue HighMask =
12908 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
12909 dl, VT);
12910 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
12911 return Result;
12912}
12913
// Expands a fixed point division ([SU]DIVFIX[SAT]) of LHS by RHS with the
// given Scale into shifts plus an ordinary integer division performed in the
// operand type. Returns an empty SDValue when the known headroom of the
// operands is too small to do that without resizing.
// NOTE(review): the listing line that names this function and declares its
// leading parameters (Opcode, dl) is absent from this extract; judging by the
// assert below this is presumably TargetLowering::expandFixedPointDiv -
// confirm against the real source.
12914SDValue
12916 SDValue LHS, SDValue RHS,
12917 unsigned Scale, SelectionDAG &DAG) const {
12918 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
12919 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
12920 "Expected a fixed point division opcode");
12921
12922 EVT VT = LHS.getValueType();
12923 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
12924 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
12925 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12926
12927 // If there is enough room in the type to upscale the LHS or downscale the
12928 // RHS before the division, we can perform it in this type without having to
12929 // resize. For signed operations, the LHS headroom is the number of
12930 // redundant sign bits, and for unsigned ones it is the number of zeroes.
12931 // The headroom for the RHS is the number of trailing zeroes.
// NOTE(review): the unsigned arm of the conditional below is missing from
// this extract (one listing line was dropped).
12932 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
12934 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
12935
12936 // For signed saturating operations, we need to be able to detect true integer
12937 // division overflow; that is, when you have MIN / -EPS. However, this
12938 // is undefined behavior and if we emit divisions that could take such
12939 // values it may cause undesired behavior (arithmetic exceptions on x86, for
12940 // example).
12941 // Avoid this by requiring an extra bit so that we never get this case.
12942 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
12943 // signed saturating division, we need to emit a whopping 32-bit division.
12944 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
12945 return SDValue();
12946
// Use as much of the scale as possible on the LHS (shift up); whatever is
// left over is taken from the RHS (shift down).
12947 unsigned LHSShift = std::min(LHSLead, Scale);
12948 unsigned RHSShift = Scale - LHSShift;
12949
12950 // At this point, we know that if we shift the LHS up by LHSShift and the
12951 // RHS down by RHSShift, we can emit a regular division with a final scaling
12952 // factor of Scale.
12953
12954 if (LHSShift)
12955 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
12956 DAG.getShiftAmountConstant(LHSShift, VT, dl));
12957 if (RHSShift)
12958 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
12959 DAG.getShiftAmountConstant(RHSShift, VT, dl));
12960
12961 SDValue Quot;
12962 if (Signed) {
12963 // For signed operations, if the resulting quotient is negative and the
12964 // remainder is nonzero, subtract 1 from the quotient to round towards
12965 // negative infinity.
12966 SDValue Rem;
12967 // FIXME: Ideally we would always produce an SDIVREM here, but if the
12968 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
12969 // we couldn't just form a libcall, but the type legalizer doesn't do it.
// NOTE(review): the second half of this condition is missing from this
// extract (one listing line was dropped).
12970 if (isTypeLegal(VT) &&
12972 Quot = DAG.getNode(ISD::SDIVREM, dl,
12973 DAG.getVTList(VT, VT),
12974 LHS, RHS);
12975 Rem = Quot.getValue(1);
12976 Quot = Quot.getValue(0);
12977 } else {
12978 Quot = DAG.getNode(ISD::SDIV, dl, VT,
12979 LHS, RHS);
12980 Rem = DAG.getNode(ISD::SREM, dl, VT,
12981 LHS, RHS);
12982 }
12983 SDValue Zero = DAG.getConstant(0, dl, VT);
12984 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
12985 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
12986 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
// The quotient is negative exactly when the operand signs differ.
12987 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
12988 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
12989 DAG.getConstant(1, dl, VT));
12990 Quot = DAG.getSelect(dl, VT,
12991 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
12992 Sub1, Quot);
12993 } else
12994 Quot = DAG.getNode(ISD::UDIV, dl, VT,
12995 LHS, RHS);
12996
12997 return Quot;
12998}
12999
13001 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
// Expands an unsigned add/subtract-with-overflow node. Result receives the
// arithmetic value and Overflow the overflow flag, converted to the node's
// second result type. A node whose opcode is not ISD::UADDO is treated as the
// subtraction form.
// NOTE(review): the listing line naming this function is absent from this
// extract; presumably TargetLowering::expandUADDSUBO - confirm.
13002 SDLoc dl(Node);
13003 SDValue LHS = Node->getOperand(0);
13004 SDValue RHS = Node->getOperand(1);
13005 bool IsAdd = Node->getOpcode() == ISD::UADDO;
13006
13007 // If UADDO_CARRY/USUBO_CARRY is legal, use that instead.
13008 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
13009 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
// A zero carry-in makes the carry-consuming node equivalent to the plain
// overflow operation.
13010 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
13011 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
13012 { LHS, RHS, CarryIn });
13013 Result = SDValue(NodeCarry.getNode(), 0);
13014 Overflow = SDValue(NodeCarry.getNode(), 1);
13015 return;
13016 }
13017
13018 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
13019 LHS.getValueType(), LHS, RHS);
13020
13021 EVT ResultType = Node->getValueType(1);
13022 EVT SetCCType = getSetCCResultType(
13023 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
13024 SDValue SetCC;
13025 if (IsAdd && isOneConstant(RHS)) {
13026 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potentially reduces
13027 // the live range of X. We assume comparing with 0 is cheap.
13028 // The general case (X + C) < C is not necessarily beneficial. Although we
13029 // reduce the live range of X, we may introduce the materialization of
13030 // constant C.
13031 SetCC =
13032 DAG.getSetCC(dl, SetCCType, Result,
13033 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
13034 } else if (IsAdd && isAllOnesConstant(RHS)) {
13035 // Special case: uaddo X, -1 overflows if X != 0.
13036 SetCC =
13037 DAG.getSetCC(dl, SetCCType, LHS,
13038 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
13039 } else {
// General case: an addition overflowed if the result is (unsigned) below LHS;
// a subtraction overflowed if the result is (unsigned) above LHS.
13040 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
13041 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
13042 }
13043 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
13044}
13045
13047 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
// Expands a signed add/subtract-with-overflow node. Result receives the
// wrapping ADD/SUB of the two operands and Overflow the overflow flag,
// converted to the node's second result type. A node whose opcode is not
// ISD::SADDO is treated as the subtraction form.
// NOTE(review): the listing line naming this function is absent from this
// extract; presumably TargetLowering::expandSADDSUBO - confirm.
13048 SDLoc dl(Node);
13049 SDValue LHS = Node->getOperand(0);
13050 SDValue RHS = Node->getOperand(1);
13051 bool IsAdd = Node->getOpcode() == ISD::SADDO;
13052
13053 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
13054 LHS.getValueType(), LHS, RHS);
13055
13056 EVT ResultType = Node->getValueType(1);
13057 EVT OType = getSetCCResultType(
13058 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
13059
13060 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
// The wrapping and the saturating results differ exactly when the operation
// overflowed.
13061 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
13062 if (isOperationLegal(OpcSat, LHS.getValueType())) {
13063 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
13064 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
13065 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
13066 return;
13067 }
13068
13069 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
13070
13071 if (IsAdd) {
13072 // For an addition, the result should be less than one of the operands (LHS)
13073 // if and only if the other operand (RHS) is negative, otherwise there will
13074 // be overflow.
13075 SDValue ResultLowerThanLHS =
13076 DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
13077 SDValue RHSNegative = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETLT);
13078 Overflow = DAG.getBoolExtOrTrunc(
13079 DAG.getNode(ISD::XOR, dl, OType, RHSNegative, ResultLowerThanLHS), dl,
13080 ResultType, ResultType);
13081 } else {
13082 // For subtraction, overflow occurs when the signed comparison of operands
13083 // doesn't match the sign of the result.
13084 SDValue LHSLessThanRHS = DAG.getSetCC(dl, OType, LHS, RHS, ISD::SETLT);
13085 SDValue ResultNegative = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETLT);
13086 Overflow = DAG.getBoolExtOrTrunc(
13087 DAG.getNode(ISD::XOR, dl, OType, LHSLessThanRHS, ResultNegative), dl,
13088 ResultType, ResultType);
13089 }
13090}
13091
13093 SDValue &Overflow, SelectionDAG &DAG) const {
// Expands a multiply-with-overflow node (signed when the opcode is
// ISD::SMULO, unsigned otherwise). On success Result holds the low half of
// the product, Overflow the overflow flag, and true is returned. Returns
// false only for vector types when none of the strategies below applies.
// NOTE(review): the listing line naming this function and its leading
// parameters is absent from this extract; presumably
// TargetLowering::expandMULO(Node, Result, ...) - confirm.
13094 SDLoc dl(Node);
13095 EVT VT = Node->getValueType(0);
13096 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
13097 SDValue LHS = Node->getOperand(0);
13098 SDValue RHS = Node->getOperand(1);
13099 bool isSigned = Node->getOpcode() == ISD::SMULO;
13100
13101 // For power-of-two multiplications we can use a simpler shift expansion.
13102 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
13103 const APInt &C = RHSC->getAPIntValue();
13104 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
13105 if (C.isPowerOf2()) {
13106 // smulo(x, signed_min) is same as umulo(x, signed_min).
13107 bool UseArithShift = isSigned && !C.isMinSignedValue();
13108 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
13109 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
13110 Overflow = DAG.getSetCC(dl, SetCCVT,
13111 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
13112 dl, VT, Result, ShiftAmt),
13113 LHS, ISD::SETNE);
13114 return true;
13115 }
13116 }
13117
13118 SDValue BottomHalf;
13119 SDValue TopHalf;
13120 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
13121
// NOTE(review): the initializer of this opcode table is missing from this
// extract. From its uses below, column 0 is a two-result multiply, column 1 a
// high-half multiply and column 2 an extension opcode, indexed by signedness
// - confirm against the real source.
13122 static const unsigned Ops[2][3] =
13125 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
13126 BottomHalf = DAG.getNode(Ops[isSigned][0], dl, DAG.getVTList(VT, VT), LHS,
13127 RHS);
13128 TopHalf = BottomHalf.getValue(1);
13129 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
13130 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
13131 TopHalf = DAG.getNode(Ops[isSigned][1], dl, VT, LHS, RHS);
13132 } else if (isTypeLegal(WideVT)) {
// Multiply in the double-width type and split the product into halves.
13133 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
13134 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
13135 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
13136 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
13137 SDValue ShiftAmt =
13138 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
13139 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
13140 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
13141 } else {
13142 if (VT.isVector())
13143 return false;
13144
13145 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
13146 }
13147
13148 Result = BottomHalf;
13149 if (isSigned) {
// Signed: no overflow iff the top half equals the sign-extension of the
// bottom half (bottom half arithmetically shifted right by width - 1).
13150 SDValue ShiftAmt = DAG.getShiftAmountConstant(
13151 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
13152 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
13153 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
13154 } else {
// Unsigned: overflow iff the top half is non-zero.
13155 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
13156 DAG.getConstant(0, dl, VT), ISD::SETNE);
13157 }
13158
13159 // Truncate the result if SetCC returns a larger type than needed.
13160 EVT RType = Node->getValueType(1);
13161 if (RType.bitsLT(Overflow.getValueType()))
13162 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
13163
13164 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
13165 "Unexpected result type for S/UMULO legalization");
13166 return true;
13167}
13168
// Expands a vector reduction node. For power-of-two vector types it first
// tries to repeatedly split the vector in half and combine the halves with
// the reduction's base opcode; whatever remains is reduced element by
// element with scalar operations.
// NOTE(review): the signature line is absent from this extract; presumably
// TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) - confirm.
13170 SDLoc dl(Node);
13171 ISD::NodeType BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
13172 SDValue Op = Node->getOperand(0);
13173 SDNodeFlags Flags = Node->getFlags();
13174 EVT VT = Op.getValueType();
13175
13176 // Try to use a shuffle reduction for power of two vectors.
13177 if (VT.isPow2VectorType()) {
13178 // See if the reduction opcode is safe to use with widened types.
13179 bool WidenSrc = false;
// NOTE(review): several listing lines of this switch (additional case labels
// and possibly a default) are missing from this extract.
13180 switch (Node->getOpcode()) {
13183 case ISD::VECREDUCE_ADD:
13184 case ISD::VECREDUCE_MUL:
13185 case ISD::VECREDUCE_AND:
13186 case ISD::VECREDUCE_OR:
13187 case ISD::VECREDUCE_XOR:
13192 WidenSrc = VT.isFixedLengthVector();
13193 break;
13194 }
13195
// NOTE(review): the loop header that the `continue`/`break` statements below
// belong to is missing from this extract (one listing line was dropped).
13197 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
13198 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT)) {
13199 if (WidenSrc && Op.getOpcode() != ISD::BUILD_VECTOR) {
13200 // Attempt to widen the source vectors to a legal op.
13201 EVT WideVT = getTypeToTransformTo(*DAG.getContext(), HalfVT);
13202 if (WideVT.isVector() &&
13203 WideVT.getScalarType() == HalfVT.getScalarType() &&
13204 WideVT.getVectorNumElements() >= HalfVT.getVectorNumElements() &&
13205 isOperationLegalOrCustom(BaseOpcode, WideVT)) {
// Place each half into a poison vector of the wider type, combine there,
// then extract the low HalfVT part as the new operand.
13206 SDValue Lo, Hi;
13207 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
13208 Lo = DAG.getInsertSubvector(dl, DAG.getPOISON(WideVT), Lo, 0);
13209 Hi = DAG.getInsertSubvector(dl, DAG.getPOISON(WideVT), Hi, 0);
13210 Op = DAG.getNode(BaseOpcode, dl, WideVT, Lo, Hi, Flags);
13211 Op = DAG.getExtractSubvector(dl, HalfVT, Op, 0);
13212 VT = HalfVT;
13213 continue;
13214 }
13215 }
13216 break;
13217 }
13218
13219 SDValue Lo, Hi;
13220 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
13221 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Flags);
13222 VT = HalfVT;
13223
13224 // Stop if splitting is enough to make the reduction legal.
13225 if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
13226 return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
13227 Flags);
13228 }
13229 }
13230
// NOTE(review): the call that reports the error message below is missing from
// this extract (one listing line was dropped).
13231 if (VT.isScalableVector())
13233 "Expanding reductions for scalable vectors is undefined.");
13234
13235 EVT EltVT = VT.getVectorElementType();
13236 unsigned NumElts = VT.getVectorNumElements();
13237
// NOTE(review): the declaration of `Ops` is missing from this extract.
13239 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
13240
// Fold the remaining elements left to right with the scalar base opcode.
13241 SDValue Res = Ops[0];
13242 for (unsigned i = 1; i < NumElts; i++)
13243 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
13244
13245 // Result type may be wider than element type.
13246 if (EltVT != Node->getValueType(0))
13247 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
13248 return Res;
13249}
13250
// Expands a reduction node that carries an accumulator operand (operand 0)
// and a vector operand (operand 1): the vector elements are folded into the
// accumulator one at a time, in index order, with the reduction's base
// opcode.
// NOTE(review): the signature line is absent from this extract; presumably
// TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) -
// confirm.
13252 SDLoc dl(Node);
13253 SDValue AccOp = Node->getOperand(0);
13254 SDValue VecOp = Node->getOperand(1);
13255 SDNodeFlags Flags = Node->getFlags();
13256
13257 EVT VT = VecOp.getValueType();
13258 EVT EltVT = VT.getVectorElementType();
13259
// NOTE(review): the call that reports the error message below is missing from
// this extract (one listing line was dropped).
13260 if (VT.isScalableVector())
13262 "Expanding reductions for scalable vectors is undefined.");
13263
13264 unsigned NumElts = VT.getVectorNumElements();
13265
// NOTE(review): the declaration of `Ops` is missing from this extract.
13267 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
13268
13269 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
13270
// Start from the accumulator and apply the base opcode to each element in
// turn.
13271 SDValue Res = AccOp;
13272 for (unsigned i = 0; i < NumElts; i++)
13273 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
13274
13275 return Res;
13276}
13277
13279 SelectionDAG &DAG) const {
// Expands a remainder node (signed when the opcode is ISD::SREM, unsigned
// otherwise) in terms of a combined divide/remainder node or, failing that, a
// division. Stores the replacement in Result and returns true on success;
// returns false when neither operation is legal or custom for the type.
// NOTE(review): the listing line naming this function and its leading
// parameters is absent from this extract; presumably
// TargetLowering::expandREM(Node, Result, ...) - confirm.
13280 EVT VT = Node->getValueType(0);
13281 SDLoc dl(Node);
13282 bool isSigned = Node->getOpcode() == ISD::SREM;
13283 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
13284 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
13285 SDValue Dividend = Node->getOperand(0);
13286 SDValue Divisor = Node->getOperand(1);
13287 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
// Result number 1 of the combined node is used as the remainder.
13288 SDVTList VTs = DAG.getVTList(VT, VT);
13289 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
13290 return true;
13291 }
13292 if (isOperationLegalOrCustom(DivOpc, VT)) {
13293 // X % Y -> X-X/Y*Y
13294 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
13295 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
13296 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
13297 return true;
13298 }
13299 return false;
13300}
13301
13303 SelectionDAG &DAG) const {
// Expands a saturating floating-point to integer conversion (signed when the
// opcode is ISD::FP_TO_SINT_SAT, unsigned otherwise). Operand 0 is the
// floating-point source and operand 1 a VTSDNode giving the saturation type.
// Out-of-range inputs produce the saturation bounds and NaN produces zero.
// Uses a clamp followed by a plain conversion when the bounds are exactly
// representable and suitable min/max operations are available, and a
// compare/select sequence otherwise.
// NOTE(review): the listing line naming this function is absent from this
// extract; presumably TargetLowering::expandFP_TO_INT_SAT - confirm.
13304 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
13305 SDLoc dl(SDValue(Node, 0));
13306 SDValue Src = Node->getOperand(0);
13307
13308 // DstVT is the result type, while SatVT is the size to which we saturate
13309 EVT SrcVT = Src.getValueType();
13310 EVT DstVT = Node->getValueType(0);
13311
13312 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
13313 unsigned SatWidth = SatVT.getScalarSizeInBits();
13314 unsigned DstWidth = DstVT.getScalarSizeInBits();
13315 assert(SatWidth <= DstWidth &&
13316 "Expected saturation width smaller than result width");
13317
13318 // Determine minimum and maximum integer values and their corresponding
13319 // floating-point values.
13320 APInt MinInt, MaxInt;
13321 if (IsSigned) {
13322 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
13323 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
13324 } else {
13325 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
13326 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
13327 }
13328
13329 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
13330 // libcall emission cannot handle this. Large result types will fail.
13331 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
13332 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
13333 SrcVT = Src.getValueType();
13334 }
13335
13336 const fltSemantics &Sem = SrcVT.getFltSemantics();
13337 APFloat MinFloat(Sem);
13338 APFloat MaxFloat(Sem);
13339
// Convert the integer bounds to the source FP type, rounding toward zero, and
// remember whether either conversion was inexact.
13340 APFloat::opStatus MinStatus =
13341 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
13342 APFloat::opStatus MaxStatus =
13343 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
13344 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
13345 !(MaxStatus & APFloat::opStatus::opInexact);
13346
13347 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
13348 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
13349
13350 // If the integer bounds are exactly representable as floats and min/max are
13351 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
13352 // of comparisons and selects.
// Helper: emits the clamp-then-convert sequence with the given min/max
// opcodes, or returns an empty SDValue if either opcode is not legal or
// custom for SrcVT.
13353 auto EmitMinMax = [&](unsigned MinOpcode, unsigned MaxOpcode,
13354 bool MayPropagateNaN) {
13355 bool MinMaxLegal = isOperationLegalOrCustom(MinOpcode, SrcVT) &&
13356 isOperationLegalOrCustom(MaxOpcode, SrcVT);
13357 if (!MinMaxLegal)
13358 return SDValue();
13359
13360 SDValue Clamped = Src;
13361
13362 // Clamp Src by MinFloat from below. If !MayPropagateNaN and Src is NaN
13363 // then the result is MinFloat.
13364 Clamped = DAG.getNode(MaxOpcode, dl, SrcVT, Clamped, MinFloatNode);
13365 // Clamp by MaxFloat from above. If !MayPropagateNaN then NaN cannot occur.
13366 Clamped = DAG.getNode(MinOpcode, dl, SrcVT, Clamped, MaxFloatNode);
13367 // Convert clamped value to integer.
13368 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
13369 dl, DstVT, Clamped);
13370
13371 // If !MayPropagateNaN and the conversion is unsigned, we're done,
13372 // because we mapped NaN to MinFloat, which will cast to zero.
13373 if (!MayPropagateNaN && !IsSigned)
13374 return FpToInt;
13375
13376 // Otherwise, select 0 if Src is NaN.
13377 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
13378 EVT SetCCVT =
13379 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
13380 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
13381 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
13382 };
// Try the min/max opcode pairs in order of preference; the first pair that is
// legal or custom wins.
13383 if (AreExactFloatBounds) {
13384 if (SDValue Res = EmitMinMax(ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM,
13385 /*MayPropagateNaN=*/false))
13386 return Res;
13387 // These may propagate NaN for sNaN operands.
13388 if (SDValue Res =
13389 EmitMinMax(ISD::FMINNUM, ISD::FMAXNUM, /*MayPropagateNaN=*/true))
13390 return Res;
13391 // These always propagate NaN.
13392 if (SDValue Res =
13393 EmitMinMax(ISD::FMINIMUM, ISD::FMAXIMUM, /*MayPropagateNaN=*/true))
13394 return Res;
13395 }
13396
13397 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
13398 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
13399
13400 // Result of direct conversion. The assumption here is that the operation is
13401 // non-trapping and it's fine to apply it to an out-of-range value if we
13402 // select it away later.
13403 SDValue FpToInt =
13404 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
13405
13406 SDValue Select = FpToInt;
13407
13408 EVT SetCCVT =
13409 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
13410
13411 // If Src ULT MinFloat, select MinInt. In particular, this also selects
13412 // MinInt if Src is NaN.
13413 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
13414 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
13415 // If Src OGT MaxFloat, select MaxInt.
13416 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
13417 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
13418
13419 // In the unsigned case we are done, because we mapped NaN to MinInt, which
13420 // is already zero.
13421 if (!IsSigned)
13422 return Select;
13423
13424 // Otherwise, select 0 if Src is NaN.
13425 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
13426 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
13427 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
13428}
13429
13431 const SDLoc &dl,
13432 SelectionDAG &DAG) const {
// Converts Op to ResultVT so that an inexact result ends up with an odd
// integer bit pattern ("round to odd"). Op is returned unchanged when its
// scalar type already equals that of ResultVT.
// NOTE(review): the listing line naming this function and its leading
// parameters (ResultVT, Op) is absent from this extract; presumably
// TargetLowering::expandRoundInexactToOdd - confirm.
13433 EVT OperandVT = Op.getValueType();
13434 if (OperandVT.getScalarType() == ResultVT.getScalarType())
13435 return Op;
13436 EVT ResultIntVT = ResultVT.changeTypeToInteger();
13437 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
13438 // can induce double-rounding which may alter the results. We can
13439 // correct for this using a trick explained in: Boldo, Sylvie, and
13440 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
13441 // World Congress. 2005.
// Narrow, then convert back so the narrowed value can be compared with the
// original in the wide type.
13442 SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
13443 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);
13444
13445 // We can keep the narrow value as-is if narrowing was exact (no
13446 // rounding error), the wide value was NaN (the narrow value is also
13447 // NaN and should be preserved) or if we rounded to the odd value.
13448 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
13449 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
13450 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
13451 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
13452 EVT ResultIntVTCCVT = getSetCCResultType(
13453 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
13454 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
13455 // The result is already odd so we don't need to do anything.
13456 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
13457
13458 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
13459 Op.getValueType());
13460 // We keep results which are exact, odd or NaN.
// SETUEQ is true for equal or unordered operands, which covers both the exact
// and the NaN case.
13461 SDValue KeepNarrow =
13462 DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
13463 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
13464 // We morally performed a round-down if AbsNarrow is smaller than
13465 // AbsWide.
13466 SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
13467 SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
13468 SDValue NarrowIsRd =
13469 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
13470 // If the narrow value is odd or exact, pick it.
13471 // Otherwise, narrow is even and corresponds to either the rounded-up
13472 // or rounded-down value. If narrow is the rounded-down value, we want
13473 // the rounded-up value as it will be odd.
// The adjustment is applied to the integer bit pattern: +1 if the narrow
// value was rounded down in magnitude, -1 otherwise.
13474 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
13475 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
13476 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
13477 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
13478}
13479
// Expands an ISD::FP_ROUND node whose result scalar type is bf16 into
// integer arithmetic on the f32 bit pattern. Returns an empty SDValue for
// any other result type.
// NOTE(review): the signature line is absent from this extract; presumably
// TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) - confirm.
13481 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
13482 SDValue Op = Node->getOperand(0);
13483 EVT VT = Node->getValueType(0);
13484 SDLoc dl(Node);
13485 if (VT.getScalarType() == MVT::bf16) {
// When the node's constant operand 1 equals 1, emit a single FP_TO_BF16 node
// instead of the manual expansion below.
13486 if (Node->getConstantOperandVal(1) == 1) {
13487 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
13488 }
13489 EVT OperandVT = Op.getValueType();
// Unordered self-comparison: true exactly when the source is NaN.
13490 SDValue IsNaN = DAG.getSetCC(
13491 dl,
13492 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
13493 Op, Op, ISD::SETUO);
13494
13495 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
13496 // can induce double-rounding which may alter the results. We can
13497 // correct for this using a trick explained in: Boldo, Sylvie, and
13498 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
13499 // World Congress. 2005.
13500 EVT F32 = VT.changeElementType(*DAG.getContext(), MVT::f32);
13501 EVT I32 = F32.changeTypeToInteger();
13502 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
13503 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
13504
13505 // Conversions should set NaN's quiet bit. This also prevents NaNs from
13506 // turning into infinities.
13507 SDValue NaN =
13508 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
13509
13510 // Factor in the contribution of the low 16 bits.
// The bias is 0x7fff plus bit 16 of the value, i.e. the lowest bit that
// survives the final shift.
13511 SDValue One = DAG.getConstant(1, dl, I32);
13512 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
13513 DAG.getShiftAmountConstant(16, I32, dl));
13514 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
13515 SDValue RoundingBias =
13516 DAG.getNode(ISD::ADD, dl, I32, Lsb, DAG.getConstant(0x7fff, dl, I32));
13517 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
13518
13519 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
13520 // 0x80000000.
13521 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
13522
13523 // Now that we have rounded, shift the bits into position.
13524 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
13525 DAG.getShiftAmountConstant(16, I32, dl));
13526 EVT I16 = I32.changeElementType(*DAG.getContext(), MVT::i16);
13527 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
13528 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
13529 }
13530 return SDValue();
13531}
13532
13534 SelectionDAG &DAG) const {
// Expands a VECTOR_SPLICE_LEFT / VECTOR_SPLICE_RIGHT node through a stack
// temporary: both input vectors are stored back to back, and the result is
// loaded from a pointer offset into that buffer.
// NOTE(review): the listing line naming this function is absent from this
// extract; presumably TargetLowering::expandVectorSplice(SDNode *Node, ...) -
// confirm.
13535 assert((Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT ||
13536 Node->getOpcode() == ISD::VECTOR_SPLICE_RIGHT) &&
13537 "Unexpected opcode!");
13538 assert((Node->getValueType(0).isScalableVector() ||
13539 !isa<ConstantSDNode>(Node->getOperand(2))) &&
13540 "Fixed length vector types with constant offsets expected to use "
13541 "SHUFFLE_VECTOR!");
13542
13543 EVT VT = Node->getValueType(0);
13544 SDValue V1 = Node->getOperand(0);
13545 SDValue V2 = Node->getOperand(1);
13546 SDValue Offset = Node->getOperand(2);
13547 SDLoc DL(Node);
13548
13549 // Expand through memory thusly:
13550 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
13551 // Store V1, Ptr
13552 // Store V2, Ptr + sizeof(V1)
13553 // if (VECTOR_SPLICE_LEFT)
13554 // Ptr = Ptr + (Offset * sizeof(VT.Elt))
13555 // else
13556 // Ptr = Ptr + sizeof(V1) - (Offset * sizeof(VT.Elt))
13557 // Res = Load Ptr
13558
13559 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
13560
// NOTE(review): the first line of the MemVT declaration is missing from this
// extract; from the visible argument it has twice VT's element count.
13562 VT.getVectorElementCount() * 2);
13563 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
13564 EVT PtrVT = StackPtr.getValueType();
13565 auto &MF = DAG.getMachineFunction();
13566 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
13567 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
13568
13569 // Store the lo part of CONCAT_VECTORS(V1, V2)
13570 SDValue StoreV1 =
13571 DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo, Alignment);
13572 // Store the hi part of CONCAT_VECTORS(V1, V2)
13573 SDValue VTBytes = DAG.getTypeSize(DL, PtrVT, VT.getStoreSize());
13574 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, VTBytes);
13575 SDValue StoreV2 =
13576 DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo, Alignment);
13577
13578 // NOTE: TrailingBytes must be clamped so as not to read outside of V1:V2.
13579 SDValue EltByteSize =
13580 DAG.getTypeSize(DL, PtrVT, VT.getVectorElementType().getStoreSize());
13581 Offset = DAG.getZExtOrTrunc(Offset, DL, PtrVT);
13582 SDValue TrailingBytes = DAG.getNode(ISD::MUL, DL, PtrVT, Offset, EltByteSize);
13583
// Clamp the byte offset to the size of one input vector.
13584 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VTBytes);
13585
// LEFT reads starting TrailingBytes after the start of V1; RIGHT reads
// starting TrailingBytes before the start of V2.
13586 if (Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT)
13587 StackPtr = DAG.getMemBasePlusOffset(StackPtr, TrailingBytes, DL);
13588 else
13589 StackPtr = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
13590
13591 // Load the spliced result
// NOTE(review): the remaining arguments of this load are missing from this
// extract (one listing line was dropped).
13592 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
13594}
13595
13597 SelectionDAG &DAG) const {
13598 SDLoc DL(Node);
13599 SDValue Vec = Node->getOperand(0);
13600 SDValue Mask = Node->getOperand(1);
13601 SDValue Passthru = Node->getOperand(2);
13602
13603 EVT VecVT = Vec.getValueType();
13604 EVT ScalarVT = VecVT.getScalarType();
13605 EVT MaskVT = Mask.getValueType();
13606 EVT MaskScalarVT = MaskVT.getScalarType();
13607
13608 // Needs to be handled by targets that have scalable vector types.
13609 if (VecVT.isScalableVector())
13610 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
13611
13612 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
13613 SDValue StackPtr = DAG.CreateStackTemporary(VecVT.getStoreSize(), Alignment);
13614 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
13615 MachinePointerInfo PtrInfo =
13617
13618 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
13619 SDValue Chain = DAG.getEntryNode();
13620 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
13621
13622 bool HasPassthru = !Passthru.isUndef();
13623
13624 // If we have a passthru vector, store it on the stack, overwrite the matching
13625 // positions and then re-write the last element that was potentially
13626 // overwritten even though mask[i] = false.
13627 if (HasPassthru)
13628 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo, Alignment);
13629
13630 SDValue LastWriteVal;
13631 APInt PassthruSplatVal;
13632 bool IsSplatPassthru =
13633 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
13634
13635 if (IsSplatPassthru) {
13636 // As we do not know which position we wrote to last, we cannot simply
13637 // access that index from the passthru vector. So we first check if passthru
13638 // is a splat vector, to use any element ...
13639 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
13640 } else if (HasPassthru) {
13641 // ... if it is not a splat vector, we need to get the passthru value at
13642 // position = popcount(mask) and re-load it from the stack before it is
13643 // overwritten in the loop below.
13644 EVT PopcountVT = ScalarVT.changeTypeToInteger();
13645 SDValue Popcount = DAG.getNode(
13647 MaskVT.changeVectorElementType(*DAG.getContext(), MVT::i1), Mask);
13648 Popcount = DAG.getNode(
13650 MaskVT.changeVectorElementType(*DAG.getContext(), PopcountVT),
13651 Popcount);
13652 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
13653 SDValue LastElmtPtr =
13654 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
13655 LastWriteVal = DAG.getLoad(
13656 ScalarVT, DL, Chain, LastElmtPtr,
13658 Chain = LastWriteVal.getValue(1);
13659 }
13660
13661 unsigned NumElms = VecVT.getVectorNumElements();
13662 for (unsigned I = 0; I < NumElms; I++) {
13663 SDValue ValI = DAG.getExtractVectorElt(DL, ScalarVT, Vec, I);
13664 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
13665 Chain = DAG.getStore(
13666 Chain, DL, ValI, OutPtr,
13668
13669 // Get the mask value and add it to the current output position. This
13670 // either increments by 1 if MaskI is true or adds 0 otherwise.
13671 // Freeze in case we have poison/undef mask entries.
13672 SDValue MaskI = DAG.getExtractVectorElt(DL, MaskScalarVT, Mask, I);
13673 MaskI = DAG.getFreeze(MaskI);
13674 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
13675 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
13676 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
13677
13678 if (HasPassthru && I == NumElms - 1) {
13679 SDValue EndOfVector =
13680 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
13681 SDValue AllLanesSelected =
13682 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
13683 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
13684 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
13685
13686 // Re-write the last ValI if all lanes were selected. Otherwise,
13687 // overwrite the last write with the passthru value.
13688 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
13689 LastWriteVal, SDNodeFlags::Unpredictable);
13690 Chain = DAG.getStore(
13691 Chain, DL, LastWriteVal, OutPtr,
13693 }
13694 }
13695
13696 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo, Alignment);
13697}
13698
// Expands a CTTZ_ELTS / CTTZ_ELTS_ZERO_POISON node (operand 0 is the mask)
// into simpler DAG nodes and returns the replacement value in the node's
// result type.
// NOTE(review): original line 13699 (the function signature) is absent from
// this extracted listing; the body below starts at its first statement.
13700 SDLoc DL(Node);
13701 EVT VT = Node->getValueType(0);
13702
13703 bool ZeroIsPoison = Node->getOpcode() == ISD::CTTZ_ELTS_ZERO_POISON;
13704 auto [Mask, StepVec] =
13705 getLegalMaskAndStepVector(Node->getOperand(0), ZeroIsPoison, DL, DAG);
13706
13707 // No legal step vector: split mask in half and recombine results.
13708 // ResLo uses the non-poison CTTZ_ELTS so its result is well-defined
13709 // (== LoNumElts when no active lane), allowing the SETNE comparison.
13710 // Result: (ResLo != LoNumElts) ? ResLo : (LoNumElts + ResHi)
13711 if (!StepVec) {
13712 EVT ResVT = Node->getValueType(0);
13713 auto [MaskLo, MaskHi] = DAG.SplitVector(Node->getOperand(0), DL);
13714 SDValue LoNumElts = DAG.getElementCount(
13715 DL, ResVT, MaskLo.getValueType().getVectorElementCount());
13716 SDValue ResLo = DAG.getNode(ISD::CTTZ_ELTS, DL, ResVT, MaskLo);
// The high half keeps the original opcode, so it stays the zero-is-poison
// variant when the input node was one.
13717 SDValue ResHi = DAG.getNode(Node->getOpcode(), DL, ResVT, MaskHi);
13718 SDValue ResLoNotNumElts = DAG.getSetCC(
13719 DL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ResVT),
13720 ResLo, LoNumElts, ISD::SETNE);
13721 // Per LangRef, ResVT must be wide enough to hold the total element count,
13722 // so the sum cannot wrap as an unsigned add. NSW is not guaranteed since
13723 // the count is only required to fit unsigned.
// NOTE(review): original line 13725 (the last argument of the getNode call
// below, presumably the node flags) is absent from this listing.
13724 SDValue Sum = DAG.getNode(ISD::ADD, DL, ResVT, LoNumElts, ResHi,
13726 return DAG.getSelect(DL, ResVT, ResLoNotNumElts, ResLo, Sum);
13727 }
13728
13729 EVT StepVecVT = StepVec.getValueType();
13730 EVT StepVT = StepVecVT.getVectorElementType();
13731
13732 // Promote the scalar result type early to avoid redundant zexts.
// NOTE(review): original line 13733 (presumably the condition guarding the
// assignment below) is absent from this listing.
13734 StepVT = getTypeToTransformTo(*DAG.getContext(), StepVT);
13735
// Compute VL - Max, where each lane contributes (Mask ? VL - Step : 0) and VL
// is the element count of the step vector.
13736 SDValue VL =
13737 DAG.getElementCount(DL, StepVT, StepVecVT.getVectorElementCount());
13738 SDValue SplatVL = DAG.getSplat(StepVecVT, DL, VL);
13739 StepVec = DAG.getNode(ISD::SUB, DL, StepVecVT, SplatVL, StepVec);
13740 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
13741 SDValue Select = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
// NOTE(review): original line 13742 (the start of the definition of `Max`,
// presumably an unsigned-max reduction over Select) is absent from this
// listing.
13743 StepVecVT.getVectorElementType(), Select);
13744 SDValue Sub = DAG.getNode(ISD::SUB, DL, StepVT, VL,
13745 DAG.getZExtOrTrunc(Max, DL, StepVT));
13746
// Adjust the (possibly promoted) scalar back to the node's result type.
13747 return DAG.getZExtOrTrunc(Sub, DL, VT);
13748}
13749
13751 SelectionDAG &DAG) const {
// Expands a partial-reduction multiply-accumulate node into extends, an
// optional multiply, subvector extracts and a chain of adds.
// Operand 0 is the accumulator; operands 1 and 2 are the multiplicands.
// NOTE(review): original line 13750 (the start of the signature) is absent
// from this extracted listing.
13752 SDLoc DL(N);
13753 SDValue Acc = N->getOperand(0);
13754 SDValue MulLHS = N->getOperand(1);
13755 SDValue MulRHS = N->getOperand(2);
13756 EVT AccVT = Acc.getValueType();
13757 EVT MulOpVT = MulLHS.getValueType();
13758
// NOTE(review): original line 13760 (the start of the ExtMulOpVT
// initializer) is absent; only its trailing element-count argument is visible.
13759 EVT ExtMulOpVT =
13761 MulOpVT.getVectorElementCount());
13762
// Pick the extension opcode matching the node's opcode.
// NOTE(review): the three `case` labels (original lines 13767, 13770, 13773)
// are absent from this listing; the visible bodies select ZERO_EXTEND,
// SIGN_EXTEND and FP_EXTEND respectively.
13763 unsigned ExtOpcLHS, ExtOpcRHS;
13764 switch (N->getOpcode()) {
13765 default:
13766 llvm_unreachable("Unexpected opcode");
13768 ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
13769 break;
13771 ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
13772 break;
13774 ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
13775 break;
13776 }
13777
// Only extend when the extended type differs from the operand type.
13778 if (ExtMulOpVT != MulOpVT) {
13779 MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
13780 MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
13781 }
// Skip the multiply when the right-hand operand is one (or a splat of one);
// the FP variant uses FMUL, the integer variants use MUL.
13782 SDValue Input = MulLHS;
13783 if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
13784 if (!llvm::isOneOrOneSplatFP(MulRHS))
13785 Input = DAG.getNode(ISD::FMUL, DL, ExtMulOpVT, MulLHS, MulRHS);
13786 } else if (!llvm::isOneOrOneSplat(MulRHS)) {
13787 Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
13788 }
13789
// Stride is the accumulator's (minimum) element count; ScaleFactor is how
// many accumulator-sized pieces the multiply operand splits into.
13790 unsigned Stride = AccVT.getVectorMinNumElements();
13791 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
13792
13793 // Collect all of the subvectors
13794 std::deque<SDValue> Subvectors = {Acc};
13795 for (unsigned I = 0; I < ScaleFactor; I++)
13796 Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
13797
13798 unsigned FlatNode =
13799 N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;
13800
13801 // Flatten the subvector tree
// Each iteration adds the two front entries, appends the sum at the back and
// removes the two inputs, so the queue shrinks by one per iteration.
13802 while (Subvectors.size() > 1) {
13803 Subvectors.push_back(
13804 DAG.getNode(FlatNode, DL, AccVT, {Subvectors[0], Subvectors[1]}));
13805 Subvectors.pop_front();
13806 Subvectors.pop_front();
13807 }
13808
13809 assert(Subvectors.size() == 1 &&
13810 "There should only be one subvector after tree flattening");
13811
13812 return Subvectors[0];
13813}
13814
13815/// Given a store node \p StoreNode, return true if it is safe to fold that node
13816/// into \p FPNode, which expands to a library call with output pointers.
13818 SDNode *FPNode) {
// NOTE(review): original lines 13817 (start of the signature), 13819, 13821
// and 13828 are absent from this extracted listing. They presumably declare
// `Visited`, `Worklist` and `MaxSteps`, which are used below; confirm against
// the upstream file.
13820 SmallVector<const SDNode *, 8> DeferredNodes;
13822
13823 // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
13824 for (SDValue Op : StoreNode->ops())
13825 if (Op.getNode() != FPNode)
13826 Worklist.push_back(Op.getNode());
13827
// Walk the operands of StoreNode transitively; each node is processed once.
13829 while (!Worklist.empty()) {
13830 const SDNode *Node = Worklist.pop_back_val();
13831 auto [_, Inserted] = Visited.insert(Node);
13832 if (!Inserted)
13833 continue;
13834
// Conservatively report "not safe" once the step budget is used up
// (a MaxSteps of 0 disables the limit).
13835 if (MaxSteps > 0 && Visited.size() >= MaxSteps)
13836 return false;
13837
13838 // Reached the FPNode (would result in a cycle).
13839 // OR Reached CALLSEQ_START (would result in nested call sequences).
13840 if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
13841 return false;
13842
13843 if (Node->getOpcode() == ISD::CALLSEQ_END) {
13844 // Defer looking into call sequences (so we can check we're outside one).
13845 // We still need to look through these for the predecessor check.
13846 DeferredNodes.push_back(Node);
13847 continue;
13848 }
13849
13850 for (SDValue Op : Node->ops())
13851 Worklist.push_back(Op.getNode());
13852 }
13853
13854 // True if we're outside a call sequence and don't have the FPNode as a
13855 // predecessor. No cycles or nested call sequences possible.
// The deferred CALLSEQ_END nodes seed this final predecessor search, which
// reuses the Visited set built above.
13856 return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes,
13857 MaxSteps);
13858}
13859
13861 SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
13863 std::optional<unsigned> CallRetResNo) const {
// Expands a multi-result node into a call to the library function LC. The
// result numbered CallRetResNo (if any) is the call's return value; every
// other result is passed as an output pointer and loaded back after the call.
// Returns false without changing anything when LC is unknown or has no
// supported implementation; otherwise appends one value per result to
// `Results` and returns true.
// NOTE(review): original lines 13860 and 13862 (parts of the signature,
// presumably including the `Results` parameter) are absent from this listing.
13864 if (LC == RTLIB::UNKNOWN_LIBCALL)
13865 return false;
13866
13867 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
13868 if (LibcallImpl == RTLIB::Unsupported)
13869 return false;
13870
13871 LLVMContext &Ctx = *DAG.getContext();
13872 EVT VT = Node->getValueType(0);
13873 unsigned NumResults = Node->getNumValues();
13874
13875 // Find users of the node that store the results (and share input chains). The
13876 // destination pointers can be used instead of creating stack allocations.
13877 SDValue StoresInChain;
13878 SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
13879 for (SDNode *User : Node->users()) {
// NOTE(review): original line 13880 (the condition guarding this `continue`,
// presumably a check that User is a store) is absent from this listing.
13881 continue;
13882 auto *ST = cast<StoreSDNode>(User);
13883 SDValue StoreValue = ST->getValue();
13884 unsigned ResNo = StoreValue.getResNo();
13885 // Ensure the store corresponds to an output pointer.
13886 if (CallRetResNo == ResNo)
13887 continue;
13888 // Ensure the store is to the default address space and is not atomic or volatile.
13889 if (!ST->isSimple() || ST->getAddressSpace() != 0)
13890 continue;
13891 // Ensure all store chains are the same (so they don't alias).
13892 if (StoresInChain && ST->getChain() != StoresInChain)
13893 continue;
13894 // Ensure the store is properly aligned.
13895 Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx);
13896 if (ST->getAlign() <
13897 DAG.getDataLayout().getABITypeAlign(StoreType->getScalarType()))
13898 continue;
13899 // Avoid:
13900 // 1. Creating cyclic dependencies.
13901 // 2. Expanding the node to a call within a call sequence.
// NOTE(review): original line 13902 (the condition guarding this `continue`)
// is absent from this listing.
13903 continue;
13904 ResultStores[ResNo] = ST;
13905 StoresInChain = ST->getChain();
13906 }
13907
13908 ArgListTy Args;
13909
13910 // Pass the arguments.
13911 for (const SDValue &Op : Node->op_values()) {
13912 EVT ArgVT = Op.getValueType();
13913 Type *ArgTy = ArgVT.getTypeForEVT(Ctx);
13914 Args.emplace_back(Op, ArgTy);
13915 }
13916
13917 // Pass the output pointers.
// A result with a foldable store reuses that store's base pointer; any other
// result gets a fresh stack temporary.
// NOTE(review): original line 13919 (presumably the declaration of
// `PointerTy`) is absent from this listing.
13918 SmallVector<SDValue, 2> ResultPtrs(NumResults);
13920 for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) {
13921 if (ResNo == CallRetResNo)
13922 continue;
13923 EVT ResVT = Node->getValueType(ResNo);
13924 SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(ResVT);
13925 ResultPtrs[ResNo] = ResultPtr;
13926 Args.emplace_back(ResultPtr, PointerTy);
13927 }
13928
13929 SDLoc DL(Node);
13930
// NOTE(review): original line 13931 (the condition opening the block below)
// is absent from this listing.
13932 // Pass the vector mask (if required).
13933 EVT MaskVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
13934 SDValue Mask = DAG.getBoolConstant(true, DL, MaskVT, VT);
13935 Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
13936 }
13937
// The call returns void unless one result is designated as its return value.
13938 Type *RetType = CallRetResNo.has_value()
13939 ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
13940 : Type::getVoidTy(Ctx);
// Chain the call after the folded stores' shared input chain, or after the
// entry node when no store was folded.
13941 SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
13942 SDValue Callee =
13943 DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
// NOTE(review): original line 13944 (presumably the declaration of `CLI`) is
// absent from this listing.
13945 CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
13946 getLibcallImplCallingConv(LibcallImpl), RetType, Callee, std::move(Args));
13947
13948 auto [Call, CallChain] = LowerCallTo(CLI);
13949
13950 for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
13951 if (ResNo == CallRetResNo) {
13952 Results.push_back(Call);
13953 continue;
13954 }
// NOTE(review): PtrInfo is still default-constructed when the load below is
// created; the assignments further down happen after getLoad and are not read
// again in the visible code. Confirm whether the pointer info was meant to be
// computed before the load.
13955 MachinePointerInfo PtrInfo;
13956 SDValue LoadResult = DAG.getLoad(Node->getValueType(ResNo), DL, CallChain,
13957 ResultPtr, PtrInfo);
13958 SDValue OutChain = LoadResult.getValue(1);
13959
13960 if (StoreSDNode *ST = ResultStores[ResNo]) {
13961 // Replace store with the library call.
13962 DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
13963 PtrInfo = ST->getPointerInfo();
13964 } else {
// NOTE(review): original line 13965 (the start of this assignment) is absent
// from this listing.
13966 DAG.getMachineFunction(),
13967 cast<FrameIndexSDNode>(ResultPtr)->getIndex());
13968 }
13969
13970 Results.push_back(LoadResult);
13971 }
13972
13973 return true;
13974}
13975
13977 SDValue &LHS, SDValue &RHS,
13978 SDValue &CC, SDValue Mask,
13979 SDValue EVL, bool &NeedInvert,
13980 const SDLoc &dl, SDValue &Chain,
13981 bool IsSignaling) const {
// Rewrites the comparison (LHS CC RHS) so that it only uses condition codes
// that are legal (or custom) for the operand type.
//
// Returns true when LHS/RHS/CC were modified and false when nothing had to be
// done. On a true return either:
//  - CC holds a replacement condition code (operands possibly swapped) and
//    NeedInvert tells the caller to invert the comparison result, or
//  - LHS holds the already-computed result and RHS/CC are cleared.
// Mask and EVL must both be set (VP form) or both be unset.
// NOTE(review): original line 13976 (the start of the signature, presumably
// including the `VT` parameter used below) is absent from this listing.
13982 MVT OpVT = LHS.getSimpleValueType();
13983 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
13984 NeedInvert = false;
13985 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
13986 bool IsNonVP = !EVL;
13987 switch (getCondCodeAction(CCCode, OpVT)) {
13988 default:
13989 llvm_unreachable("Unknown condition code action!");
// NOTE(review): original lines 13990, 13993 and 13994 (the `case` labels and
// the declaration of `InvCC`, presumably the operand-swapped condition code)
// are absent from this listing.
13991 // Nothing to do.
13992 break;
13995 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
13996 std::swap(LHS, RHS);
13997 CC = DAG.getCondCode(InvCC);
13998 return true;
13999 }
14000 // Swapping operands didn't work. Try inverting the condition.
14001 bool NeedSwap = false;
14002 InvCC = getSetCCInverse(CCCode, OpVT);
14003 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
14004 // If inverting the condition is not enough, try swapping operands
14005 // on top of it.
14006 InvCC = ISD::getSetCCSwappedOperands(InvCC);
14007 NeedSwap = true;
14008 }
14009 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
14010 CC = DAG.getCondCode(InvCC);
14011 NeedInvert = true;
14012 if (NeedSwap)
14013 std::swap(LHS, RHS);
14014 return true;
14015 }
14016
14017 // Special case: expand i1 comparisons using logical operations.
14018 if (OpVT == MVT::i1) {
14019 SDValue Ret;
14020 switch (CCCode) {
14021 default:
14022 llvm_unreachable("Unknown integer setcc!");
14023 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
14024 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
14025 MVT::i1);
14026 break;
14027 case ISD::SETNE: // X != Y --> (X ^ Y)
14028 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
14029 break;
14030 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
14031 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
14032 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
14033 DAG.getNOT(dl, LHS, MVT::i1));
14034 break;
14035 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
14036 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
14037 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
14038 DAG.getNOT(dl, RHS, MVT::i1));
14039 break;
14040 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
14041 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
14042 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
14043 DAG.getNOT(dl, LHS, MVT::i1));
14044 break;
14045 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
14046 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
14047 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
14048 DAG.getNOT(dl, RHS, MVT::i1));
14049 break;
14050 }
14051
// The logical result replaces the comparison: hand it back in LHS and clear
// RHS/CC to signal that no setcc remains.
14052 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
14053 RHS = SDValue();
14054 CC = SDValue();
14055 return true;
14056 }
14057
// Otherwise split the comparison into two comparisons (CC1, CC2) combined
// with Opc (AND or OR).
// NOTE(review): original line 14058 (presumably the declaration of CC1 and
// CC2) is absent from this listing.
14059 unsigned Opc = 0;
14060 switch (CCCode) {
14061 default:
14062 llvm_unreachable("Don't know how to expand this condition!");
14063 case ISD::SETUO:
14064 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
14065 CC1 = ISD::SETUNE;
14066 CC2 = ISD::SETUNE;
14067 Opc = ISD::OR;
14068 break;
14069 }
14071 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
// NOTE(review): original line 14070 (the start of the assert above) is absent
// from this listing. The message says "SETUE" although the enclosing case is
// SETUO; confirm whether the wording is intended.
14072 NeedInvert = true;
14073 [[fallthrough]];
14074 case ISD::SETO:
// NOTE(review): original line 14075 (the start of the assert below) is absent
// from this listing.
14076 "If SETO is expanded, SETOEQ must be legal!");
14077 CC1 = ISD::SETOEQ;
14078 CC2 = ISD::SETOEQ;
14079 Opc = ISD::AND;
14080 break;
14081 case ISD::SETONE:
14082 case ISD::SETUEQ:
14083 // If the SETUO or SETO CC isn't legal, we might be able to use
14084 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
14085 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
14086 // the operands.
14087 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
14088 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
14089 isCondCodeLegal(ISD::SETOLT, OpVT))) {
14090 CC1 = ISD::SETOGT;
14091 CC2 = ISD::SETOLT;
14092 Opc = ISD::OR;
14093 NeedInvert = ((unsigned)CCCode & 0x8U);
14094 break;
14095 }
14096 [[fallthrough]];
14097 case ISD::SETOEQ:
14098 case ISD::SETOGT:
14099 case ISD::SETOGE:
14100 case ISD::SETOLT:
14101 case ISD::SETOLE:
14102 case ISD::SETUNE:
14103 case ISD::SETUGT:
14104 case ISD::SETUGE:
14105 case ISD::SETULT:
14106 case ISD::SETULE:
14107 // If we are floating point, assign and break, otherwise fall through.
14108 if (!OpVT.isInteger()) {
14109 // We can use the 4th bit to tell if we are the unordered
14110 // or ordered version of the opcode.
14111 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
14112 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
14113 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
14114 break;
14115 }
14116 // Fallthrough if we are unsigned integer.
14117 [[fallthrough]];
14118 case ISD::SETLE:
14119 case ISD::SETGT:
14120 case ISD::SETGE:
14121 case ISD::SETLT:
14122 case ISD::SETNE:
14123 case ISD::SETEQ:
14124 // If all combinations of inverting the condition and swapping operands
14125 // didn't work then we have no means to expand the condition.
14126 llvm_unreachable("Don't know how to expand this condition!");
14127 }
14128
14129 SDValue SetCC1, SetCC2;
14130 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
14131 // If we aren't the ordered or unordered operation,
14132 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
14133 if (IsNonVP) {
14134 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
14135 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
14136 } else {
14137 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
14138 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
14139 }
14140 } else {
14141 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
14142 if (IsNonVP) {
14143 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
14144 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
14145 } else {
14146 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
14147 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
14148 }
14149 }
// When a chain is in use, merge the chain results of both comparisons.
14150 if (Chain)
14151 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
14152 SetCC2.getValue(1));
14153 if (IsNonVP)
14154 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
14155 else {
14156 // Transform the binary opcode to the VP equivalent.
14157 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
14158 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
14159 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
14160 }
// The combined value is returned in LHS; RHS/CC are cleared.
14161 RHS = SDValue();
14162 CC = SDValue();
14163 return true;
14164 }
14165 }
14166 return false;
14167}
14168
14170 SelectionDAG &DAG) const {
// Expands a vector operation by splitting every operand into low and high
// halves, applying the same opcode (with the node's flags) to each half and
// concatenating the two results. Returns an empty SDValue when the expansion
// is not applicable.
// NOTE(review): original line 14169 (the start of the signature) is absent
// from this extracted listing.
14171 EVT VT = Node->getValueType(0);
14172 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
14173 // split into two equal parts.
14174 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
14175 return SDValue();
14176
14177 // Restrict expansion to cases where both parts can be concatenated.
14178 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
14179 if (LoVT != HiVT || !isTypeLegal(LoVT))
14180 return SDValue();
14181
14182 SDLoc DL(Node);
14183 unsigned Opcode = Node->getOpcode();
14184
14185 // Don't expand if the result is likely to be unrolled anyway.
14186 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
14187 return SDValue();
14188
// Split each operand with the result's half types.
// assumes every operand is a vector with the same element count as the
// result — TODO confirm against callers.
14189 SmallVector<SDValue, 4> LoOps, HiOps;
14190 for (const SDValue &V : Node->op_values()) {
14191 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
14192 LoOps.push_back(Lo);
14193 HiOps.push_back(Hi);
14194 }
14195
14196 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps, Node->getFlags());
14197 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps, Node->getFlags());
14198 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
14199}
14200
14202 const SDLoc &DL,
14203 EVT InVecVT, SDValue EltNo,
14204 LoadSDNode *OriginalLoad,
14205 SelectionDAG &DAG) const {
// Builds a scalar load of element EltNo of the vector loaded by OriginalLoad
// (of type InVecVT), converted to ResultVT, as a replacement for extracting
// that element from the loaded vector. Returns an empty SDValue when the
// narrowing is not possible or not profitable.
// NOTE(review): original line 14201 (the start of the signature, presumably
// including the `ResultVT` parameter) is absent from this extracted listing.
14206 assert(OriginalLoad->isSimple());
14207
14208 EVT VecEltVT = InVecVT.getVectorElementType();
14209
14210 // If the vector element type is not a multiple of a byte then we are unable
14211 // to correctly compute an address to load only the extracted element as a
14212 // scalar.
14213 if (!VecEltVT.isByteSized())
14214 return SDValue();
14215
14216 ISD::LoadExtType ExtTy =
14217 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
14218 if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
14219 return SDValue();
14220
// ByteOffset stays empty when the element index is not a constant.
// NOTE(review): original line 14223 (presumably the declaration of `MPI`) is
// absent from this listing.
14221 std::optional<unsigned> ByteOffset;
14222 Align Alignment = OriginalLoad->getAlign();
14224 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
14225 int Elt = ConstEltNo->getZExtValue();
14226 ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
14227 MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
14228 Alignment = commonAlignment(Alignment, *ByteOffset);
14229 } else {
14230 // Discard the pointer info except the address space because the memory
14231 // operand can't represent this new access since the offset is variable.
14232 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
14233 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
14234 }
14235
// Let the target veto narrowing this load.
14236 if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
14237 return SDValue();
14238
// Require the scalar access to be both allowed and reported as fast at the
// reduced alignment.
14239 unsigned IsFast = 0;
14240 if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
14241 OriginalLoad->getAddressSpace(), Alignment,
14242 OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
14243 !IsFast)
14244 return SDValue();
14245
14246 // The original DAG loaded the entire vector from memory, so arithmetic
14247 // within it must be inbounds.
// NOTE(review): original line 14248 (the start of the `NewPtr` definition)
// is absent from this listing.
14249 DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
14250
14251 // We are replacing a vector load with a scalar load. The new load must have
14252 // identical memory op ordering to the original.
14253 SDValue Load;
14254 if (ResultVT.bitsGT(VecEltVT)) {
14255 // If the result type of vextract is wider than the load, then issue an
14256 // extending load instead.
// NOTE(review): original line 14260 (the first arm of the conditional below,
// presumably ISD::ZEXTLOAD) is absent from this listing.
14257 ISD::LoadExtType ExtType =
14258 isLoadLegal(ResultVT, VecEltVT, Alignment,
14259 OriginalLoad->getAddressSpace(), ISD::ZEXTLOAD, false)
14261 : ISD::EXTLOAD;
14262 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
14263 NewPtr, MPI, VecEltVT, Alignment,
14264 OriginalLoad->getMemOperand()->getFlags(),
14265 OriginalLoad->getAAInfo());
14266 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
14267 } else {
14268 // The result type is narrower or the same width as the vector element
14269 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
14270 Alignment, OriginalLoad->getMemOperand()->getFlags(),
14271 OriginalLoad->getAAInfo());
14272 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
// Narrower result: truncate. Same width: reinterpret the bits as ResultVT.
14273 if (ResultVT.bitsLT(VecEltVT))
14274 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
14275 else
14276 Load = DAG.getBitcast(ResultVT, Load);
14277 }
14278
14279 return Load;
14280}
14281
14282// Set type id for call site info and metadata 'call_target'.
14283// We are filtering for:
14284// a) The call-graph-section use case that wants to know about indirect
14285// calls, or
14286// b) We want to annotate indirect calls.
// CSInfo is only overwritten when CB is non-null, is an indirect call and the
// remaining conditions hold; otherwise it is left untouched.
// NOTE(review): original lines 14287 (the start of the signature) and
// 14291-14292 (the rest of the condition) are absent from this extracted
// listing.
14288 const CallBase *CB, MachineFunction &MF,
14289 MachineFunction::CallSiteInfo &CSInfo) const {
14290 if (CB && CB->isIndirectCall() &&
14293 CSInfo = MachineFunction::CallSiteInfo(*CB);
14294}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool isSigned(unsigned Opcode)
#define _
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
#define T
#define T1
uint64_t High
#define P(N)
Function const char * Passes
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static std::pair< SDValue, SDValue > getLegalMaskAndStepVector(SDValue Mask, bool ZeroIsPoison, SDLoc DL, SelectionDAG &DAG)
Returns a type-legalized version of Mask as the first item in the pair.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static bool canNarrowCLMULToLegal(const TargetLowering &TLI, LLVMContext &Ctx, EVT VT, unsigned HalveDepth=0, unsigned TotalDepth=0)
Check if CLMUL on VT can eventually reach a type with legal CLMUL through a chain of halving decompos...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for choosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if this FPClassTest can be performed with an ordered fcmp to 0,...
static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, SDNode *FPNode)
Given a store node StoreNode, return true if it is safe to fold that node into FPNode,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static LLVM_ABI const llvm::fltSemantics & EnumToSemantics(Semantics S)
Definition APFloat.cpp:113
static constexpr roundingMode rmTowardZero
Definition APFloat.h:349
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:237
static LLVM_ABI unsigned getSizeInBits(const fltSemantics &Sem)
Returns the size of the floating point number (in bits) in the given semantics.
Definition APFloat.cpp:293
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:345
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:229
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:270
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:361
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1412
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.h:1223
APInt bitcastToAPInt() const
Definition APFloat.h:1436
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1203
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1163
void changeSign()
Definition APFloat.h:1362
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1174
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1599
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1793
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1429
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:424
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1414
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1692
void setSignBit()
Set the sign bit to 1.
Definition APInt.h:1363
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1256
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1419
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:841
void negate()
Negate this APInt in place.
Definition APInt.h:1491
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1554
unsigned countLeadingZeros() const
Definition APInt.h:1629
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:398
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
Definition APInt.h:1458
unsigned logBase2() const
Definition APInt.h:1784
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:834
void setAllBits()
Set every bit to 1.
Definition APInt.h:1342
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1300
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:406
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1157
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1028
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1390
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:880
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
void clearBits(unsigned LoBit, unsigned HiBit)
Clear the bits from LoBit (inclusive) to HiBit (exclusive) to 0.
Definition APInt.h:1440
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1411
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:483
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition APInt.h:1465
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition APInt.h:1366
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:872
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
This class represents a range of values.
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:217
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
std::vector< std::string > ConstraintCodeVector
Definition InlineAsm.h:104
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:213
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Flags getFlags() const
Return the raw flags of the source value.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MCRegister getLiveInPhysReg(Register VReg) const
getLiveInPhysReg - If VReg is a live-in virtual register, return the corresponding live-in physical r...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition Module.h:447
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
iterator end() const
Definition ArrayRef.h:339
iterator begin() const
Definition ArrayRef.h:338
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC)
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI bool isKnownNeverLogicalZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Test whether the given floating point SDValue (or all elements of it, if it is a vector) is known to ...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl, SDNodeFlags Flags={})
Constant fold a setcc to true or false.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
LLVM_ABI std::optional< unsigned > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI bool shouldOptForSize() const
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS)
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isIdentityElement(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo, unsigned Depth=0) const
Returns true if V is an identity element of Opc with Flags.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, UndefPoisonKind Kind=UndefPoisonKind::UndefOrPoison, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, Kind can be used to track poison ...
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, bool OrZero=false, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
iterator end() const
Definition StringRef.h:116
Class to represent struct types.
LLVM_ABI void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
unsigned getBitWidthForCttzElements(EVT RetVT, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool preferSelectsOverBooleanArithmetic(EVT VT) const
Should we prefer selects to doing arithmetic on boolean types.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
EVT getLegalTypeToTransformTo(LLVMContext &Context, EVT VT) const
Perform getTypeToTransformTo repeatedly until a legal type is obtained.
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const
Get the CallingConv that should be used for the specified libcall implementation.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
ISD::CondCode getSoftFloatCmpLibcallPredicate(RTLIB::LibcallImpl Call) const
Get the comparison predicate that's to be used to test the result of the comparison libcall against z...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
TargetLoweringBase(const TargetMachine &TM, const TargetSubtargetInfo &STI)
NOTE: The TargetMachine owns TLOF.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
virtual EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
bool isLoadLegal(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return true if the specified load with extension is legal on this target.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
bool expandMultipleResultFPLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl< SDValue > &Results, std::optional< unsigned > CallRetResNo={}) const
Expands a node with multiple results to an FP or vector libcall.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_POISON nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
SmallVector< ConstraintPair > ConstraintGroup
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandFCANONICALIZE(SDNode *Node, SelectionDAG &DAG) const
Expand FCANONICALIZE to FMUL with 1.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_POISON nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_POISON nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandCLMUL(SDNode *N, SelectionDAG &DAG) const
Expand carryless multiply.
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue expandCttzElts(SDNode *Node, SelectionDAG &DAG) const
Expand a CTTZ_ELTS or CTTZ_ELTS_ZERO_POISON by calculating (VL - i) for each active lane (i),...
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual SDValue unwrapAddress(SDValue N) const
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, UndefPoisonKind Kind, unsigned Depth) const
Return true if this function can prove that Op is never poison and, Kind can be used to track poison ...
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_POISON nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product twice the width of LHS and RHS.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
SDValue expandLoopDependenceMask(SDNode *N, SelectionDAG &DAG) const
Expand LOOP_DEPENDENCE_MASK nodes.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
~TargetLowering() override
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using an n/2-bit algorithm.
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g. {edx}), return the register number and the register class for the register.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return true if N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
SDValue expandCONVERT_TO_ARBITRARY_FP(SDNode *Node, SelectionDAG &DAG) const
Expand CONVERT_TO_ARBITRARY_FP using bit manipulation.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
virtual bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const
For most targets, an LLVM type must be broken down into multiple smaller types.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return true if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return true if N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
SDValue expandPEXT(SDNode *N, SelectionDAG &DAG) const
Expand parallel bit extract (compress).
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, UndefPoisonKind Kind, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_POISON nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandCONVERT_FROM_ARBITRARY_FP(SDNode *Node, SelectionDAG &DAG) const
Expand CONVERT_FROM_ARBITRARY_FP using bit manipulation.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode, SDNodeFlags Flags={}) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual void computeKnownFPClassForTargetNode(const SDValue Op, KnownFPClass &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine floating-point class information for a target node.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis, Register R, KnownFPClass &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
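If SNaN is false, returns true if Op is known to never be any NaN; if SNaN is true, returns true if Op is known to never be a signaling NaN.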
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
SDValue expandPDEP(SDNode *N, SelectionDAG &DAG) const
Expand parallel bit deposit (expand).
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
SDValue expandCTLS(SDNode *N, SelectionDAG &DAG) const
Expand CTLS (count leading sign bits) nodes.
void setTypeIdForCallsiteInfo(const CallBase *CB, MachineFunction &MF, MachineFunction::CallSiteInfo &CSInfo) const
Primary interface to the complete machine description for the target machine.
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
unsigned EmitCallSiteInfo
The flag enables call site info production.
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:785
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition Type.h:311
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:282
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:326
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:717
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
Definition APInt.cpp:3040
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:827
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ LOOP_DEPENDENCE_RAW_MASK
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:538
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:787
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:394
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:522
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:400
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:861
@ CTTZ_ELTS
Returns the number of trailing (least significant) zero elements in a vector.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ VECTOR_FIND_LAST_ACTIVE
Finds the index of the last active mask element Operands: Mask.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:888
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:918
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:528
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ CLMUL
Carry-less multiplication operations.
Definition ISDOpcodes.h:778
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:407
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ CTLZ_ZERO_POISON
Definition ISDOpcodes.h:796
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:852
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ PARTIAL_REDUCE_FMLA
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BRIND
BRIND - Indirect branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:804
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ GET_ACTIVE_LANE_MASK
GET_ACTIVE_LANE_MASK - this corresponds to the llvm.get.active.lane.mask intrinsic.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:858
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:819
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:386
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:356
@ VECTOR_SPLICE_LEFT
VECTOR_SPLICE_LEFT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1, VEC2) left by OFFSET elements an...
Definition ISDOpcodes.h:653
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:907
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:896
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:413
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:986
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:813
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:934
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ VECTOR_SPLICE_RIGHT
VECTOR_SPLICE_RIGHT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1,VEC2) right by OFFSET elements a...
Definition ISDOpcodes.h:657
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ CTTZ_ZERO_POISON
Bit counting operators with a poisoned result for zero inputs.
Definition ISDOpcodes.h:795
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:967
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:929
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:953
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:864
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:841
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ CTTZ_ELTS_ZERO_POISON
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ ABS_MIN_POISON
ABS with a poison result for INT_MIN.
Definition ISDOpcodes.h:751
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI NodeType getOppositeSignednessMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns the corresponding opcode with the opposi...
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
NUses_match< 1, Value_match > m_OneUse()
This is an optimization pass for GlobalISel generic memory operations.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:558
void stable_sort(R &&Range)
Definition STLExtras.h:2115
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1572
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
LLVM_ABI bool isOneOrOneSplatFP(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant floating-point value, or a splatted vector of a constant float...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew mod Align.
Definition MathExtras.h:546
void * PointerTy
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1554
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:173
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1776
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
fltNonfiniteBehavior
Definition APFloat.h:953
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isZeroOrZeroSplat(SDValue N, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
UndefPoisonKind
Enumeration to track whether we are interested in Undef, Poison, or both.
Definition UndefPoison.h:20
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1672
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
fltNanEncoding
Definition APFloat.h:977
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:418
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:129
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:307
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition ValueTypes.h:323
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
ElementCount getVectorElementCount() const
Definition ValueTypes.h:373
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:494
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:266
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:382
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:453
bool isPow2VectorType() const
Returns true if the given vector type has a power-of-2 number of elements.
Definition ValueTypes.h:501
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:435
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
bool isScalableVT() const
Return true if the type is a scalable type.
Definition ValueTypes.h:210
bool isFixedLengthVector() const
Definition ValueTypes.h:199
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT widenIntegerElementType(LLVMContext &Context) const
Return a VT for an integer element type with doubled bit width.
Definition ValueTypes.h:467
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:187
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:121
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:331
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:484
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:315
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:190
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition KnownBits.h:269
static LLVM_ABI KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:64
void setAllConflict()
Make all bits known to be both zero and one.
Definition KnownBits.h:97
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:165
KnownBits byteSwap() const
Definition KnownBits.h:559
static LLVM_ABI std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:303
KnownBits reverseBits() const
Definition KnownBits.h:563
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition KnownBits.h:247
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
static LLVM_ABI KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:176
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:335
bool isSignUnknown() const
Returns true if we don't know the sign bit.
Definition KnownBits.h:67
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:325
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:184
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
static LLVM_ABI KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static LLVM_ABI std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
static LLVM_ABI std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static LLVM_ABI KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition KnownBits.cpp:61
static LLVM_ABI std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
static LLVM_ABI std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything about the extended bits.
Definition KnownBits.h:171
static LLVM_ABI std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static LLVM_ABI std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:300
static LLVM_ABI std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
static LLVM_ABI KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static LLVM_ABI bool hasVectorMaskArgument(RTLIB::LibcallImpl Impl)
Returns true if the function has a vector mask argument, which is assumed to be the last argument.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
void setNoSignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequence of multiplies, adds and shifts.
This contains information for each constraint that we are lowering.
std::string ConstraintCode
This contains the actual string for the code, like "m".
LLVM_ABI unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
LLVM_ABI bool isMatchingInputConstraint() const
Return true if this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
MakeLibCallOptions & setIsSigned(bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true, bool AllowWidenOptimization=false)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a sequence of multiplies, adds and shifts.
fltNonfiniteBehavior nonFiniteBehavior
Definition APFloat.h:1014
fltNanEncoding nanEncoding
Definition APFloat.h:1016