//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include <cctype>
#include <deque>
using namespace llvm;
using namespace llvm::SDPatternMatch;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

// Define the virtual destructor out-of-line for build efficiency.
TargetLowering::~TargetLowering() = default;

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect the
  // call sequence.
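  // For example, if the caller's return value is annotated 'zeroext' but the
  // call is not, the caller would still have to zero-extend the result after
  // the call, so the call cannot be emitted as a tail call.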
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef,
                           Attribute::Range, Attribute::NoFPClass})
    CallerAttrs.removeAttribute(Attr);

  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}

bool TargetLowering::parametersInCSRMatch(
    const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used by the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    // for the function live-in value of register Reg.)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
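///
/// A typical (illustrative) use, e.g. lowering an f64 multiply into a
/// runtime call:
///   SDValue Ops[2] = {LHS, RHS};
///   MakeLibCallOptions CallOptions;
///   std::pair<SDValue, SDValue> Res =
///       makeLibCall(DAG, RTLIB::MUL_F64, MVT::f64, Ops, CallOptions, dl);
///   // Res.first is the call result, Res.second the output chain.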
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions,
                            const SDLoc &dl,
                            SDValue InChain) const {
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
                   ? OpsTypeOverrides[i]
                   : NewOp.getValueType().getTypeForEVT(*DAG.getContext());
    TargetLowering::ArgListEntry Entry(NewOp, Ty);
    if (CallOptions.IsSoften)
      Entry.OrigTy =
          CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext());

    Entry.IsSExt =
        shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
    Entry.IsZExt = !Entry.IsSExt;

    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  const char *LibcallName = getLibcallName(LC);
  if (LC == RTLIB::UNKNOWN_LIBCALL || !LibcallName)
    reportFatalInternalError("unsupported library call operation");

  SDValue Callee =
      DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  Type *OrigRetTy = RetTy;
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften) {
    OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext());
    if (!shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften))
      signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, OrigRetTy, Callee,
                    std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}

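/// Determine the sequence of value types (MemOps) that should be used to
/// implement a memory operation of Op.size() bytes, honoring the destination
/// alignment and the Limit on the number of individual load/store operations.
/// Returns false if no lowering within Limit operations is found.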
bool TargetLowering::findOptimalMemOpLowering(
    LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
    const MemOp &Op, unsigned DstAS, unsigned SrcAS,
    const AttributeList &FuncAttributes) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load/stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
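      // E.g. with 12 bytes remaining and a 16-byte VT, a single unaligned
      // 16-byte operation overlapping the previously emitted ops can cover
      // the tail instead of separate 8-byte and 4-byte operations.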
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}

/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
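/// "Softening" rewrites a floating-point comparison the target cannot do in
/// hardware as an integer comparison of a runtime helper's result; e.g. with
/// a libgcc-style runtime, (setolt f64 X, Y) becomes (setlt __ltdf2(X, Y), 0).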
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}

void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting them. We can update this code when libgcc provides
  // such functions.
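  // Predicates with no single matching libcall are expanded into two calls
  // whose boolean results are combined; e.g. SETUEQ ("unordered or equal")
  // becomes (__unordsf2(X, Y) != 0) | (__eqsf2(X, Y) == 0) for f32 on a
  // libgcc-style runtime.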

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons.
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = {OldLHS.getValueType(), OldRHS.getValueType()};
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  RTLIB::LibcallImpl LC1Impl = getLibcallImpl(LC1);
  if (LC1Impl == RTLIB::Unsupported) {
    DAG.getContext()->emitError(
        "no libcall available to soften floating-point compare");
  }

  CCCode = getSoftFloatCmpLibcallPredicate(LC1Impl);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    RTLIB::LibcallImpl LC2Impl = getLibcallImpl(LC2);
    if (LC2Impl == RTLIB::Unsupported) {
      DAG.getContext()->emitError(
          "no libcall available to soften floating-point compare");
    }

    assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
           "unordered call should be simple boolean");

    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
                         DAG.getValueType(MVT::i1));

    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getSoftFloatCmpLibcallPredicate(LC2Impl);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}

/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-PIC modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI, MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                               SDValue Addr, int JTI,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Value;
  // Jump table debug info is only needed if CodeView is enabled.
  if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
    Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
  }
  return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a GOT and then add the offset.
  if (!TM.shouldAssumeDSOLocal(GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}

//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
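/// For example, if Op is (and X, 0xFF) but only the low four bits of the
/// result are demanded, the mask constant is shrunk to 0xF.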
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node; leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
                                      Op->getFlags());
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}

bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
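/// For example, if only the low 8 bits of an i32 add are demanded and i8<->i32
/// casts are free: (i32 add X, Y) -> (any_extend (i8 add (trunc X), (trunc Y))).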
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.

      // If the operation has the 'disjoint' flag, then the
      // operands on the new node are also disjoint.
      SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
                                                     : SDNodeFlags::None);
      unsigned Opcode = Op.getOpcode();
      if (Opcode == ISD::PTRADD) {
        // It isn't a ptradd anymore if it doesn't operate on the entire
        // pointer.
        Opcode = ISD::ADD;
      }
      SDValue X = DAG.getNode(
          Opcode, dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}

// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(VT);

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - big-endian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::ADD: {
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    if (RHSKnown.isZero())
      return Op.getOperand(0);

    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (LHSKnown.isZero())
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<unsigned> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SRL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<unsigned> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      // Must already be sign bits in DemandedBits bounds, and can't demand any
      // shifted-in zeroes.
      if (DemandedBits.countl_zero() >= ShAmt) {
        unsigned NumSignBits =
            DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
        if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
          return Op0;
      }
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
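      // E.g. (setlt X, 0) is all-ones exactly when X's sign bit is set, so if
      // only the sign bit is demanded, X itself already provides it.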
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits &&
        shouldRemoveRedundantExtend(Op))
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    if (VT.isScalableVector())
      return SDValue();

    // If we only want the lowest element and none of the extended bits, then
    // we can return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return SDValue();

    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // TODO: Probably okay to remove after audit; here to reduce change size
    // in initial enablement patch for scalable vectors.
    if (VT.isScalableVector())
      return SDValue();

    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1)
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
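// For example, with i8 A and B zero-extended to i32:
//   (srl (add (zext A), (zext B)), 1)          -> (zext (avgflooru A, B))
//   (srl (add (add (zext A), (zext B)), 1), 1) -> (zext (avgceilu A, B))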
static SDValue combineShiftToAVG(SDValue Op,
                                 TargetLowering::TargetLoweringOpt &TLO,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts, unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  //   add(ext, ext)
  // or one of these as an avgceil
  //   add(add(ext, ext), 1)
  //   add(add(ext, 1), ext)
  //   add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  SDValue Add2;
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB,
                     ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA,
                     ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bits for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  SelectionDAG &DAG = TLO.DAG;
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
    return SDValue();
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
      return SDValue();
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Don't create an AVGFLOOR node with a scalar constant unless it's legal, as
  // this is likely to stop other folds (reassociation, value tracking etc.)
  if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
      (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
    return SDValue();

  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}

/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
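/// For example, if Op is (and X, 0xFF00) and only bits 15:8 are demanded, the
/// mask is redundant and Op can be replaced by X.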
1163 SDValue Op, const APInt &OriginalDemandedBits,
1164 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1165 unsigned Depth, bool AssumeSingleUse) const {
1166 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1167 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1168 "Mask size mismatches value type size!");
1169
1170 // Don't know anything.
1171 Known = KnownBits(BitWidth);
1172
1173 EVT VT = Op.getValueType();
1174 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1175 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1176 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1177 "Unexpected vector size");
1178
1179 APInt DemandedBits = OriginalDemandedBits;
1180 APInt DemandedElts = OriginalDemandedElts;
1181 SDLoc dl(Op);
1182
1183 // Undef operand.
1184 if (Op.isUndef())
1185 return false;
1186
1187 // We can't simplify target constants.
1188 if (Op.getOpcode() == ISD::TargetConstant)
1189 return false;
1190
1191 if (Op.getOpcode() == ISD::Constant) {
1192 // We know all of the bits for a constant!
1193 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1194 return false;
1195 }
1196
1197 if (Op.getOpcode() == ISD::ConstantFP) {
1198 // We know all of the bits for a floating point constant!
1200 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1201 return false;
1202 }
1203
1204 // Other users may use these bits.
1205 bool HasMultiUse = false;
1206 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1208 // Limit search depth.
1209 return false;
1210 }
1211 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1213 DemandedElts = APInt::getAllOnes(NumElts);
1214 HasMultiUse = true;
1215 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1216 // Not demanding any bits/elts from Op.
1217 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1218 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1219 // Limit search depth.
1220 return false;
1221 }
1222
1223 KnownBits Known2;
1224 switch (Op.getOpcode()) {
1225 case ISD::SCALAR_TO_VECTOR: {
1226 if (VT.isScalableVector())
1227 return false;
1228 if (!DemandedElts[0])
1229 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1230
1231 KnownBits SrcKnown;
1232 SDValue Src = Op.getOperand(0);
1233 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1234 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1235 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1236 return true;
1237
1238 // Upper elements are undef, so only get the knownbits if we just demand
1239 // the bottom element.
1240 if (DemandedElts == 1)
1241 Known = SrcKnown.anyextOrTrunc(BitWidth);
1242 break;
1243 }
1244 case ISD::BUILD_VECTOR:
1245 // Collect the known bits that are shared by every demanded element.
1246 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1247 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1248 return false; // Don't fall through, will infinitely loop.
1249 case ISD::SPLAT_VECTOR: {
1250 SDValue Scl = Op.getOperand(0);
1251 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1252 KnownBits KnownScl;
1253 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1254 return true;
1255
1256 // Implicitly truncate the bits to match the official semantics of
1257 // SPLAT_VECTOR.
1258 Known = KnownScl.trunc(BitWidth);
1259 break;
1260 }
1261 case ISD::LOAD: {
1262 auto *LD = cast<LoadSDNode>(Op);
1263 if (getTargetConstantFromLoad(LD)) {
1264 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1265 return false; // Don't fall through, will infinitely loop.
1266 }
1267 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1268 // If this is a ZEXTLoad and we are looking at the loaded value.
1269 EVT MemVT = LD->getMemoryVT();
1270 unsigned MemBits = MemVT.getScalarSizeInBits();
1271 Known.Zero.setBitsFrom(MemBits);
1272 return false; // Don't fall through, will infinitely loop.
1273 }
1274 break;
1275 }
1277 if (VT.isScalableVector())
1278 return false;
1279 SDValue Vec = Op.getOperand(0);
1280 SDValue Scl = Op.getOperand(1);
1281 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1282 EVT VecVT = Vec.getValueType();
1283
1284 // If index isn't constant, assume we need all vector elements AND the
1285 // inserted element.
1286 APInt DemandedVecElts(DemandedElts);
1287 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1288 unsigned Idx = CIdx->getZExtValue();
1289 DemandedVecElts.clearBit(Idx);
1290
1291 // Inserted element is not required.
1292 if (!DemandedElts[Idx])
1293 return TLO.CombineTo(Op, Vec);
1294 }
1295
1296 KnownBits KnownScl;
1297 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1298 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1299 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1300 return true;
1301
1302 Known = KnownScl.anyextOrTrunc(BitWidth);
1303
1304 KnownBits KnownVec;
1305 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1306 Depth + 1))
1307 return true;
1308
1309 if (!!DemandedVecElts)
1310 Known = Known.intersectWith(KnownVec);
1311
1312 return false;
1313 }
1314 case ISD::INSERT_SUBVECTOR: {
1315 if (VT.isScalableVector())
1316 return false;
1317 // Demand any elements from the subvector and the remainder from the src its
1318 // inserted into.
1319 SDValue Src = Op.getOperand(0);
1320 SDValue Sub = Op.getOperand(1);
1321 uint64_t Idx = Op.getConstantOperandVal(2);
1322 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1323 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1324 APInt DemandedSrcElts = DemandedElts;
1325 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
1326
1327 KnownBits KnownSub, KnownSrc;
1328 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1329 Depth + 1))
1330 return true;
1331 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1332 Depth + 1))
1333 return true;
1334
1335 Known.setAllConflict();
1336 if (!!DemandedSubElts)
1337 Known = Known.intersectWith(KnownSub);
1338 if (!!DemandedSrcElts)
1339 Known = Known.intersectWith(KnownSrc);
1340
1341 // Attempt to avoid multi-use src if we don't need anything from it.
1342 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1343 !DemandedSrcElts.isAllOnes()) {
1345 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1347 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1348 if (NewSub || NewSrc) {
1349 NewSub = NewSub ? NewSub : Sub;
1350 NewSrc = NewSrc ? NewSrc : Src;
1351 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1352 Op.getOperand(2));
1353 return TLO.CombineTo(Op, NewOp);
1354 }
1355 }
1356 break;
1357 }
1359 if (VT.isScalableVector())
1360 return false;
1361 // Offset the demanded elts by the subvector index.
1362 SDValue Src = Op.getOperand(0);
1363 if (Src.getValueType().isScalableVector())
1364 break;
1365 uint64_t Idx = Op.getConstantOperandVal(1);
1366 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1367 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1368
1369 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1370 Depth + 1))
1371 return true;
1372
1373 // Attempt to avoid multi-use src if we don't need anything from it.
1374 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1376 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1377 if (DemandedSrc) {
1378 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1379 Op.getOperand(1));
1380 return TLO.CombineTo(Op, NewOp);
1381 }
1382 }
1383 break;
1384 }
1385 case ISD::CONCAT_VECTORS: {
1386 if (VT.isScalableVector())
1387 return false;
1388 Known.setAllConflict();
1389 EVT SubVT = Op.getOperand(0).getValueType();
1390 unsigned NumSubVecs = Op.getNumOperands();
1391 unsigned NumSubElts = SubVT.getVectorNumElements();
1392 for (unsigned i = 0; i != NumSubVecs; ++i) {
1393 APInt DemandedSubElts =
1394 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1395 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1396 Known2, TLO, Depth + 1))
1397 return true;
1398 // Known bits are shared by every demanded subvector element.
1399 if (!!DemandedSubElts)
1400 Known = Known.intersectWith(Known2);
1401 }
1402 break;
1403 }
1404 case ISD::VECTOR_SHUFFLE: {
1405 assert(!VT.isScalableVector());
1406 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1407
1408 // Collect demanded elements from shuffle operands..
1409 APInt DemandedLHS, DemandedRHS;
1410 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1411 DemandedRHS))
1412 break;
1413
1414 if (!!DemandedLHS || !!DemandedRHS) {
1415 SDValue Op0 = Op.getOperand(0);
1416 SDValue Op1 = Op.getOperand(1);
1417
1418 Known.setAllConflict();
1419 if (!!DemandedLHS) {
1420 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1421 Depth + 1))
1422 return true;
1423 Known = Known.intersectWith(Known2);
1424 }
1425 if (!!DemandedRHS) {
1426 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1427 Depth + 1))
1428 return true;
1429 Known = Known.intersectWith(Known2);
1430 }
1431
1432 // Attempt to avoid multi-use ops if we don't need anything from them.
1434 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1436 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1437 if (DemandedOp0 || DemandedOp1) {
1438 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1439 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1440 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1441 return TLO.CombineTo(Op, NewOp);
1442 }
1443 }
1444 break;
1445 }
1446 case ISD::AND: {
1447 SDValue Op0 = Op.getOperand(0);
1448 SDValue Op1 = Op.getOperand(1);
1449
1450 // If the RHS is a constant, check to see if the LHS would be zero without
1451 // using the bits from the RHS. Below, we use knowledge about the RHS to
1452 // simplify the LHS, here we're using information from the LHS to simplify
1453 // the RHS.
1454 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1455 // Do not increment Depth here; that can cause an infinite loop.
1456 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1457 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1458 if ((LHSKnown.Zero & DemandedBits) ==
1459 (~RHSC->getAPIntValue() & DemandedBits))
1460 return TLO.CombineTo(Op, Op0);
1461
1462 // If any of the set bits in the RHS are known zero on the LHS, shrink
1463 // the constant.
1464 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1465 DemandedElts, TLO))
1466 return true;
1467
1468 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1469 // constant, but if this 'and' is only clearing bits that were just set by
1470 // the xor, then this 'and' can be eliminated by shrinking the mask of
1471 // the xor. For example, for a 32-bit X:
1472 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1473 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1474 LHSKnown.One == ~RHSC->getAPIntValue()) {
1475 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1476 return TLO.CombineTo(Op, Xor);
1477 }
1478 }
1479
1480 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1481 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1482 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1483 (Op0.getOperand(0).isUndef() ||
1485 Op0->hasOneUse()) {
1486 unsigned NumSubElts =
1488 unsigned SubIdx = Op0.getConstantOperandVal(2);
1489 APInt DemandedSub =
1490 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1491 KnownBits KnownSubMask =
1492 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1493 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1494 SDValue NewAnd =
1495 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1496 SDValue NewInsert =
1497 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1498 Op0.getOperand(1), Op0.getOperand(2));
1499 return TLO.CombineTo(Op, NewInsert);
1500 }
1501 }
1502
1503 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1504 Depth + 1))
1505 return true;
1506 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1507 Known2, TLO, Depth + 1))
1508 return true;
1509
1510 // If all of the demanded bits are known one on one side, return the other.
1511 // These bits cannot contribute to the result of the 'and'.
1512 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1513 return TLO.CombineTo(Op, Op0);
1514 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1515 return TLO.CombineTo(Op, Op1);
1516 // If all of the demanded bits in the inputs are known zeros, return zero.
1517 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1518 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1519 // If the RHS is a constant, see if we can simplify it.
1520 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1521 TLO))
1522 return true;
1523 // If the operation can be done in a smaller type, do so.
1525 return true;
1526
1527 // Attempt to avoid multi-use ops if we don't need anything from them.
1528 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1530 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1532 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1533 if (DemandedOp0 || DemandedOp1) {
1534 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1535 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1536 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1537 return TLO.CombineTo(Op, NewOp);
1538 }
1539 }
1540
1541 Known &= Known2;
1542 break;
1543 }
1544 case ISD::OR: {
1545 SDValue Op0 = Op.getOperand(0);
1546 SDValue Op1 = Op.getOperand(1);
1547 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1548 Depth + 1)) {
1549 Op->dropFlags(SDNodeFlags::Disjoint);
1550 return true;
1551 }
1552
1553 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1554 Known2, TLO, Depth + 1)) {
1555 Op->dropFlags(SDNodeFlags::Disjoint);
1556 return true;
1557 }
1558
1559 // If all of the demanded bits are known zero on one side, return the other.
1560 // These bits cannot contribute to the result of the 'or'.
1561 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1562 return TLO.CombineTo(Op, Op0);
1563 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1564 return TLO.CombineTo(Op, Op1);
1565 // If the RHS is a constant, see if we can simplify it.
1566 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1567 return true;
1568 // If the operation can be done in a smaller type, do so.
1570 return true;
1571
1572 // Attempt to avoid multi-use ops if we don't need anything from them.
1573 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1575 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1577 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1578 if (DemandedOp0 || DemandedOp1) {
1579 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1580 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1581 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1582 return TLO.CombineTo(Op, NewOp);
1583 }
1584 }
1585
1586 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1587 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1588 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1589 Op0->hasOneUse() && Op1->hasOneUse()) {
1590 // Attempt to match all commutations - m_c_Or would've been useful!
1591 for (int I = 0; I != 2; ++I) {
1592 SDValue X = Op.getOperand(I).getOperand(0);
1593 SDValue C1 = Op.getOperand(I).getOperand(1);
1594 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1595 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1596 if (Alt.getOpcode() == ISD::OR) {
1597 for (int J = 0; J != 2; ++J) {
1598 if (X == Alt.getOperand(J)) {
1599 SDValue Y = Alt.getOperand(1 - J);
1600 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1601 {C1, C2})) {
1602 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1603 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1604 return TLO.CombineTo(
1605 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1606 }
1607 }
1608 }
1609 }
1610 }
1611 }
1612
1613 Known |= Known2;
1614 break;
1615 }
1616 case ISD::XOR: {
1617 SDValue Op0 = Op.getOperand(0);
1618 SDValue Op1 = Op.getOperand(1);
1619
1620 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1621 Depth + 1))
1622 return true;
1623 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1624 Depth + 1))
1625 return true;
1626
1627 // If all of the demanded bits are known zero on one side, return the other.
1628 // These bits cannot contribute to the result of the 'xor'.
1629 if (DemandedBits.isSubsetOf(Known.Zero))
1630 return TLO.CombineTo(Op, Op0);
1631 if (DemandedBits.isSubsetOf(Known2.Zero))
1632 return TLO.CombineTo(Op, Op1);
1633 // If the operation can be done in a smaller type, do so.
1635 return true;
1636
1637 // If all of the unknown bits are known to be zero on one side or the other
1638 // turn this into an *inclusive* or.
1639 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1640 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1641 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1642
1643 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1644 if (C) {
1645 // If one side is a constant, and all of the set bits in the constant are
1646 // also known set on the other side, turn this into an AND, as we know
1647 // the bits will be cleared.
1648 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1649 // NB: it is okay if more bits are known than are requested
1650 if (C->getAPIntValue() == Known2.One) {
1651 SDValue ANDC =
1652 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1653 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1654 }
1655
1656 // If the RHS is a constant, see if we can change it. Don't alter a -1
1657 // constant because that's a 'not' op, and that is better for combining
1658 // and codegen.
1659 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1660 // We're flipping all demanded bits. Flip the undemanded bits too.
1661 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1662 return TLO.CombineTo(Op, New);
1663 }
1664
1665 unsigned Op0Opcode = Op0.getOpcode();
1666 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1667 if (ConstantSDNode *ShiftC =
1668 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1669 // Don't crash on an oversized shift. We can not guarantee that a
1670 // bogus shift has been simplified to undef.
1671 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1672 uint64_t ShiftAmt = ShiftC->getZExtValue();
1674 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1675 : Ones.lshr(ShiftAmt);
1676 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1677 isDesirableToCommuteXorWithShift(Op.getNode())) {
1678 // If the xor constant is a demanded mask, do a 'not' before the
1679 // shift:
1680 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1681 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1682 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1683 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1684 Op0.getOperand(1)));
1685 }
1686 }
1687 }
1688 }
1689 }
1690
1691 // If we can't turn this into a 'not', try to shrink the constant.
1692 if (!C || !C->isAllOnes())
1693 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1694 return true;
1695
1696 // Attempt to avoid multi-use ops if we don't need anything from them.
1697 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1698 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1699 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1700 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1701 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1702 if (DemandedOp0 || DemandedOp1) {
1703 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1704 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1705 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1706 return TLO.CombineTo(Op, NewOp);
1707 }
1708 }
1709
1710 Known ^= Known2;
1711 break;
1712 }
1713 case ISD::SELECT:
1714 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1715 Known, TLO, Depth + 1))
1716 return true;
1717 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1718 Known2, TLO, Depth + 1))
1719 return true;
1720
1721 // If the operands are constants, see if we can simplify them.
1722 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1723 return true;
1724
1725 // Only known if known in both the LHS and RHS.
1726 Known = Known.intersectWith(Known2);
1727 break;
1728 case ISD::VSELECT:
1729 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1730 Known, TLO, Depth + 1))
1731 return true;
1732 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1733 Known2, TLO, Depth + 1))
1734 return true;
1735
1736 // Only known if known in both the LHS and RHS.
1737 Known = Known.intersectWith(Known2);
1738 break;
1739 case ISD::SELECT_CC:
1740 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1741 Known, TLO, Depth + 1))
1742 return true;
1743 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1744 Known2, TLO, Depth + 1))
1745 return true;
1746
1747 // If the operands are constants, see if we can simplify them.
1748 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1749 return true;
1750
1751 // Only known if known in both the LHS and RHS.
1752 Known = Known.intersectWith(Known2);
1753 break;
1754 case ISD::SETCC: {
1755 SDValue Op0 = Op.getOperand(0);
1756 SDValue Op1 = Op.getOperand(1);
1757 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1758 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1759 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1760 // -1, we may be able to bypass the setcc.
1761 if (DemandedBits.isSignMask() &&
1762 Op0.getScalarValueSizeInBits() == BitWidth &&
1763 getBooleanContents(Op0.getValueType()) ==
1764 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1765 // If we're testing X < 0, then this compare isn't needed - just use X!
1766 // FIXME: We're limiting to integer types here, but this should also work
1767 // if we don't care about FP signed-zero. The use of SETLT with FP means
1768 // that we don't care about NaNs.
1769 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1770 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1771 return TLO.CombineTo(Op, Op0);
1772
1773 // TODO: Should we check for other forms of sign-bit comparisons?
1774 // Examples: X <= -1, X >= 0
1775 }
1776 if (getBooleanContents(Op0.getValueType()) ==
1777 TargetLowering::ZeroOrOneBooleanContent &&
1778 BitWidth > 1)
1779 Known.Zero.setBitsFrom(1);
1780 break;
1781 }
1782 case ISD::SHL: {
1783 SDValue Op0 = Op.getOperand(0);
1784 SDValue Op1 = Op.getOperand(1);
1785 EVT ShiftVT = Op1.getValueType();
1786
1787 if (std::optional<unsigned> KnownSA =
1788 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1789 unsigned ShAmt = *KnownSA;
1790 if (ShAmt == 0)
1791 return TLO.CombineTo(Op, Op0);
1792
1793 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1794 // single shift. We can do this if the bottom bits (which are shifted
1795 // out) are never demanded.
1796 // TODO - support non-uniform vector amounts.
1797 if (Op0.getOpcode() == ISD::SRL) {
1798 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1799 if (std::optional<unsigned> InnerSA =
1800 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1801 unsigned C1 = *InnerSA;
1802 unsigned Opc = ISD::SHL;
1803 int Diff = ShAmt - C1;
1804 if (Diff < 0) {
1805 Diff = -Diff;
1806 Opc = ISD::SRL;
1807 }
1808 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1809 return TLO.CombineTo(
1810 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1811 }
1812 }
1813 }
1814
1815 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1816 // are not demanded. This will likely allow the anyext to be folded away.
1817 // TODO - support non-uniform vector amounts.
1818 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1819 SDValue InnerOp = Op0.getOperand(0);
1820 EVT InnerVT = InnerOp.getValueType();
1821 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1822 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1823 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1824 SDValue NarrowShl = TLO.DAG.getNode(
1825 ISD::SHL, dl, InnerVT, InnerOp,
1826 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1827 return TLO.CombineTo(
1828 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1829 }
1830
1831 // Repeat the SHL optimization above in cases where an extension
1832 // intervenes: (shl (anyext (shr x, c1)), c2) to
1833 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1834 // aren't demanded (as above) and that the shifted upper c1 bits of
1835 // x aren't demanded.
1836 // TODO - support non-uniform vector amounts.
1837 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1838 InnerOp.hasOneUse()) {
1839 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1840 InnerOp, DemandedElts, Depth + 2)) {
1841 unsigned InnerShAmt = *SA2;
1842 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1843 DemandedBits.getActiveBits() <=
1844 (InnerBits - InnerShAmt + ShAmt) &&
1845 DemandedBits.countr_zero() >= ShAmt) {
1846 SDValue NewSA =
1847 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1848 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1849 InnerOp.getOperand(0));
1850 return TLO.CombineTo(
1851 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1852 }
1853 }
1854 }
1855 }
1856
1857 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1858 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1859 Depth + 1)) {
1860 // Disable the nsw and nuw flags. We can no longer guarantee that we
1861 // won't wrap after simplification.
1862 Op->dropFlags(SDNodeFlags::NoWrap);
1863 return true;
1864 }
1865 Known <<= ShAmt;
1866 // low bits known zero.
1867 Known.Zero.setLowBits(ShAmt);
1868
1869 // Attempt to avoid multi-use ops if we don't need anything from them.
1870 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1871 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1872 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1873 if (DemandedOp0) {
1874 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1875 return TLO.CombineTo(Op, NewOp);
1876 }
1877 }
1878
1879 // TODO: Can we merge this fold with the one below?
1880 // Try shrinking the operation as long as the shift amount will still be
1881 // in range.
1882 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1883 Op.getNode()->hasOneUse()) {
1884 // Search for the smallest integer type with free casts to and from
1885 // Op's type. For expedience, just check power-of-2 integer types.
1886 unsigned DemandedSize = DemandedBits.getActiveBits();
1887 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1888 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1889 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1890 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1891 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1892 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1893 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1894 assert(DemandedSize <= SmallVTBits &&
1895 "Narrowed below demanded bits?");
1896 // We found a type with free casts.
1897 SDValue NarrowShl = TLO.DAG.getNode(
1898 ISD::SHL, dl, SmallVT,
1899 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1900 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1901 return TLO.CombineTo(
1902 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1903 }
1904 }
1905 }
1906
1907 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1908 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1909 // Only do this if we demand the upper half so the knownbits are correct.
1910 unsigned HalfWidth = BitWidth / 2;
1911 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1912 DemandedBits.countLeadingOnes() >= HalfWidth) {
1913 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1914 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1915 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1916 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1917 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1918 // If we're demanding the upper bits at all, we must ensure
1919 // that the upper bits of the shift result are known to be zero,
1920 // which is equivalent to the narrow shift being NUW.
1921 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1922 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1923 SDNodeFlags Flags;
1924 Flags.setNoSignedWrap(IsNSW);
1925 Flags.setNoUnsignedWrap(IsNUW);
1926 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1927 SDValue NewShiftAmt =
1928 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1929 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1930 NewShiftAmt, Flags);
1931 SDValue NewExt =
1932 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1933 return TLO.CombineTo(Op, NewExt);
1934 }
1935 }
1936 }
1937 } else {
1938 // This is a variable shift, so we can't shift the demand mask by a known
1939 // amount. But if we are not demanding high bits, then we are not
1940 // demanding those bits from the pre-shifted operand either.
1941 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1942 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1943 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1944 Depth + 1)) {
1945 // Disable the nsw and nuw flags. We can no longer guarantee that we
1946 // won't wrap after simplification.
1947 Op->dropFlags(SDNodeFlags::NoWrap);
1948 return true;
1949 }
1950 Known.resetAll();
1951 }
1952 }
1953
1954 // If we are only demanding sign bits then we can use the shift source
1955 // directly.
1956 if (std::optional<unsigned> MaxSA =
1957 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1958 unsigned ShAmt = *MaxSA;
1959 unsigned NumSignBits =
1960 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1961 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1962 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1963 return TLO.CombineTo(Op, Op0);
1964 }
1965 break;
1966 }
1967 case ISD::SRL: {
1968 SDValue Op0 = Op.getOperand(0);
1969 SDValue Op1 = Op.getOperand(1);
1970 EVT ShiftVT = Op1.getValueType();
1971
1972 if (std::optional<unsigned> KnownSA =
1973 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1974 unsigned ShAmt = *KnownSA;
1975 if (ShAmt == 0)
1976 return TLO.CombineTo(Op, Op0);
1977
1978 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1979 // single shift. We can do this if the top bits (which are shifted out)
1980 // are never demanded.
1981 // TODO - support non-uniform vector amounts.
1982 if (Op0.getOpcode() == ISD::SHL) {
1983 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1984 if (std::optional<unsigned> InnerSA =
1985 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1986 unsigned C1 = *InnerSA;
1987 unsigned Opc = ISD::SRL;
1988 int Diff = ShAmt - C1;
1989 if (Diff < 0) {
1990 Diff = -Diff;
1991 Opc = ISD::SHL;
1992 }
1993 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1994 return TLO.CombineTo(
1995 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1996 }
1997 }
1998 }
1999
2000 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
2001 // single sra. We can do this if the top bits are never demanded.
2002 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
2003 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2004 if (std::optional<unsigned> InnerSA =
2005 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2006 unsigned C1 = *InnerSA;
2007 // Clamp the combined shift amount if it exceeds the bit width.
2008 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
2009 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
2010 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
2011 Op0.getOperand(0), NewSA));
2012 }
2013 }
2014 }
2015
2016 APInt InDemandedMask = (DemandedBits << ShAmt);
2017
2018 // If the shift is exact, then it does demand the low bits (and knows that
2019 // they are zero).
2020 if (Op->getFlags().hasExact())
2021 InDemandedMask.setLowBits(ShAmt);
2022
2023 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2024 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2025 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2026 APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
2027 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2028 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2029 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2030 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2031 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2032 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2033 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2034 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2035 SDValue NewShiftAmt =
2036 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2037 SDValue NewShift =
2038 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2039 return TLO.CombineTo(
2040 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2041 }
2042 }
2043
2044 // Compute the new bits that are at the top now.
2045 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2046 Depth + 1))
2047 return true;
2048 Known >>= ShAmt;
2049 // High bits known zero.
2050 Known.Zero.setHighBits(ShAmt);
2051
2052 // Attempt to avoid multi-use ops if we don't need anything from them.
2053 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2054 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2055 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2056 if (DemandedOp0) {
2057 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2058 return TLO.CombineTo(Op, NewOp);
2059 }
2060 }
2061 } else {
2062 // Use generic knownbits computation as it has support for non-uniform
2063 // shift amounts.
2064 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2065 }
2066
2067 // If we are only demanding sign bits then we can use the shift source
2068 // directly.
2069 if (std::optional<unsigned> MaxSA =
2070 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2071 unsigned ShAmt = *MaxSA;
2072 // Must already be signbits in DemandedBits bounds, and can't demand any
2073 // shifted in zeroes.
2074 if (DemandedBits.countl_zero() >= ShAmt) {
2075 unsigned NumSignBits =
2076 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2077 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2078 return TLO.CombineTo(Op, Op0);
2079 }
2080 }
2081
2082 // Try to match AVG patterns (after shift simplification).
2083 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2084 DemandedElts, Depth + 1))
2085 return TLO.CombineTo(Op, AVG);
2086
2087 break;
2088 }
2089 case ISD::SRA: {
2090 SDValue Op0 = Op.getOperand(0);
2091 SDValue Op1 = Op.getOperand(1);
2092 EVT ShiftVT = Op1.getValueType();
2093
2094 // If we only want bits that already match the signbit then we don't need
2095 // to shift.
2096 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2097 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2098 NumHiDemandedBits)
2099 return TLO.CombineTo(Op, Op0);
2100
2101 // If this is an arithmetic shift right and only the low-bit is set, we can
2102 // always convert this into a logical shr, even if the shift amount is
2103 // variable. The low bit of the shift cannot be an input sign bit unless
2104 // the shift amount is >= the size of the datatype, which is undefined.
2105 if (DemandedBits.isOne())
2106 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2107
2108 if (std::optional<unsigned> KnownSA =
2109 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2110 unsigned ShAmt = *KnownSA;
2111 if (ShAmt == 0)
2112 return TLO.CombineTo(Op, Op0);
2113
2114 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2115 // supports sext_inreg.
2116 if (Op0.getOpcode() == ISD::SHL) {
2117 if (std::optional<unsigned> InnerSA =
2118 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2119 unsigned LowBits = BitWidth - ShAmt;
2120 EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2121 if (VT.isVector())
2122 ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2123 VT.getVectorElementCount());
2124
2125 if (*InnerSA == ShAmt) {
2126 if (!TLO.LegalOperations() ||
2127 isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))
2128 return TLO.CombineTo(
2129 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2130 Op0.getOperand(0),
2131 TLO.DAG.getValueType(ExtVT)));
2132
2133 // Even if we can't convert to sext_inreg, we might be able to
2134 // remove this shift pair if the input is already sign extended.
2135 unsigned NumSignBits =
2136 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2137 if (NumSignBits > ShAmt)
2138 return TLO.CombineTo(Op, Op0.getOperand(0));
2139 }
2140 }
2141 }
2142
2143 APInt InDemandedMask = (DemandedBits << ShAmt);
2144
2145 // If the shift is exact, then it does demand the low bits (and knows that
2146 // they are zero).
2147 if (Op->getFlags().hasExact())
2148 InDemandedMask.setLowBits(ShAmt);
2149
2150 // If any of the demanded bits are produced by the sign extension, we also
2151 // demand the input sign bit.
2152 if (DemandedBits.countl_zero() < ShAmt)
2153 InDemandedMask.setSignBit();
2154
2155 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2156 Depth + 1))
2157 return true;
2158 Known >>= ShAmt;
2159
2160 // If the input sign bit is known to be zero, or if none of the top bits
2161 // are demanded, turn this into an unsigned shift right.
2162 if (Known.Zero[BitWidth - ShAmt - 1] ||
2163 DemandedBits.countl_zero() >= ShAmt) {
2164 SDNodeFlags Flags;
2165 Flags.setExact(Op->getFlags().hasExact());
2166 return TLO.CombineTo(
2167 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2168 }
2169
2170 int Log2 = DemandedBits.exactLogBase2();
2171 if (Log2 >= 0) {
2172 // The bit must come from the sign.
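// (Reaching here, DemandedBits.countl_zero() < ShAmt, so the single
// demanded bit lies in the sign-extended region and equals the sign bit.)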
2173 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2174 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2175 }
2176
2177 if (Known.One[BitWidth - ShAmt - 1])
2178 // New bits are known one.
2179 Known.One.setHighBits(ShAmt);
2180
2181 // Attempt to avoid multi-use ops if we don't need anything from them.
2182 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2183 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2184 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2185 if (DemandedOp0) {
2186 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2187 return TLO.CombineTo(Op, NewOp);
2188 }
2189 }
2190 }
2191
2192 // Try to match AVG patterns (after shift simplification).
2193 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2194 DemandedElts, Depth + 1))
2195 return TLO.CombineTo(Op, AVG);
2196
2197 break;
2198 }
2199 case ISD::FSHL:
2200 case ISD::FSHR: {
2201 SDValue Op0 = Op.getOperand(0);
2202 SDValue Op1 = Op.getOperand(1);
2203 SDValue Op2 = Op.getOperand(2);
2204 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2205
2206 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2207 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2208
2209 // For fshl, 0-shift returns the 1st arg.
2210 // For fshr, 0-shift returns the 2nd arg.
2211 if (Amt == 0) {
2212 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2213 Known, TLO, Depth + 1))
2214 return true;
2215 break;
2216 }
2217
2218 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2219 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
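// e.g. For fshl with BitWidth == 8 and Amt == 3, demanding only bits 2:0
// of the result demands nothing from Op0 and only bits 7:5 from Op1.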
2220 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2221 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2222 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2223 Depth + 1))
2224 return true;
2225 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2226 Depth + 1))
2227 return true;
2228
2229 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2230 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2231 Known = Known.unionWith(Known2);
2232
2233 // Attempt to avoid multi-use ops if we don't need anything from them.
2234 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2235 !DemandedElts.isAllOnes()) {
2236 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2237 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2238 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2239 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2240 if (DemandedOp0 || DemandedOp1) {
2241 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2242 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2243 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2244 DemandedOp1, Op2);
2245 return TLO.CombineTo(Op, NewOp);
2246 }
2247 }
2248 }
2249
2250 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
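// (The funnel-shift amount is interpreted modulo BitWidth, and for a
// power-of-2 width the modulo only reads the low log2(BitWidth) bits.)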
2251 if (isPowerOf2_32(BitWidth)) {
2252 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2253 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2254 Known2, TLO, Depth + 1))
2255 return true;
2256 }
2257 break;
2258 }
2259 case ISD::ROTL:
2260 case ISD::ROTR: {
2261 SDValue Op0 = Op.getOperand(0);
2262 SDValue Op1 = Op.getOperand(1);
2263 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2264
2265 // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2266 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2267 return TLO.CombineTo(Op, Op0);
2268
2269 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2270 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2271 unsigned RevAmt = BitWidth - Amt;
2272
2273 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2274 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2275 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2276 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2277 Depth + 1))
2278 return true;
2279
2280 // rot*(x, 0) --> x
2281 if (Amt == 0)
2282 return TLO.CombineTo(Op, Op0);
2283
2284 // See if we don't demand either half of the rotated bits.
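// e.g. If only the top nibble of (rotl i8:x, 4) is demanded, the bits
// rotated in from the bottom are irrelevant and this becomes (shl x, 4).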
2285 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2286 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2287 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2288 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2289 }
2290 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2291 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2292 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2293 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2294 }
2295 }
2296
2297 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2298 if (isPowerOf2_32(BitWidth)) {
2299 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2300 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2301 Depth + 1))
2302 return true;
2303 }
2304 break;
2305 }
2306 case ISD::SMIN:
2307 case ISD::SMAX:
2308 case ISD::UMIN:
2309 case ISD::UMAX: {
2310 unsigned Opc = Op.getOpcode();
2311 SDValue Op0 = Op.getOperand(0);
2312 SDValue Op1 = Op.getOperand(1);
2313
2314 // If we're only demanding signbits, then we can simplify to OR/AND node.
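// (On values whose demanded bits are all copies of the sign bit, i.e.
// behave as 0 or -1: smin/umax act as OR, and smax/umin act as AND.)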
2315 unsigned BitOp =
2316 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2317 unsigned NumSignBits =
2318 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2319 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2320 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2321 if (NumSignBits >= NumDemandedUpperBits)
2322 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2323
2324 // Check if one arg is always less/greater than (or equal) to the other arg.
2325 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2326 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2327 switch (Opc) {
2328 case ISD::SMIN:
2329 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2330 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2331 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2332 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2333 Known = KnownBits::smin(Known0, Known1);
2334 break;
2335 case ISD::SMAX:
2336 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2337 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2338 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2339 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2340 Known = KnownBits::smax(Known0, Known1);
2341 break;
2342 case ISD::UMIN:
2343 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2344 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2345 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2346 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2347 Known = KnownBits::umin(Known0, Known1);
2348 break;
2349 case ISD::UMAX:
2350 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2351 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2352 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2353 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2354 Known = KnownBits::umax(Known0, Known1);
2355 break;
2356 }
2357 break;
2358 }
2359 case ISD::BITREVERSE: {
2360 SDValue Src = Op.getOperand(0);
2361 APInt DemandedSrcBits = DemandedBits.reverseBits();
2362 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2363 Depth + 1))
2364 return true;
2365 Known = Known2.reverseBits();
2366 break;
2367 }
2368 case ISD::BSWAP: {
2369 SDValue Src = Op.getOperand(0);
2370
2371 // If the only bits demanded come from one byte of the bswap result,
2372 // just shift the input byte into position to eliminate the bswap.
2373 unsigned NLZ = DemandedBits.countl_zero();
2374 unsigned NTZ = DemandedBits.countr_zero();
2375
2376 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2377 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2378 // have 14 leading zeros, round to 8.
2379 NLZ = alignDown(NLZ, 8);
2380 NTZ = alignDown(NTZ, 8);
2381 // If we need exactly one byte, we can do this transformation.
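// e.g. For an i32 bswap where only bits 15:8 are demanded (NLZ == 16,
// NTZ == 8), the demanded byte is byte 2 of the source and the bswap
// can be replaced by (srl Src, 8).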
2382 if (BitWidth - NLZ - NTZ == 8) {
2383 // Replace this with either a left or right shift to get the byte into
2384 // the right place.
2385 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2386 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2387 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2388 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2389 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2390 return TLO.CombineTo(Op, NewOp);
2391 }
2392 }
2393
2394 APInt DemandedSrcBits = DemandedBits.byteSwap();
2395 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2396 Depth + 1))
2397 return true;
2398 Known = Known2.byteSwap();
2399 break;
2400 }
2401 case ISD::CTPOP: {
2402 // If only 1 bit is demanded, replace with PARITY as long as we're before
2403 // op legalization.
2404 // FIXME: Limit to scalars for now.
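// (PARITY computes popcount modulo 2, which is exactly bit 0 of CTPOP.)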
2405 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2406 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2407 Op.getOperand(0)));
2408
2409 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2410 break;
2411 }
2412 case ISD::SIGN_EXTEND_INREG: {
2413 SDValue Op0 = Op.getOperand(0);
2414 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2415 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2416
2417 // If we only care about the highest bit, don't bother shifting right.
2418 if (DemandedBits.isSignMask()) {
2419 unsigned MinSignedBits =
2420 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2421 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2422 // However if the input is already sign extended we expect the sign
2423 // extension to be dropped altogether later and do not simplify.
2424 if (!AlreadySignExtended) {
2425 // Compute the correct shift amount type, which must be getShiftAmountTy
2426 // for scalar types after legalization.
2427 SDValue ShiftAmt =
2428 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2429 return TLO.CombineTo(Op,
2430 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2431 }
2432 }
2433
2434 // If none of the extended bits are demanded, eliminate the sextinreg.
2435 if (DemandedBits.getActiveBits() <= ExVTBits)
2436 return TLO.CombineTo(Op, Op0);
2437
2438 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2439
2440 // Since the sign extended bits are demanded, we know that the sign
2441 // bit is demanded.
2442 InputDemandedBits.setBit(ExVTBits - 1);
2443
2444 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2445 Depth + 1))
2446 return true;
2447
2448 // If the sign bit of the input is known set or clear, then we know the
2449 // top bits of the result.
2450
2451 // If the input sign bit is known zero, convert this into a zero extension.
2452 if (Known.Zero[ExVTBits - 1])
2453 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2454
2455 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2456 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2457 Known.One.setBitsFrom(ExVTBits);
2458 Known.Zero &= Mask;
2459 } else { // Input sign bit unknown
2460 Known.Zero &= Mask;
2461 Known.One &= Mask;
2462 }
2463 break;
2464 }
2465 case ISD::BUILD_PAIR: {
2466 EVT HalfVT = Op.getOperand(0).getValueType();
2467 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2468
2469 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2470 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2471
2472 KnownBits KnownLo, KnownHi;
2473
2474 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2475 return true;
2476
2477 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2478 return true;
2479
2480 Known = KnownHi.concat(KnownLo);
2481 break;
2482 }
2483 case ISD::ZERO_EXTEND_VECTOR_INREG:
2484 if (VT.isScalableVector())
2485 return false;
2486 [[fallthrough]];
2487 case ISD::ZERO_EXTEND: {
2488 SDValue Src = Op.getOperand(0);
2489 EVT SrcVT = Src.getValueType();
2490 unsigned InBits = SrcVT.getScalarSizeInBits();
2491 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2492 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2493
2494 // If none of the top bits are demanded, convert this into an any_extend.
2495 if (DemandedBits.getActiveBits() <= InBits) {
2496 // If we only need the non-extended bits of the bottom element
2497 // then we can just bitcast to the result.
2498 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2499 VT.getSizeInBits() == SrcVT.getSizeInBits())
2500 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2501
2502 unsigned Opc =
2503 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2504 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2505 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2506 }
2507
2508 APInt InDemandedBits = DemandedBits.trunc(InBits);
2509 APInt InDemandedElts = DemandedElts.zext(InElts);
2510 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2511 Depth + 1)) {
2512 Op->dropFlags(SDNodeFlags::NonNeg);
2513 return true;
2514 }
2515 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2516 Known = Known.zext(BitWidth);
2517
2518 // Attempt to avoid multi-use ops if we don't need anything from them.
2519 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2520 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2521 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2522 break;
2523 }
2524 case ISD::SIGN_EXTEND_VECTOR_INREG:
2525 if (VT.isScalableVector())
2526 return false;
2527 [[fallthrough]];
2528 case ISD::SIGN_EXTEND: {
2529 SDValue Src = Op.getOperand(0);
2530 EVT SrcVT = Src.getValueType();
2531 unsigned InBits = SrcVT.getScalarSizeInBits();
2532 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2533 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2534
2535 APInt InDemandedElts = DemandedElts.zext(InElts);
2536 APInt InDemandedBits = DemandedBits.trunc(InBits);
2537
2538 // Since some of the sign extended bits are demanded, we know that the sign
2539 // bit is demanded.
2540 InDemandedBits.setBit(InBits - 1);
2541
2542 // If none of the top bits are demanded, convert this into an any_extend.
2543 if (DemandedBits.getActiveBits() <= InBits) {
2544 // If we only need the non-extended bits of the bottom element
2545 // then we can just bitcast to the result.
2546 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2547 VT.getSizeInBits() == SrcVT.getSizeInBits())
2548 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2549
2550 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2551 if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
2552 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2553 InBits) {
2554 unsigned Opc =
2555 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2556 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2557 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2558 }
2559 }
2560
2561 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2562 Depth + 1))
2563 return true;
2564 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2565
2566 // If the sign bit is known one, the top bits match.
2567 Known = Known.sext(BitWidth);
2568
2569 // If the sign bit is known zero, convert this to a zero extend.
2570 if (Known.isNonNegative()) {
2571 unsigned Opc =
2572 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2573 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2574 SDNodeFlags Flags;
2575 if (!IsVecInReg)
2576 Flags |= SDNodeFlags::NonNeg;
2577 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2578 }
2579 }
2580
2581 // Attempt to avoid multi-use ops if we don't need anything from them.
2582 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2583 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2584 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2585 break;
2586 }
2587 case ISD::ANY_EXTEND_VECTOR_INREG:
2588 if (VT.isScalableVector())
2589 return false;
2590 [[fallthrough]];
2591 case ISD::ANY_EXTEND: {
2592 SDValue Src = Op.getOperand(0);
2593 EVT SrcVT = Src.getValueType();
2594 unsigned InBits = SrcVT.getScalarSizeInBits();
2595 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2596 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2597
2598 // If we only need the bottom element then we can just bitcast.
2599 // TODO: Handle ANY_EXTEND?
2600 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2601 VT.getSizeInBits() == SrcVT.getSizeInBits())
2602 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2603
2604 APInt InDemandedBits = DemandedBits.trunc(InBits);
2605 APInt InDemandedElts = DemandedElts.zext(InElts);
2606 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2607 Depth + 1))
2608 return true;
2609 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2610 Known = Known.anyext(BitWidth);
2611
2612 // Attempt to avoid multi-use ops if we don't need anything from them.
2613 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2614 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2615 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2616 break;
2617 }
2618 case ISD::TRUNCATE: {
2619 SDValue Src = Op.getOperand(0);
2620
2621 // Simplify the input, using demanded bit information, and compute the known
2622 // zero/one bits live out.
2623 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2624 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2625 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2626 Depth + 1)) {
2627 // Disable the nsw and nuw flags. We can no longer guarantee that we
2628 // won't wrap after simplification.
2629 Op->dropFlags(SDNodeFlags::NoWrap);
2630 return true;
2631 }
2632 Known = Known.trunc(BitWidth);
2633
2634 // Attempt to avoid multi-use ops if we don't need anything from them.
2635 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2636 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2637 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2638
2639 // If the input is only used by this truncate, see if we can shrink it based
2640 // on the known demanded bits.
2641 switch (Src.getOpcode()) {
2642 default:
2643 break;
2644 case ISD::SRL:
2645 // Shrink SRL by a constant if none of the high bits shifted in are
2646 // demanded.
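// e.g. (i32 (trunc (i64 (srl x, 16)))) -> (i32 (srl (i32 (trunc x)), 16))
// when none of the bits shifted in from the upper half are demanded.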
2647 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2648 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2649 // undesirable.
2650 break;
2651
2652 if (Src.getNode()->hasOneUse()) {
2653 if (isTruncateFree(Src, VT) &&
2654 !isTruncateFree(Src.getValueType(), VT)) {
2655 // If the truncate is only free at trunc(srl), do not turn it into
2656 // srl(trunc). The check first verifies that the truncate is free at
2657 // Src's opcode (srl), and then that the truncate is not done by
2658 // referencing a sub-register. In testing, if both trunc(srl)'s and
2659 // srl(trunc)'s truncs are free, srl(trunc) performs better. If only
2660 // trunc(srl)'s trunc is free, trunc(srl) is better.
2661 break;
2662 }
2663
2664 std::optional<unsigned> ShAmtC =
2665 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2666 if (!ShAmtC || *ShAmtC >= BitWidth)
2667 break;
2668 unsigned ShVal = *ShAmtC;
2669
2670 APInt HighBits =
2671 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2672 HighBits.lshrInPlace(ShVal);
2673 HighBits = HighBits.trunc(BitWidth);
2674 if (!(HighBits & DemandedBits)) {
2675 // None of the shifted in bits are needed. Add a truncate of the
2676 // shift input, then shift it.
2677 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2678 SDValue NewTrunc =
2679 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2680 return TLO.CombineTo(
2681 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2682 }
2683 }
2684 break;
2685 }
2686
2687 break;
2688 }
2689 case ISD::AssertZext: {
2690 // AssertZext demands all of the high bits, plus any of the low bits
2691 // demanded by its users.
2692 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2693 APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2694 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2695 TLO, Depth + 1))
2696 return true;
2697
2698 Known.Zero |= ~InMask;
2699 Known.One &= (~Known.Zero);
2700 break;
2701 }
2702 case ISD::EXTRACT_VECTOR_ELT: {
2703 SDValue Src = Op.getOperand(0);
2704 SDValue Idx = Op.getOperand(1);
2705 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2706 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2707
2708 if (SrcEltCnt.isScalable())
2709 return false;
2710
2711 // Demand the bits from every vector element without a constant index.
2712 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2713 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2714 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2715 if (CIdx->getAPIntValue().ult(NumSrcElts))
2716 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2717
2718 // If BitWidth > EltBitWidth the value is any-extended. So we do not know
2719 // anything about the extended bits.
2720 APInt DemandedSrcBits = DemandedBits;
2721 if (BitWidth > EltBitWidth)
2722 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2723
2724 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2725 Depth + 1))
2726 return true;
2727
2728 // Attempt to avoid multi-use ops if we don't need anything from them.
2729 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2730 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2731 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2732 SDValue NewOp =
2733 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2734 return TLO.CombineTo(Op, NewOp);
2735 }
2736 }
2737
2738 Known = Known2;
2739 if (BitWidth > EltBitWidth)
2740 Known = Known.anyext(BitWidth);
2741 break;
2742 }
2743 case ISD::BITCAST: {
2744 if (VT.isScalableVector())
2745 return false;
2746 SDValue Src = Op.getOperand(0);
2747 EVT SrcVT = Src.getValueType();
2748 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2749
2750 // If this is an FP->Int bitcast and if the sign bit is the only
2751 // thing demanded, turn this into a FGETSIGN.
2752 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2753 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2754 SrcVT.isFloatingPoint()) {
2755 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2756 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2757 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2758 SrcVT != MVT::f128) {
2759 // Cannot eliminate/lower SHL for f128 yet.
2760 EVT Ty = OpVTLegal ? VT : MVT::i32;
2761 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2762 // place. We expect the SHL to be eliminated by other optimizations.
2763 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2764 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2765 if (!OpVTLegal && OpVTSizeInBits > 32)
2766 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2767 unsigned ShVal = Op.getValueSizeInBits() - 1;
2768 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2769 return TLO.CombineTo(Op,
2770 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2771 }
2772 }
2773
2774 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2775 // Demand the elt/bit if any of the original elts/bits are demanded.
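// e.g. For (i64 bitcast (v2i32 x)) on little-endian, demanded bits 31:0
// map to element 0 of x and demanded bits 63:32 map to element 1.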
2776 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2777 unsigned Scale = BitWidth / NumSrcEltBits;
2778 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2779 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2780 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2781 for (unsigned i = 0; i != Scale; ++i) {
2782 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2783 unsigned BitOffset = EltOffset * NumSrcEltBits;
2784 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2785 if (!Sub.isZero()) {
2786 DemandedSrcBits |= Sub;
2787 for (unsigned j = 0; j != NumElts; ++j)
2788 if (DemandedElts[j])
2789 DemandedSrcElts.setBit((j * Scale) + i);
2790 }
2791 }
2792
2793 APInt KnownSrcUndef, KnownSrcZero;
2794 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2795 KnownSrcZero, TLO, Depth + 1))
2796 return true;
2797
2798 KnownBits KnownSrcBits;
2799 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2800 KnownSrcBits, TLO, Depth + 1))
2801 return true;
2802 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2803 // TODO - bigendian once we have test coverage.
2804 unsigned Scale = NumSrcEltBits / BitWidth;
2805 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2806 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2807 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2808 for (unsigned i = 0; i != NumElts; ++i)
2809 if (DemandedElts[i]) {
2810 unsigned Offset = (i % Scale) * BitWidth;
2811 DemandedSrcBits.insertBits(DemandedBits, Offset);
2812 DemandedSrcElts.setBit(i / Scale);
2813 }
2814
2815 if (SrcVT.isVector()) {
2816 APInt KnownSrcUndef, KnownSrcZero;
2817 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2818 KnownSrcZero, TLO, Depth + 1))
2819 return true;
2820 }
2821
2822 KnownBits KnownSrcBits;
2823 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2824 KnownSrcBits, TLO, Depth + 1))
2825 return true;
2826
2827 // Attempt to avoid multi-use ops if we don't need anything from them.
2828 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2829 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2830 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2831 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2832 return TLO.CombineTo(Op, NewOp);
2833 }
2834 }
2835 }
2836
2837 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2838 // recursive call where Known may be useful to the caller.
2839 if (Depth > 0) {
2840 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2841 return false;
2842 }
2843 break;
2844 }
2845 case ISD::MUL:
2846 if (DemandedBits.isPowerOf2()) {
2847 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2848 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2849 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2850 unsigned CTZ = DemandedBits.countr_zero();
2851 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2852 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2853 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2854 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2855 return TLO.CombineTo(Op, Shl);
2856 }
2857 }
2858 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2859 // X * X is odd iff X is odd.
2860 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
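// (X even: X*X = 4k*k; X odd: X*X = 4k(k+1) + 1. Either way bit 1 is 0.)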
2861 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2862 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2863 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2864 return TLO.CombineTo(Op, And1);
2865 }
2866 [[fallthrough]];
2867 case ISD::PTRADD:
2868 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
2869 break;
2870 // PTRADD behaves like ADD if pointers are represented as integers.
2871 [[fallthrough]];
2872 case ISD::ADD:
2873 case ISD::SUB: {
2874 // Add, Sub, and Mul don't demand any bits in positions beyond that
2875 // of the highest bit demanded of them.
2876 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2877 SDNodeFlags Flags = Op.getNode()->getFlags();
2878 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2879 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2880 KnownBits KnownOp0, KnownOp1;
2881 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2882 const KnownBits &KnownRHS) {
2883 if (Op.getOpcode() == ISD::MUL)
2884 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2885 return Demanded;
2886 };
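// (For the MUL case: t known trailing zeros in the RHS shift the product
// left by t, so the top t bits of the LHS can never reach a demanded bit.)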
2887 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2888 Depth + 1) ||
2889 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2890 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2891 // See if the operation should be performed at a smaller bit width.
2892 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2893 // Disable the nsw and nuw flags. We can no longer guarantee that we
2894 // won't wrap after simplification.
2895 Op->dropFlags(SDNodeFlags::NoWrap);
2896 return true;
2897 }
2898
2899 // neg x with only low bit demanded is simply x.
2900 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2901 isNullConstant(Op0))
2902 return TLO.CombineTo(Op, Op1);
2903
2904 // Attempt to avoid multi-use ops if we don't need anything from them.
2905 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2906 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2907 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2908 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2909 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2910 if (DemandedOp0 || DemandedOp1) {
2911 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2912 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2913 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2914 Flags & ~SDNodeFlags::NoWrap);
2915 return TLO.CombineTo(Op, NewOp);
2916 }
2917 }
2918
2919 // If we have a constant operand, we may be able to turn it into -1 if we
2920 // do not demand the high bits. This can make the constant smaller to
2921 // encode, allow more general folding, or match specialized instruction
2922 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2923 // is probably not useful (and could be detrimental).
2924 ConstantSDNode *C = isConstOrConstSplat(Op1);
2925 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2926 if (C && !C->isAllOnes() && !C->isOne() &&
2927 (C->getAPIntValue() | HighMask).isAllOnes()) {
2928 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2929 // Disable the nsw and nuw flags. We can no longer guarantee that we
2930 // won't wrap after simplification.
2931 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2932 Flags & ~SDNodeFlags::NoWrap);
2933 return TLO.CombineTo(Op, NewOp);
2934 }
2935
2936 // Match a multiply with a disguised negated-power-of-2 and convert to
2937 // an equivalent shift-left amount.
2938 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2939 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2940 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2941 return 0;
2942
2943 // Don't touch opaque constants. Also, ignore zero and power-of-2
2944 // multiplies. Those will get folded later.
2945 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2946 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2947 !MulC->getAPIntValue().isPowerOf2()) {
2948 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2949 if (UnmaskedC.isNegatedPowerOf2())
2950 return (-UnmaskedC).logBase2();
2951 }
2952 return 0;
2953 };
2954
2955 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2956 unsigned ShlAmt) {
2957 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2958 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2959 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2960 return TLO.CombineTo(Op, Res);
2961 };
2962
2964 if (Op.getOpcode() == ISD::ADD) {
2965 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2966 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2967 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2968 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2969 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2970 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2971 }
2972 if (Op.getOpcode() == ISD::SUB) {
2973 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2974 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2975 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2976 }
2977 }
2978
2979 if (Op.getOpcode() == ISD::MUL) {
2980 Known = KnownBits::mul(KnownOp0, KnownOp1);
2981 } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
2982 Known = KnownBits::computeForAddSub(
2983 Op.getOpcode() != ISD::SUB, Flags.hasNoSignedWrap(),
2984 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2985 }
2986 break;
2987 }
2988 case ISD::FABS: {
2989 SDValue Op0 = Op.getOperand(0);
2990 APInt SignMask = APInt::getSignMask(BitWidth);
2991
2992 if (!DemandedBits.intersects(SignMask))
2993 return TLO.CombineTo(Op, Op0);
2994
2995 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
2996 Depth + 1))
2997 return true;
2998
2999 if (Known.isNonNegative())
3000 return TLO.CombineTo(Op, Op0);
3001 if (Known.isNegative())
3002 return TLO.CombineTo(
3003 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0, Op->getFlags()));
3004
3005 Known.Zero |= SignMask;
3006 Known.One &= ~SignMask;
3007
3008 break;
3009 }
3010 case ISD::FCOPYSIGN: {
3011 SDValue Op0 = Op.getOperand(0);
3012 SDValue Op1 = Op.getOperand(1);
3013
3014 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3015 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3016 APInt SignMask0 = APInt::getSignMask(BitWidth0);
3017 APInt SignMask1 = APInt::getSignMask(BitWidth1);
3018
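// FCOPYSIGN only replaces the sign bit of Op0 with the sign of Op1, so if
// that bit is not demanded the first operand can be used directly.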
3019 if (!DemandedBits.intersects(SignMask0))
3020 return TLO.CombineTo(Op, Op0);
3021
3022 if (SimplifyDemandedBits(Op0, ~SignMask0 & DemandedBits, DemandedElts,
3023 Known, TLO, Depth + 1) ||
3024 SimplifyDemandedBits(Op1, SignMask1, DemandedElts, Known2, TLO,
3025 Depth + 1))
3026 return true;
3027
3028 if (Known2.isNonNegative())
3029 return TLO.CombineTo(
3030 Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
3031
3032 if (Known2.isNegative())
3033 return TLO.CombineTo(
3034 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT,
3035 TLO.DAG.getNode(ISD::FABS, SDLoc(Op0), VT, Op0)));
3036
3037 Known.Zero &= ~SignMask0;
3038 Known.One &= ~SignMask0;
3039 break;
3040 }
3041 case ISD::FNEG: {
3042 SDValue Op0 = Op.getOperand(0);
3043 APInt SignMask = APInt::getSignMask(BitWidth);
3044
3045 if (!DemandedBits.intersects(SignMask))
3046 return TLO.CombineTo(Op, Op0);
3047
3048 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3049 Depth + 1))
3050 return true;
3051
3052 if (!Known.isSignUnknown()) {
3053 Known.Zero ^= SignMask;
3054 Known.One ^= SignMask;
3055 }
3056
3057 break;
3058 }
3059 default:
3060 // We also ask the target about intrinsics (which could be specific to it).
3061 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3062 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3063 // TODO: Probably okay to remove after audit; here to reduce change size
3064 // in initial enablement patch for scalable vectors
3065 if (Op.getValueType().isScalableVector())
3066 break;
3067 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
3068 Known, TLO, Depth))
3069 return true;
3070 break;
3071 }
3072
3073 // Just use computeKnownBits to compute output bits.
3074 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3075 break;
3076 }
3077
3078 // If we know the value of all of the demanded bits, return this as a
3079 // constant.
3080 if (!isTargetCanonicalConstantNode(Op) &&
3081 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
3082 // Avoid folding to a constant if any OpaqueConstant is involved.
3083 if (llvm::any_of(Op->ops(), [](SDValue V) {
3084 auto *C = dyn_cast<ConstantSDNode>(V);
3085 return C && C->isOpaque();
3086 }))
3087 return false;
3088 if (VT.isInteger())
3089 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
3090 if (VT.isFloatingPoint())
3091 return TLO.CombineTo(
3092 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
3093 dl, VT));
3094 }
3095
3096 // A multi-use 'all demanded elts' simplify failed to find any knownbits.
3097 // Try again just for the original demanded elts.
3098 // Ensure we do this AFTER constant folding above.
3099 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3100 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3101
3102 return false;
3103}
3104
3105 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
3106 const APInt &DemandedElts,
3107 DAGCombinerInfo &DCI) const {
3108 SelectionDAG &DAG = DCI.DAG;
3109 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3110 !DCI.isBeforeLegalizeOps());
3111
3112 APInt KnownUndef, KnownZero;
3113 bool Simplified =
3114 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3115 if (Simplified) {
3116 DCI.AddToWorklist(Op.getNode());
3117 DCI.CommitTargetLoweringOpt(TLO);
3118 }
3119
3120 return Simplified;
3121}
3122
3123/// Given a vector binary operation and known undefined elements for each input
3124/// operand, compute whether each element of the output is undefined.
3125 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
3126 const APInt &UndefOp0,
3127 const APInt &UndefOp1) {
3128 EVT VT = BO.getValueType();
3129 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3130 "Vector binop only");
3131
3132 EVT EltVT = VT.getVectorElementType();
3133 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3134 assert(UndefOp0.getBitWidth() == NumElts &&
3135 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3136
3137 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3138 const APInt &UndefVals) {
3139 if (UndefVals[Index])
3140 return DAG.getUNDEF(EltVT);
3141
3142 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3143 // Try hard to make sure that the getNode() call is not creating temporary
3144 // nodes. Ignore opaque integers because they do not constant fold.
3145 SDValue Elt = BV->getOperand(Index);
3146 auto *C = dyn_cast<ConstantSDNode>(Elt);
3147 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3148 return Elt;
3149 }
3150
3151 return SDValue();
3152 };
3153
3154 APInt KnownUndef = APInt::getZero(NumElts);
3155 for (unsigned i = 0; i != NumElts; ++i) {
3156 // If both inputs for this element are either constant or undef and match
3157 // the element type, compute the constant/undef result for this element of
3158 // the vector.
3159 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3160 // not handle FP constants. The code within getNode() should be refactored
3161 // to avoid the danger of creating a bogus temporary node here.
3162 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3163 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3164 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3165 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3166 KnownUndef.setBit(i);
3167 }
3168 return KnownUndef;
3169}
3170
3171 bool TargetLowering::SimplifyDemandedVectorElts(
3172 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3173 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3174 bool AssumeSingleUse) const {
3175 EVT VT = Op.getValueType();
3176 unsigned Opcode = Op.getOpcode();
3177 APInt DemandedElts = OriginalDemandedElts;
3178 unsigned NumElts = DemandedElts.getBitWidth();
3179 assert(VT.isVector() && "Expected vector op");
3180
3181 KnownUndef = KnownZero = APInt::getZero(NumElts);
3182
3183 if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3184 return false;
3185
3186 // TODO: For now we assume we know nothing about scalable vectors.
3187 if (VT.isScalableVector())
3188 return false;
3189
3190 assert(VT.getVectorNumElements() == NumElts &&
3191 "Mask size mismatches value type element count!");
3192
3193 // Undef operand.
3194 if (Op.isUndef()) {
3195 KnownUndef.setAllBits();
3196 return false;
3197 }
3198
3199 // If Op has other users, assume that all elements are needed.
3200 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3201 DemandedElts.setAllBits();
3202
3203 // Not demanding any elements from Op.
3204 if (DemandedElts == 0) {
3205 KnownUndef.setAllBits();
3206 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3207 }
3208
3209 // Limit search depth.
3210 if (Depth >= SelectionDAG::MaxRecursionDepth)
3211 return false;
3212
3213 SDLoc DL(Op);
3214 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3215 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3216
3217 // Helper for demanding the specified elements and all the bits of both binary
3218 // operands.
3219 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3220 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3221 TLO.DAG, Depth + 1);
3222 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3223 TLO.DAG, Depth + 1);
3224 if (NewOp0 || NewOp1) {
3225 SDValue NewOp =
3226 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3227 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3228 return TLO.CombineTo(Op, NewOp);
3229 }
3230 return false;
3231 };
3232
3233 switch (Opcode) {
3234 case ISD::SCALAR_TO_VECTOR: {
3235 if (!DemandedElts[0]) {
3236 KnownUndef.setAllBits();
3237 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3238 }
3239 KnownUndef.setHighBits(NumElts - 1);
3240 break;
3241 }
3242 case ISD::BITCAST: {
3243 SDValue Src = Op.getOperand(0);
3244 EVT SrcVT = Src.getValueType();
3245
3246 if (!SrcVT.isVector()) {
3247 // TODO - bigendian once we have test coverage.
3248 if (IsLE) {
3249 APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
3250 unsigned EltSize = VT.getScalarSizeInBits();
3251 for (unsigned I = 0; I != NumElts; ++I) {
3252 if (DemandedElts[I]) {
3253 unsigned Offset = I * EltSize;
3254 DemandedSrcBits.setBits(Offset, Offset + EltSize);
3255 }
3256 }
3257 KnownBits Known;
3258 if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
3259 return true;
3260 }
3261 break;
3262 }
3263
3264 // Fast handling of 'identity' bitcasts.
3265 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3266 if (NumSrcElts == NumElts)
3267 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3268 KnownZero, TLO, Depth + 1);
3269
3270 APInt SrcDemandedElts, SrcZero, SrcUndef;
3271
3272 // Bitcast from 'large element' src vector to 'small element' vector: we
3273 // must demand a source element if any DemandedElt maps to it.
3274 if ((NumElts % NumSrcElts) == 0) {
3275 unsigned Scale = NumElts / NumSrcElts;
3276 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3277 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3278 TLO, Depth + 1))
3279 return true;
3280
3281 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3282 // of the large element.
3283 // TODO - bigendian once we have test coverage.
3284 if (IsLE) {
3285 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3286 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3287 for (unsigned i = 0; i != NumElts; ++i)
3288 if (DemandedElts[i]) {
3289 unsigned Ofs = (i % Scale) * EltSizeInBits;
3290 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3291 }
3292
3293 KnownBits Known;
3294 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3295 TLO, Depth + 1))
3296 return true;
3297
3298 // The bitcast has split each wide element into a number of
3299 // narrow subelements. We have just computed the Known bits
3300 // for wide elements. See if element splitting results in
3301 // some subelements being zero. Only for demanded elements!
3302 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3303 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3304 .isAllOnes())
3305 continue;
3306 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3307 unsigned Elt = Scale * SrcElt + SubElt;
3308 if (DemandedElts[Elt])
3309 KnownZero.setBit(Elt);
3310 }
3311 }
3312 }
3313
3314 // If the src element is zero/undef then so are all the output elements -
3315 // only demanded elements are guaranteed to be correct.
3316 for (unsigned i = 0; i != NumSrcElts; ++i) {
3317 if (SrcDemandedElts[i]) {
3318 if (SrcZero[i])
3319 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3320 if (SrcUndef[i])
3321 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3322 }
3323 }
3324 }
3325
3326 // Bitcast from 'small element' src vector to 'large element' vector: we
3327 // demand all smaller source elements covered by the larger demanded element
3328 // of this vector.
3329 if ((NumSrcElts % NumElts) == 0) {
3330 unsigned Scale = NumSrcElts / NumElts;
3331 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3332 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3333 TLO, Depth + 1))
3334 return true;
3335
3336 // If all the src elements covering an output element are zero/undef, then
3337 // the output element will be as well, assuming it was demanded.
3338 for (unsigned i = 0; i != NumElts; ++i) {
3339 if (DemandedElts[i]) {
3340 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3341 KnownZero.setBit(i);
3342 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3343 KnownUndef.setBit(i);
3344 }
3345 }
3346 }
3347 break;
3348 }
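// Illustrative sketch (not from the LLVM sources): the bitcast case above
// rescales demanded-element masks with APIntOps::ScaleBitMask. For a
// hypothetical v4i8 -> v2i16 bitcast (NumElts = 4, NumSrcElts = 2), demanding
// narrow elements 0 and 3 demands both wide source elements:
static APInt exampleScaleDemandedElts() {
  APInt DemandedElts(4, 0b1001);                  // narrow elements 0 and 3
  return APIntOps::ScaleBitMask(DemandedElts, 2); // 0b11: both wide elements
}
// Wide element 0 covers narrow elements 0-1 and wide element 1 covers 2-3, so
// any demanded narrow element marks its covering wide element as demanded.
// End of sketch.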
3349 case ISD::FREEZE: {
3350 SDValue N0 = Op.getOperand(0);
3351 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3352 /*PoisonOnly=*/false,
3353 Depth + 1))
3354 return TLO.CombineTo(Op, N0);
3355
3356 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3357 // freeze(op(x, ...)) -> op(freeze(x), ...).
3358 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3359 return TLO.CombineTo(
3360 Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
3361 TLO.DAG.getFreeze(N0.getOperand(0))));
3362 break;
3363 }
3364 case ISD::BUILD_VECTOR: {
3365 // Check all elements and simplify any unused elements with UNDEF.
3366 if (!DemandedElts.isAllOnes()) {
3367 // Don't simplify BROADCASTS.
3368 if (llvm::any_of(Op->op_values(),
3369 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3370 SmallVector<SDValue, 32> Ops(Op->ops());
3371 bool Updated = false;
3372 for (unsigned i = 0; i != NumElts; ++i) {
3373 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3374 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3375 KnownUndef.setBit(i);
3376 Updated = true;
3377 }
3378 }
3379 if (Updated)
3380 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3381 }
3382 }
3383 for (unsigned i = 0; i != NumElts; ++i) {
3384 SDValue SrcOp = Op.getOperand(i);
3385 if (SrcOp.isUndef()) {
3386 KnownUndef.setBit(i);
3387 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3388 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3389 KnownZero.setBit(i);
3390 }
3391 }
3392 break;
3393 }
3394 case ISD::CONCAT_VECTORS: {
3395 EVT SubVT = Op.getOperand(0).getValueType();
3396 unsigned NumSubVecs = Op.getNumOperands();
3397 unsigned NumSubElts = SubVT.getVectorNumElements();
3398 for (unsigned i = 0; i != NumSubVecs; ++i) {
3399 SDValue SubOp = Op.getOperand(i);
3400 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3401 APInt SubUndef, SubZero;
3402 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3403 Depth + 1))
3404 return true;
3405 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3406 KnownZero.insertBits(SubZero, i * NumSubElts);
3407 }
3408
3409 // Attempt to avoid multi-use ops if we don't need anything from them.
3410 if (!DemandedElts.isAllOnes()) {
3411 bool FoundNewSub = false;
3412 SmallVector<SDValue, 2> DemandedSubOps;
3413 for (unsigned i = 0; i != NumSubVecs; ++i) {
3414 SDValue SubOp = Op.getOperand(i);
3415 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3416 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3417 SubOp, SubElts, TLO.DAG, Depth + 1);
3418 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3419 FoundNewSub = NewSubOp ? true : FoundNewSub;
3420 }
3421 if (FoundNewSub) {
3422 SDValue NewOp =
3423 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3424 return TLO.CombineTo(Op, NewOp);
3425 }
3426 }
3427 break;
3428 }
3429 case ISD::INSERT_SUBVECTOR: {
3430 // Demand any elements from the subvector and the remainder from the src it
3431 // is inserted into.
3432 SDValue Src = Op.getOperand(0);
3433 SDValue Sub = Op.getOperand(1);
3434 uint64_t Idx = Op.getConstantOperandVal(2);
3435 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3436 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3437 APInt DemandedSrcElts = DemandedElts;
3438 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
3439
3440 // If none of the sub operand elements are demanded, bypass the insert.
3441 if (!DemandedSubElts)
3442 return TLO.CombineTo(Op, Src);
3443
3444 APInt SubUndef, SubZero;
3445 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3446 Depth + 1))
3447 return true;
3448
3449 // If none of the src operand elements are demanded, replace it with undef.
3450 if (!DemandedSrcElts && !Src.isUndef())
3451 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3452 TLO.DAG.getUNDEF(VT), Sub,
3453 Op.getOperand(2)));
3454
3455 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3456 TLO, Depth + 1))
3457 return true;
3458 KnownUndef.insertBits(SubUndef, Idx);
3459 KnownZero.insertBits(SubZero, Idx);
3460
3461 // Attempt to avoid multi-use ops if we don't need anything from them.
3462 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3463 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3464 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3465 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3466 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3467 if (NewSrc || NewSub) {
3468 NewSrc = NewSrc ? NewSrc : Src;
3469 NewSub = NewSub ? NewSub : Sub;
3470 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3471 NewSub, Op.getOperand(2));
3472 return TLO.CombineTo(Op, NewOp);
3473 }
3474 }
3475 break;
3476 }
3477 case ISD::EXTRACT_SUBVECTOR: {
3478 // Offset the demanded elts by the subvector index.
3479 SDValue Src = Op.getOperand(0);
3480 if (Src.getValueType().isScalableVector())
3481 break;
3482 uint64_t Idx = Op.getConstantOperandVal(1);
3483 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3484 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3485
3486 APInt SrcUndef, SrcZero;
3487 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3488 Depth + 1))
3489 return true;
3490 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3491 KnownZero = SrcZero.extractBits(NumElts, Idx);
3492
3493 // Attempt to avoid multi-use ops if we don't need anything from them.
3494 if (!DemandedElts.isAllOnes()) {
3495 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3496 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3497 if (NewSrc) {
3498 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3499 Op.getOperand(1));
3500 return TLO.CombineTo(Op, NewOp);
3501 }
3502 }
3503 break;
3504 }
3505 case ISD::INSERT_VECTOR_ELT: {
3506 SDValue Vec = Op.getOperand(0);
3507 SDValue Scl = Op.getOperand(1);
3508 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3509
3510 // For a legal, constant insertion index, if we don't need this insertion
3511 // then strip it, else remove it from the demanded elts.
3512 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3513 unsigned Idx = CIdx->getZExtValue();
3514 if (!DemandedElts[Idx])
3515 return TLO.CombineTo(Op, Vec);
3516
3517 APInt DemandedVecElts(DemandedElts);
3518 DemandedVecElts.clearBit(Idx);
3519 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3520 KnownZero, TLO, Depth + 1))
3521 return true;
3522
3523 KnownUndef.setBitVal(Idx, Scl.isUndef());
3524
3525 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3526 break;
3527 }
3528
3529 APInt VecUndef, VecZero;
3530 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3531 Depth + 1))
3532 return true;
3533 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3534 break;
3535 }
3536 case ISD::VSELECT: {
3537 SDValue Sel = Op.getOperand(0);
3538 SDValue LHS = Op.getOperand(1);
3539 SDValue RHS = Op.getOperand(2);
3540
3541 // Try to transform the select condition based on the current demanded
3542 // elements.
3543 APInt UndefSel, ZeroSel;
3544 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3545 Depth + 1))
3546 return true;
3547
3548 // See if we can simplify either vselect operand.
3549 APInt DemandedLHS(DemandedElts);
3550 APInt DemandedRHS(DemandedElts);
3551 APInt UndefLHS, ZeroLHS;
3552 APInt UndefRHS, ZeroRHS;
3553 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3554 Depth + 1))
3555 return true;
3556 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3557 Depth + 1))
3558 return true;
3559
3560 KnownUndef = UndefLHS & UndefRHS;
3561 KnownZero = ZeroLHS & ZeroRHS;
3562
3563 // If we know that the selected element is always zero, we don't need the
3564 // select value element.
3565 APInt DemandedSel = DemandedElts & ~KnownZero;
3566 if (DemandedSel != DemandedElts)
3567 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3568 Depth + 1))
3569 return true;
3570
3571 break;
3572 }
3573 case ISD::VECTOR_SHUFFLE: {
3574 SDValue LHS = Op.getOperand(0);
3575 SDValue RHS = Op.getOperand(1);
3576 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3577
3578 // Collect demanded elements from shuffle operands.
3579 APInt DemandedLHS(NumElts, 0);
3580 APInt DemandedRHS(NumElts, 0);
3581 for (unsigned i = 0; i != NumElts; ++i) {
3582 int M = ShuffleMask[i];
3583 if (M < 0 || !DemandedElts[i])
3584 continue;
3585 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3586 if (M < (int)NumElts)
3587 DemandedLHS.setBit(M);
3588 else
3589 DemandedRHS.setBit(M - NumElts);
3590 }
3591
3592 // If either side isn't demanded, replace it by UNDEF. We handle this
3593 // explicitly here to also simplify in case of multiple uses (on the
3594 // contrary to the SimplifyDemandedVectorElts calls below).
3595 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3596 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3597 if (FoldLHS || FoldRHS) {
3598 LHS = FoldLHS ? TLO.DAG.getUNDEF(LHS.getValueType()) : LHS;
3599 RHS = FoldRHS ? TLO.DAG.getUNDEF(RHS.getValueType()) : RHS;
3600 SDValue NewOp =
3601 TLO.DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, ShuffleMask);
3602 return TLO.CombineTo(Op, NewOp);
3603 }
3604
3605 // See if we can simplify either shuffle operand.
3606 APInt UndefLHS, ZeroLHS;
3607 APInt UndefRHS, ZeroRHS;
3608 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3609 Depth + 1))
3610 return true;
3611 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3612 Depth + 1))
3613 return true;
3614
3615 // Simplify mask using undef elements from LHS/RHS.
3616 bool Updated = false;
3617 bool IdentityLHS = true, IdentityRHS = true;
3618 SmallVector<int, 32> NewMask(ShuffleMask);
3619 for (unsigned i = 0; i != NumElts; ++i) {
3620 int &M = NewMask[i];
3621 if (M < 0)
3622 continue;
3623 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3624 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3625 Updated = true;
3626 M = -1;
3627 }
3628 IdentityLHS &= (M < 0) || (M == (int)i);
3629 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3630 }
3631
3632 // Update legal shuffle masks based on demanded elements if it won't reduce
3633 // to Identity which can cause premature removal of the shuffle mask.
3634 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3635 SDValue LegalShuffle =
3636 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3637 if (LegalShuffle)
3638 return TLO.CombineTo(Op, LegalShuffle);
3639 }
3640
3641 // Propagate undef/zero elements from LHS/RHS.
3642 for (unsigned i = 0; i != NumElts; ++i) {
3643 int M = ShuffleMask[i];
3644 if (M < 0) {
3645 KnownUndef.setBit(i);
3646 } else if (M < (int)NumElts) {
3647 if (UndefLHS[M])
3648 KnownUndef.setBit(i);
3649 if (ZeroLHS[M])
3650 KnownZero.setBit(i);
3651 } else {
3652 if (UndefRHS[M - NumElts])
3653 KnownUndef.setBit(i);
3654 if (ZeroRHS[M - NumElts])
3655 KnownZero.setBit(i);
3656 }
3657 }
3658 break;
3659 }
3660 case ISD::ANY_EXTEND_VECTOR_INREG:
3661 case ISD::SIGN_EXTEND_VECTOR_INREG:
3662 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3663 APInt SrcUndef, SrcZero;
3664 SDValue Src = Op.getOperand(0);
3665 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3666 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3667 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3668 Depth + 1))
3669 return true;
3670 KnownZero = SrcZero.zextOrTrunc(NumElts);
3671 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3672
3673 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3674 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3675 DemandedSrcElts == 1) {
3676 // aext - if we just need the bottom element then we can bitcast.
3677 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3678 }
3679
3680 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3681 // zext(undef) upper bits are guaranteed to be zero.
3682 if (DemandedElts.isSubsetOf(KnownUndef))
3683 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3684 KnownUndef.clearAllBits();
3685
3686 // zext - if we just need the bottom element then we can mask:
3687 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3688 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3689 Op->isOnlyUserOf(Src.getNode()) &&
3690 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3691 SDLoc DL(Op);
3692 EVT SrcVT = Src.getValueType();
3693 EVT SrcSVT = SrcVT.getScalarType();
3694 SmallVector<SDValue> MaskElts;
3695 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3696 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3697 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3698 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3699 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3700 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3701 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3702 }
3703 }
3704 }
3705 break;
3706 }
3707
3708 // TODO: There are more binop opcodes that could be handled here - MIN,
3709 // MAX, saturated math, etc.
3710 case ISD::ADD: {
3711 SDValue Op0 = Op.getOperand(0);
3712 SDValue Op1 = Op.getOperand(1);
3713 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3714 APInt UndefLHS, ZeroLHS;
3715 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3716 Depth + 1, /*AssumeSingleUse*/ true))
3717 return true;
3718 }
3719 [[fallthrough]];
3720 }
3721 case ISD::AVGCEILS:
3722 case ISD::AVGCEILU:
3723 case ISD::AVGFLOORS:
3724 case ISD::AVGFLOORU:
3725 case ISD::OR:
3726 case ISD::XOR:
3727 case ISD::SUB:
3728 case ISD::FADD:
3729 case ISD::FSUB:
3730 case ISD::FMUL:
3731 case ISD::FDIV:
3732 case ISD::FREM: {
3733 SDValue Op0 = Op.getOperand(0);
3734 SDValue Op1 = Op.getOperand(1);
3735
3736 APInt UndefRHS, ZeroRHS;
3737 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3738 Depth + 1))
3739 return true;
3740 APInt UndefLHS, ZeroLHS;
3741 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3742 Depth + 1))
3743 return true;
3744
3745 KnownZero = ZeroLHS & ZeroRHS;
3746 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3747
3748 // Attempt to avoid multi-use ops if we don't need anything from them.
3749 // TODO - use KnownUndef to relax the demandedelts?
3750 if (!DemandedElts.isAllOnes())
3751 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3752 return true;
3753 break;
3754 }
3755 case ISD::SHL:
3756 case ISD::SRL:
3757 case ISD::SRA:
3758 case ISD::ROTL:
3759 case ISD::ROTR: {
3760 SDValue Op0 = Op.getOperand(0);
3761 SDValue Op1 = Op.getOperand(1);
3762
3763 APInt UndefRHS, ZeroRHS;
3764 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3765 Depth + 1))
3766 return true;
3767 APInt UndefLHS, ZeroLHS;
3768 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3769 Depth + 1))
3770 return true;
3771
3772 KnownZero = ZeroLHS;
3773 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3774
3775 // Attempt to avoid multi-use ops if we don't need anything from them.
3776 // TODO - use KnownUndef to relax the demandedelts?
3777 if (!DemandedElts.isAllOnes())
3778 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3779 return true;
3780 break;
3781 }
3782 case ISD::MUL:
3783 case ISD::MULHU:
3784 case ISD::MULHS:
3785 case ISD::AND: {
3786 SDValue Op0 = Op.getOperand(0);
3787 SDValue Op1 = Op.getOperand(1);
3788
3789 APInt SrcUndef, SrcZero;
3790 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3791 Depth + 1))
3792 return true;
3793 // If we know that a demanded element was zero in Op1 we don't need to
3794 // demand it in Op0 - it's guaranteed to be zero.
3795 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3796 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3797 TLO, Depth + 1))
3798 return true;
3799
3800 KnownUndef &= DemandedElts0;
3801 KnownZero &= DemandedElts0;
3802
3803 // If every element pair has a zero/undef then just fold to zero.
3804 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3805 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3806 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3807 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3808
3809 // If either side has a zero element, then the result element is zero, even
3810 // if the other is an UNDEF.
3811 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3812 // and then handle 'and' nodes with the rest of the binop opcodes.
3813 KnownZero |= SrcZero;
3814 KnownUndef &= SrcUndef;
3815 KnownUndef &= ~KnownZero;
3816
3817 // Attempt to avoid multi-use ops if we don't need anything from them.
3818 if (!DemandedElts.isAllOnes())
3819 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3820 return true;
3821 break;
3822 }
3823 case ISD::TRUNCATE:
3824 case ISD::SIGN_EXTEND:
3825 case ISD::ZERO_EXTEND:
3826 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3827 KnownZero, TLO, Depth + 1))
3828 return true;
3829
3830 if (!DemandedElts.isAllOnes())
3831 if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3832 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3833 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3834
3835 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3836 // zext(undef) upper bits are guaranteed to be zero.
3837 if (DemandedElts.isSubsetOf(KnownUndef))
3838 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3839 KnownUndef.clearAllBits();
3840 }
3841 break;
3842 case ISD::SINT_TO_FP:
3843 case ISD::UINT_TO_FP:
3844 case ISD::FP_TO_SINT:
3845 case ISD::FP_TO_UINT:
3846 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3847 KnownZero, TLO, Depth + 1))
3848 return true;
3849 // Don't fall through to generic undef -> undef handling.
3850 return false;
3851 default: {
3852 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3853 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3854 KnownZero, TLO, Depth))
3855 return true;
3856 } else {
3857 KnownBits Known;
3858 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3859 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3860 TLO, Depth, AssumeSingleUse))
3861 return true;
3862 }
3863 break;
3864 }
3865 }
3866 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3867
3868 // Constant fold all undef cases.
3869 // TODO: Handle zero cases as well.
3870 if (DemandedElts.isSubsetOf(KnownUndef))
3871 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3872
3873 return false;
3874}
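// Illustrative sketch (not from the LLVM sources): several cases above slice
// and reassemble per-element masks with APInt::extractBits / insertBits. For
// a hypothetical CONCAT_VECTORS of two v2 operands forming a v4:
static void exampleSplitDemandedElts() {
  APInt DemandedElts(4, 0b0110);               // demand elements 1 and 2
  APInt Sub0 = DemandedElts.extractBits(2, 0); // 0b10: element 1 of sub 0
  APInt Sub1 = DemandedElts.extractBits(2, 2); // 0b01: element 0 of sub 1
  APInt KnownUndef = APInt::getZero(4);
  KnownUndef.insertBits(Sub0, 0);              // merge per-operand results
  KnownUndef.insertBits(Sub1, 2);              // back into the wide mask
}
// End of sketch.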
3875
3876/// Determine which of the bits specified in Mask are known to be either zero or
3877/// one and return them in the Known.
3878 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3879 KnownBits &Known,
3880 const APInt &DemandedElts,
3881 const SelectionDAG &DAG,
3882 unsigned Depth) const {
3883 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3884 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3885 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3886 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3887 "Should use MaskedValueIsZero if you don't know whether Op"
3888 " is a target node!");
3889 Known.resetAll();
3890}
3891
3892 void TargetLowering::computeKnownBitsForTargetInstr(
3893 GISelValueTracking &Analysis, Register R, KnownBits &Known,
3894 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3895 unsigned Depth) const {
3896 Known.resetAll();
3897}
3898
3899 void TargetLowering::computeKnownFPClassForTargetInstr(
3900 GISelValueTracking &Analysis, Register R, KnownFPClass &Known,
3901 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3902 unsigned Depth) const {
3903 Known.resetAll();
3904}
3905
3906 void TargetLowering::computeKnownBitsForFrameIndex(
3907 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3908 // The low bits are known zero if the pointer is aligned.
3909 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3910}
3911
3912 Align TargetLowering::computeKnownAlignForTargetInstr(
3913 GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI,
3914 unsigned Depth) const {
3915 return Align(1);
3916 }
3917
3918/// This method can be implemented by targets that want to expose additional
3919/// information about sign bits to the DAG Combiner.
3920 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3921 const APInt &,
3922 const SelectionDAG &,
3923 unsigned Depth) const {
3924 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3925 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3926 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3927 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3928 "Should use ComputeNumSignBits if you don't know whether Op"
3929 " is a target node!");
3930 return 1;
3931}
3932
3933 unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3934 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3935 const MachineRegisterInfo &MRI, unsigned Depth) const {
3936 return 1;
3937}
3938
3939 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3940 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3941 TargetLoweringOpt &TLO, unsigned Depth) const {
3942 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3943 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3944 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3945 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3946 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3947 " is a target node!");
3948 return false;
3949}
3950
3951 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3952 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3953 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3954 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3955 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3956 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3957 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3958 "Should use SimplifyDemandedBits if you don't know whether Op"
3959 " is a target node!");
3960 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3961 return false;
3962}
3963
3964 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3965 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3966 SelectionDAG &DAG, unsigned Depth) const {
3967 assert(
3968 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3969 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3970 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3971 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3972 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3973 " is a target node!");
3974 return SDValue();
3975}
3976
3977SDValue
3978 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3979 SDValue N1, MutableArrayRef<int> Mask,
3980 SelectionDAG &DAG) const {
3981 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3982 if (!LegalMask) {
3983 std::swap(N0, N1);
3984 ShuffleVectorSDNode::commuteMask(Mask);
3985 LegalMask = isShuffleMaskLegal(Mask, VT);
3986 }
3987
3988 if (!LegalMask)
3989 return SDValue();
3990
3991 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3992}
3993
3994 const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode *) const {
3995 return nullptr;
3996}
3997
3998 bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3999 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4000 bool PoisonOnly, unsigned Depth) const {
4001 assert(
4002 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4003 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4004 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4005 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4006 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4007 " is a target node!");
4008
4009 // If Op can't create undef/poison and none of its operands are undef/poison
4010 // then Op is never undef/poison.
4011 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
4012 /*ConsiderFlags*/ true, Depth) &&
4013 all_of(Op->ops(), [&](SDValue V) {
4014 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
4015 Depth + 1);
4016 });
4017}
4018
4019 bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
4020 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4021 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
4022 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4023 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4024 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4025 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4026 "Should use canCreateUndefOrPoison if you don't know whether Op"
4027 " is a target node!");
4028 // Be conservative and return true.
4029 return true;
4030}
4031
4032 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
4033 const APInt &DemandedElts,
4034 const SelectionDAG &DAG,
4035 bool SNaN,
4036 unsigned Depth) const {
4037 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4038 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4039 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4040 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4041 "Should use isKnownNeverNaN if you don't know whether Op"
4042 " is a target node!");
4043 return false;
4044}
4045
4046 bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
4047 const APInt &DemandedElts,
4048 APInt &UndefElts,
4049 const SelectionDAG &DAG,
4050 unsigned Depth) const {
4051 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4052 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4053 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4054 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4055 "Should use isSplatValue if you don't know whether Op"
4056 " is a target node!");
4057 return false;
4058}
4059
4060// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4061// work with truncating build vectors and vectors with elements of less than
4062// 8 bits.
4063 bool TargetLowering::isConstTrueVal(SDValue N) const {
4064 if (!N)
4065 return false;
4066
4067 unsigned EltWidth;
4068 APInt CVal;
4069 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4070 /*AllowTruncation=*/true)) {
4071 CVal = CN->getAPIntValue();
4072 EltWidth = N.getValueType().getScalarSizeInBits();
4073 } else
4074 return false;
4075
4076 // If this is a truncating splat, truncate the splat value.
4077 // Otherwise, we may fail to match the expected values below.
4078 if (EltWidth < CVal.getBitWidth())
4079 CVal = CVal.trunc(EltWidth);
4080
4081 switch (getBooleanContents(N.getValueType())) {
4082 case UndefinedBooleanContent:
4083 return CVal[0];
4084 case ZeroOrOneBooleanContent:
4085 return CVal.isOne();
4086 case ZeroOrNegativeOneBooleanContent:
4087 return CVal.isAllOnes();
4088 }
4089
4090 llvm_unreachable("Invalid boolean contents");
4091}
4092
4093 bool TargetLowering::isConstFalseVal(SDValue N) const {
4094 if (!N)
4095 return false;
4096
4097 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
4098 if (!CN) {
4099 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
4100 if (!BV)
4101 return false;
4102
4103 // Only interested in constant splats; we don't care about undef
4104 // elements when identifying boolean constants, and getConstantSplatNode
4105 // returns NULL if all ops are undef.
4106 CN = BV->getConstantSplatNode();
4107 if (!CN)
4108 return false;
4109 }
4110
4111 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4112 return !CN->getAPIntValue()[0];
4113
4114 return CN->isZero();
4115}
4116
4117 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
4118 bool SExt) const {
4119 if (VT == MVT::i1)
4120 return N->isOne();
4121
4122 TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
4123 switch (Cnt) {
4124 case TargetLowering::ZeroOrOneBooleanContent:
4125 // An extended value of 1 is always true, unless its original type is i1,
4126 // in which case it will be sign extended to -1.
4127 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4128 case TargetLowering::UndefinedBooleanContent:
4129 case TargetLowering::ZeroOrNegativeOneBooleanContent:
4130 return N->isAllOnes() && SExt;
4131 }
4132 llvm_unreachable("Unexpected enumeration.");
4133}
4134
4135/// This helper function of SimplifySetCC tries to optimize the comparison when
4136/// either operand of the SetCC node is a bitwise-and instruction.
4137SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4138 ISD::CondCode Cond, const SDLoc &DL,
4139 DAGCombinerInfo &DCI) const {
4140 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4141 std::swap(N0, N1);
4142
4143 SelectionDAG &DAG = DCI.DAG;
4144 EVT OpVT = N0.getValueType();
4145 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4146 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4147 return SDValue();
4148
4149 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4150 // iff everything but LSB is known zero:
4151 if (Cond == ISD::SETNE && isNullConstant(N1) &&
4152 (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
4153 getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
4154 unsigned NumEltBits = OpVT.getScalarSizeInBits();
4155 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4156 if (DAG.MaskedValueIsZero(N0, UpperBits))
4157 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4158 }
4159
4160 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4161 // test in a narrow type that we can truncate to with no cost. Examples:
4162 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4163 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4164 // TODO: This conservatively checks for type legality on the source and
4165 // destination types. That may inhibit optimizations, but it also
4166 // allows setcc->shift transforms that may be more beneficial.
4167 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4168 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4169 isTypeLegal(OpVT) && N0.hasOneUse()) {
4170 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4171 AndC->getAPIntValue().getActiveBits());
4172 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4173 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4174 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4175 return DAG.getSetCC(DL, VT, Trunc, Zero,
4176 Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
4177 }
4178 }
4179
4180 // Match these patterns in any of their permutations:
4181 // (X & Y) == Y
4182 // (X & Y) != Y
4183 SDValue X, Y;
4184 if (N0.getOperand(0) == N1) {
4185 X = N0.getOperand(1);
4186 Y = N0.getOperand(0);
4187 } else if (N0.getOperand(1) == N1) {
4188 X = N0.getOperand(0);
4189 Y = N0.getOperand(1);
4190 } else {
4191 return SDValue();
4192 }
4193
4194 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4195 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4196 // it's liable to create an infinite loop.
4197 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4198 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4199 valueHasExactlyOneBitSet(Y, DAG)) {
4200 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4201 // Note that where Y is variable and is known to have at most one bit set
4202 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4203 // equivalent when Y == 0.
4204 assert(OpVT.isInteger());
4205 Cond = ISD::getSetCCInverse(Cond, OpVT);
4206 if (DCI.isBeforeLegalizeOps() ||
4207 isCondCodeLegal(Cond, N0.getSimpleValueType()))
4208 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4209 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4210 // If the target supports an 'and-not' or 'and-complement' logic operation,
4211 // try to use that to make a comparison operation more efficient.
4212 // But don't do this transform if the mask is a single bit because there are
4213 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4214 // 'rlwinm' on PPC).
4215
4216 // Bail out if the compare operand that we want to turn into a zero is
4217 // already a zero (otherwise, infinite loop).
4218 if (isNullConstant(Y))
4219 return SDValue();
4220
4221 // Transform this into: ~X & Y == 0.
4222 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4223 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4224 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4225 }
4226
4227 return SDValue();
4228}
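// Illustrative sketch (not from the LLVM sources): the narrowing transform in
// foldSetCCWithAnd turns a power-of-2 mask test into a sign-bit test on a
// truncated value. A standalone C++ equivalent with hypothetical helpers:
static bool exampleMaskForm(uint32_t X) { return (X & 32768u) == 0; } // (i32 X & 32768) == 0
static bool exampleSignForm(uint32_t X) { return (int16_t)X >= 0; }   // (trunc X to i16) >= 0
// Both inspect only bit 15 (the sign bit of the truncated i16), so they agree
// for every X; the second form needs no mask constant and compares against
// zero, which is often cheaper after legalization. End of sketch.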
4229
4230/// This helper function of SimplifySetCC tries to optimize the comparison when
4231/// either operand of the SetCC node is a bitwise-or instruction.
4232/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4233SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4234 ISD::CondCode Cond, const SDLoc &DL,
4235 DAGCombinerInfo &DCI) const {
4236 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4237 std::swap(N0, N1);
4238
4239 SelectionDAG &DAG = DCI.DAG;
4240 EVT OpVT = N0.getValueType();
4241 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4242 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4243 return SDValue();
4244
4245 // (X | Y) == Y
4246 // (X | Y) != Y
4247 SDValue X;
4248 if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(X)) {
4249 // If the target supports an 'and-not' or 'and-complement' logic operation,
4250 // try to use that to make a comparison operation more efficient.
4251
4252 // Bail out if the compare operand that we want to turn into a zero is
4253 // already a zero (otherwise, infinite loop).
4254 if (isNullConstant(N1))
4255 return SDValue();
4256
4257 // Transform this into: X & ~Y ==/!= 0.
4258 SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
4259 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
4260 return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
4261 }
4262
4263 return SDValue();
4264}
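// Illustrative sketch (not from the LLVM sources): the or-based fold above
// rests on (X | Y) == Y <=> (X & ~Y) == 0, i.e. "X is a submask of Y". In
// standalone C++:
static bool exampleOrForm(uint32_t X, uint32_t Y) { return (X | Y) == Y; }
static bool exampleAndNotForm(uint32_t X, uint32_t Y) { return (X & ~Y) == 0; }
// The and-not form maps directly onto a single ANDN/BIC-style instruction
// plus a compare with zero on targets where hasAndNotCompare returns true.
// End of sketch.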
4265
4266/// There are multiple IR patterns that could be checking whether certain
4267/// truncation of a signed number would be lossy or not. The pattern which is
4268 /// best at IR level may not lower optimally. Thus, we want to unfold it.
4269/// We are looking for the following pattern: (KeptBits is a constant)
4270/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4271 /// KeptBits won't be bitwidth(x); that will be constant-folded to true/false.
4272 /// KeptBits also can't be 1; that would have been folded to %x dstcond 0.
4273/// We will unfold it into the natural trunc+sext pattern:
4274/// ((%x << C) a>> C) dstcond %x
4275/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4276SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4277 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4278 const SDLoc &DL) const {
4279 // We must be comparing with a constant.
4280 ConstantSDNode *C1;
4281 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4282 return SDValue();
4283
4284 // N0 should be: add %x, (1 << (KeptBits-1))
4285 if (N0->getOpcode() != ISD::ADD)
4286 return SDValue();
4287
4288 // And we must be 'add'ing a constant.
4289 ConstantSDNode *C01;
4290 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4291 return SDValue();
4292
4293 SDValue X = N0->getOperand(0);
4294 EVT XVT = X.getValueType();
4295
4296 // Validate constants ...
4297
4298 APInt I1 = C1->getAPIntValue();
4299
4300 ISD::CondCode NewCond;
4301 if (Cond == ISD::CondCode::SETULT) {
4302 NewCond = ISD::CondCode::SETEQ;
4303 } else if (Cond == ISD::CondCode::SETULE) {
4304 NewCond = ISD::CondCode::SETEQ;
4305 // But need to 'canonicalize' the constant.
4306 I1 += 1;
4307 } else if (Cond == ISD::CondCode::SETUGT) {
4308 NewCond = ISD::CondCode::SETNE;
4309 // But need to 'canonicalize' the constant.
4310 I1 += 1;
4311 } else if (Cond == ISD::CondCode::SETUGE) {
4312 NewCond = ISD::CondCode::SETNE;
4313 } else
4314 return SDValue();
4315
4316 APInt I01 = C01->getAPIntValue();
4317
4318 auto checkConstants = [&I1, &I01]() -> bool {
4319 // Both of them must be power-of-two, and the constant from setcc is bigger.
4320 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4321 };
4322
4323 if (checkConstants()) {
4324 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4325 } else {
4326 // What if we invert constants? (and the target predicate)
4327 I1.negate();
4328 I01.negate();
4329 assert(XVT.isInteger());
4330 NewCond = getSetCCInverse(NewCond, XVT);
4331 if (!checkConstants())
4332 return SDValue();
4333 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4334 }
4335
4336 // They are power-of-two, so which bit is set?
4337 const unsigned KeptBits = I1.logBase2();
4338 const unsigned KeptBitsMinusOne = I01.logBase2();
4339
4340 // Magic!
4341 if (KeptBits != (KeptBitsMinusOne + 1))
4342 return SDValue();
4343 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4344
4345 // We don't want to do this in every single case.
4346 SelectionDAG &DAG = DCI.DAG;
4347 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4348 return SDValue();
4349
4350 // Unfold into: sext_inreg(%x) cond %x
4351 // Where 'cond' will be either 'eq' or 'ne'.
4352 SDValue SExtInReg = DAG.getNode(
4353 ISD::SIGN_EXTEND_INREG, DL, XVT, X,
4354 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4355 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4356}
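// Illustrative sketch (not from the LLVM sources): the unfolded form produced
// above checks that signed truncation to KeptBits is lossless. In standalone
// C++ for KeptBits = 8 on a 32-bit value:
static bool exampleFitsInSigned8(int32_t X) {
  // sext_inreg(X, i8) == X, written with a wrap-safe unsigned left shift.
  return (int32_t)((uint32_t)X << 24) >> 24 == X;
}
// This agrees with the range test the IR pattern encodes: with unsigned
// arithmetic, (uint32_t)(X + 128) < 256 holds exactly for X in [-128, 127].
// End of sketch.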
4357
4358// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4359SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4360 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4361 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4363 "Should be a comparison with 0.");
4364 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4365 "Valid only for [in]equality comparisons.");
4366
4367 unsigned NewShiftOpcode;
4368 SDValue X, C, Y;
4369
4370 SelectionDAG &DAG = DCI.DAG;
4371
4372 // Look for '(C l>>/<< Y)'.
4373 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4374 // The shift should be one-use.
4375 if (!V.hasOneUse())
4376 return false;
4377 unsigned OldShiftOpcode = V.getOpcode();
4378 switch (OldShiftOpcode) {
4379 case ISD::SHL:
4380 NewShiftOpcode = ISD::SRL;
4381 break;
4382 case ISD::SRL:
4383 NewShiftOpcode = ISD::SHL;
4384 break;
4385 default:
4386 return false; // must be a logical shift.
4387 }
4388 // We should be shifting a constant.
4389 // FIXME: best to use isConstantOrConstantVector().
4390 C = V.getOperand(0);
4391 ConstantSDNode *CC =
4392 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4393 if (!CC)
4394 return false;
4395 Y = V.getOperand(1);
4396
4397 ConstantSDNode *XC =
4398 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4399 return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4400 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4401 };
4402
4403 // LHS of comparison should be a one-use 'and'.
4404 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4405 return SDValue();
4406
4407 X = N0.getOperand(0);
4408 SDValue Mask = N0.getOperand(1);
4409
4410 // 'and' is commutative!
4411 if (!Match(Mask)) {
4412 std::swap(X, Mask);
4413 if (!Match(Mask))
4414 return SDValue();
4415 }
4416
4417 EVT VT = X.getValueType();
4418
4419 // Produce:
4420 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4421 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4422 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4423 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4424 return T2;
4425}
4426
4427/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4428/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4429/// handle the commuted versions of these patterns.
4430SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4431 ISD::CondCode Cond, const SDLoc &DL,
4432 DAGCombinerInfo &DCI) const {
4433 unsigned BOpcode = N0.getOpcode();
4434 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4435 "Unexpected binop");
4436 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4437
4438 // (X + Y) == X --> Y == 0
4439 // (X - Y) == X --> Y == 0
4440 // (X ^ Y) == X --> Y == 0
4441 SelectionDAG &DAG = DCI.DAG;
4442 EVT OpVT = N0.getValueType();
4443 SDValue X = N0.getOperand(0);
4444 SDValue Y = N0.getOperand(1);
4445 if (X == N1)
4446 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4447
4448 if (Y != N1)
4449 return SDValue();
4450
4451 // (X + Y) == Y --> X == 0
4452 // (X ^ Y) == Y --> X == 0
4453 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4454 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4455
4456 // The shift would not be valid if the operands are boolean (i1).
4457 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4458 return SDValue();
4459
4460 // (X - Y) == Y --> X == Y << 1
4461 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4462 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4463 if (!DCI.isCalledByLegalizer())
4464 DCI.AddToWorklist(YShl1.getNode());
4465 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4466}
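// Illustrative sketch (not from the LLVM sources): the final fold above uses
// the modular identity (X - Y) == Y <=> X == (Y << 1). In standalone C++ with
// wrap-around unsigned arithmetic:
static bool exampleSubForm(uint32_t X, uint32_t Y) { return X - Y == Y; }
static bool exampleShlForm(uint32_t X, uint32_t Y) { return X == Y << 1; }
// Adding Y to both sides of the first equation (mod 2^32) yields the second,
// so the two predicates agree for all inputs; the shift form frees the
// subtraction for reuse or removal. End of sketch.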
4467
4468 static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4469 SDValue N0, const APInt &C1,
4470 ISD::CondCode Cond, const SDLoc &dl,
4471 SelectionDAG &DAG) {
4472 // Look through truncs that don't change the value of a ctpop.
4473 // FIXME: Add vector support? Need to be careful with setcc result type below.
4474 SDValue CTPOP = N0;
4475 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4476 N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
4477 CTPOP = N0.getOperand(0);
4478
4479 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4480 return SDValue();
4481
4482 EVT CTVT = CTPOP.getValueType();
4483 SDValue CTOp = CTPOP.getOperand(0);
4484
4485 // Expand a power-of-2-or-zero comparison based on ctpop:
4486 // (ctpop x) u< 2 -> (x & x-1) == 0
4487 // (ctpop x) u> 1 -> (x & x-1) != 0
4488 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4489 // Keep the CTPOP if it is a cheap vector op.
4490 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4491 return SDValue();
4492
4493 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4494 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4495 return SDValue();
4496 if (C1 == 0 && (Cond == ISD::SETULT))
4497 return SDValue(); // This is handled elsewhere.
4498
4499 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4500
4501 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4502 SDValue Result = CTOp;
4503 for (unsigned i = 0; i < Passes; i++) {
4504 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4505 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4506 }
4507 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4508 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4509 }
4510
4511 // Expand a power-of-2 comparison based on ctpop
4512 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4513 // Keep the CTPOP if it is cheap.
4514 if (TLI.isCtpopFast(CTVT))
4515 return SDValue();
4516
4517 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4518 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4519 assert(CTVT.isInteger());
4520 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4521
4522 // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4523 // check before emitting a potentially unnecessary op.
4524 if (DAG.isKnownNeverZero(CTOp)) {
4525 // (ctpop x) == 1 --> (x & x-1) == 0
4526 // (ctpop x) != 1 --> (x & x-1) != 0
4527 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4528 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4529 return RHS;
4530 }
4531
4532 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4533 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4534 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4535 ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4536 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4537 }
4538
4539 return SDValue();
4540}
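// Illustrative sketch (not from the LLVM sources): the ctpop expansions above
// build on the clear-lowest-set-bit identity. In standalone C++:
static bool examplePopcountLessThan2(uint32_t X) {
  return (X & (X - 1)) == 0; // X & (X - 1) clears the lowest set bit
}
static bool examplePopcountIs1(uint32_t X) {
  return (X ^ (X - 1)) > X - 1; // unsigned compare; rejects X == 0 for free
}
// For X = 8: 8 & 7 == 0 and (8 ^ 7) = 15 > 7. For X = 0 the xor form compares
// 0xFFFFFFFF > 0xFFFFFFFF and correctly yields false. End of sketch.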
4541
4542 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4543 ISD::CondCode Cond, const SDLoc &dl,
4544 SelectionDAG &DAG) {
4545 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4546 return SDValue();
4547
4548 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4549 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4550 return SDValue();
4551
4552 auto getRotateSource = [](SDValue X) {
4553 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4554 return X.getOperand(0);
4555 return SDValue();
4556 };
4557
4558 // Peek through a rotated value compared against 0 or -1:
4559 // (rot X, Y) == 0/-1 --> X == 0/-1
4560 // (rot X, Y) != 0/-1 --> X != 0/-1
4561 if (SDValue R = getRotateSource(N0))
4562 return DAG.getSetCC(dl, VT, R, N1, Cond);
4563
4564 // Peek through an 'or' of a rotated value compared against 0:
4565 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4566 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4567 //
4568 // TODO: Add the 'and' with -1 sibling.
4569 // TODO: Recurse through a series of 'or' ops to find the rotate.
4570 EVT OpVT = N0.getValueType();
4571 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4572 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4573 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4574 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4575 }
4576 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4577 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4578 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4579 }
4580 }
4581
4582 return SDValue();
4583}
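// Illustrative sketch (not from the LLVM sources): a rotate only permutes
// bits, so it preserves the all-zeros and all-ones patterns that the
// peek-through above relies on. A standalone 32-bit rotate-left:
static uint32_t exampleRotl(uint32_t X, unsigned K) {
  K &= 31;                                   // rotate amount mod width
  return K ? (X << K) | (X >> (32 - K)) : X; // avoid the X >> 32 case
}
// For every K, exampleRotl(X, K) == 0 iff X == 0 (and likewise for ~0u),
// which is why the comparison can look through the rotate. End of sketch.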
4584
4585 static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4586 ISD::CondCode Cond, const SDLoc &dl,
4587 SelectionDAG &DAG) {
4588 // If we are testing for all-bits-clear, we might be able to do that with
4589 // less shifting since bit-order does not matter.
4590 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4591 return SDValue();
4592
4593 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4594 if (!C1 || !C1->isZero())
4595 return SDValue();
4596
4597 if (!N0.hasOneUse() ||
4598 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4599 return SDValue();
4600
4601 unsigned BitWidth = N0.getScalarValueSizeInBits();
4602 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4603 if (!ShAmtC)
4604 return SDValue();
4605
4606 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(BitWidth);
4607 if (ShAmt == 0)
4608 return SDValue();
4609
4610 // Canonicalize fshr as fshl to reduce pattern-matching.
4611 if (N0.getOpcode() == ISD::FSHR)
4612 ShAmt = BitWidth - ShAmt;
4613
4614 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4615 SDValue X, Y;
4616 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4617 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4618 return false;
4619 if (Or.getOperand(0) == Other) {
4620 X = Or.getOperand(0);
4621 Y = Or.getOperand(1);
4622 return true;
4623 }
4624 if (Or.getOperand(1) == Other) {
4625 X = Or.getOperand(1);
4626 Y = Or.getOperand(0);
4627 return true;
4628 }
4629 return false;
4630 };
4631
4632 EVT OpVT = N0.getValueType();
4633 EVT ShAmtVT = N0.getOperand(2).getValueType();
4634 SDValue F0 = N0.getOperand(0);
4635 SDValue F1 = N0.getOperand(1);
4636 if (matchOr(F0, F1)) {
4637 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4638 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4639 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4640 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4641 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4642 }
4643 if (matchOr(F1, F0)) {
4644 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4645 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4646 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4647 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4648 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4649 }
4650
4651 return SDValue();
4652}
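// Illustrative sketch (not from the LLVM sources): a funnel shift extracts a
// window from the concatenation of its operands, so for a constant amount C
// it is just two ordinary shifts and an or. A standalone fshl:
static uint32_t exampleFshl(uint32_t Hi, uint32_t Lo, unsigned C) {
  C &= 31;
  return C ? (Hi << C) | (Lo >> (32 - C)) : Hi;
}
// exampleFshl(X | Y, X, C) == 0 forces every bit of X and the shifted bits of
// Y to zero, which is exactly what the rewritten (shl Y, C) | X test checks
// without materializing the funnel shift. End of sketch.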
4653
4654/// Try to simplify a setcc built with the specified operands and cc. If it is
4655/// unable to simplify it, return a null SDValue.
4656 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4657 ISD::CondCode Cond, bool foldBooleans,
4658 DAGCombinerInfo &DCI,
4659 const SDLoc &dl) const {
4660 SelectionDAG &DAG = DCI.DAG;
4661 const DataLayout &Layout = DAG.getDataLayout();
4662 EVT OpVT = N0.getValueType();
4663 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4664
4665 // Constant fold or commute setcc.
4666 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4667 return Fold;
4668
4669 bool N0ConstOrSplat =
4670 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4671 bool N1ConstOrSplat =
4672 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4673
4674 // Canonicalize toward having the constant on the RHS.
4675 // TODO: Handle non-splat vector constants. All undef causes trouble.
4676 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4677 // infinite loop here when we encounter one.
4678 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4679 if (N0ConstOrSplat && !N1ConstOrSplat &&
4680 (DCI.isBeforeLegalizeOps() ||
4681 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4682 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4683
4684 // If we have a subtract with the same 2 non-constant operands as this setcc
4685 // -- but in reverse order -- then try to commute the operands of this setcc
4686 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4687 // instruction on some targets.
4688 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4689 (DCI.isBeforeLegalizeOps() ||
4690 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4691 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4692 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4693 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4694
4695 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4696 return V;
4697
4698 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4699 return V;
4700
4701 if (auto *N1C = isConstOrConstSplat(N1)) {
4702 const APInt &C1 = N1C->getAPIntValue();
4703
4704 // Optimize some CTPOP cases.
4705 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4706 return V;
4707
4708 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4709 // X * Y == 0 --> (X == 0) || (Y == 0)
4710 // X * Y != 0 --> (X != 0) && (Y != 0)
4711 // TODO: This bails out if minsize is set, but if the target doesn't have a
4712 // single instruction multiply for this type, it would likely be
4713 // smaller to decompose.
4714 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4715 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4716 (N0->getFlags().hasNoUnsignedWrap() ||
4717 N0->getFlags().hasNoSignedWrap()) &&
4718 !Attr.hasFnAttr(Attribute::MinSize)) {
4719 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4720 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4721 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4722 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4723 }
4724
4725 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4726 // equality comparison, then we're just comparing whether X itself is
4727 // zero.
4728 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4729 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4730 isPowerOf2_32(N0.getScalarValueSizeInBits())) {
4731 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4732 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4733 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4734 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4735 // (srl (ctlz x), 5) == 0 -> X != 0
4736 // (srl (ctlz x), 5) != 1 -> X != 0
4737 Cond = ISD::SETNE;
4738 } else {
4739 // (srl (ctlz x), 5) != 0 -> X == 0
4740 // (srl (ctlz x), 5) == 1 -> X == 0
4741 Cond = ISD::SETEQ;
4742 }
4743 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4744 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4745 Cond);
4746 }
4747 }
4748 }
4749 }
4750
4751 // FIXME: Support vectors.
4752 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4753 const APInt &C1 = N1C->getAPIntValue();
4754
4755 // (zext x) == C --> x == (trunc C)
4756 // (sext x) == C --> x == (trunc C)
4757 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4758 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4759 unsigned MinBits = N0.getValueSizeInBits();
4760 SDValue PreExt;
4761 bool Signed = false;
4762 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4763 // ZExt
4764 MinBits = N0->getOperand(0).getValueSizeInBits();
4765 PreExt = N0->getOperand(0);
4766 } else if (N0->getOpcode() == ISD::AND) {
4767 // DAGCombine turns costly ZExts into ANDs
4768 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4769 if ((C->getAPIntValue()+1).isPowerOf2()) {
4770 MinBits = C->getAPIntValue().countr_one();
4771 PreExt = N0->getOperand(0);
4772 }
4773 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4774 // SExt
4775 MinBits = N0->getOperand(0).getValueSizeInBits();
4776 PreExt = N0->getOperand(0);
4777 Signed = true;
4778 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4779 // ZEXTLOAD / SEXTLOAD
4780 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4781 MinBits = LN0->getMemoryVT().getSizeInBits();
4782 PreExt = N0;
4783 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4784 Signed = true;
4785 MinBits = LN0->getMemoryVT().getSizeInBits();
4786 PreExt = N0;
4787 }
4788 }
4789
4790 // Figure out how many bits we need to preserve this constant.
4791 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4792
4793 // Make sure we're not losing bits from the constant.
4794 if (MinBits > 0 &&
4795 MinBits < C1.getBitWidth() &&
4796 MinBits >= ReqdBits) {
4797 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4798 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4799 // Will get folded away.
4800 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4801 if (MinBits == 1 && C1 == 1)
4802 // Invert the condition.
4803 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4804 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4805 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4806 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4807 }
4808
4809 // If truncating the setcc operands is not desirable, we can still
4810 // simplify the expression in some cases:
4811 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4812 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4813 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4814 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4815 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4816 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4817 SDValue TopSetCC = N0->getOperand(0);
4818 unsigned N0Opc = N0->getOpcode();
4819 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4820 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4821 TopSetCC.getOpcode() == ISD::SETCC &&
4822 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4823 (isConstFalseVal(N1) ||
4824 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4825
4826 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4827 (!N1C->isZero() && Cond == ISD::SETNE);
4828
4829 if (!Inverse)
4830 return TopSetCC;
4831
4833 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4834 TopSetCC.getOperand(0).getValueType());
4835 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4836 TopSetCC.getOperand(1),
4837 InvCond);
4838 }
4839 }
4840 }
4841
4842 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4843 // equality or unsigned, and all 1 bits of the const are in the same
4844 // partial word, see if we can shorten the load.
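// e.g. on a little-endian target, ((i32 load %p) & 0xFF00) == 0 only tests
// the second byte, so it can be rewritten as ((i8 load %p+1) & 0xFF) == 0
// using a narrower load.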
4845 if (DCI.isBeforeLegalize() &&
4846 !ISD::isSignedIntSetCC(Cond) &&
4847 N0.getOpcode() == ISD::AND && C1 == 0 &&
4848 N0.getNode()->hasOneUse() &&
4849 isa<LoadSDNode>(N0.getOperand(0)) &&
4850 N0.getOperand(0).getNode()->hasOneUse() &&
4851 isa<ConstantSDNode>(N0.getOperand(1))) {
4852 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4853 APInt bestMask;
4854 unsigned bestWidth = 0, bestOffset = 0;
4855 if (Lod->isSimple() && Lod->isUnindexed() &&
4856 (Lod->getMemoryVT().isByteSized() ||
4857 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4858 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4859 unsigned origWidth = N0.getValueSizeInBits();
4860 unsigned maskWidth = origWidth;
4861 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4862 // 8 bits, but have to be careful...
4863 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4864 origWidth = Lod->getMemoryVT().getSizeInBits();
4865 const APInt &Mask = N0.getConstantOperandAPInt(1);
4866 // Only consider power-of-2 widths (and at least one byte) as candidates
4867 // for the narrowed load.
4868 for (unsigned width = 8; width < origWidth; width *= 2) {
4869 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4870 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4871 // Avoid accessing any padding here for now (we could use memWidth
4872 // instead of origWidth here otherwise).
4873 unsigned maxOffset = origWidth - width;
4874 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4875 if (Mask.isSubsetOf(newMask)) {
4876 unsigned ptrOffset =
4877 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4878 unsigned IsFast = 0;
4879 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4880 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4881 if (shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT,
4882 ptrOffset / 8) &&
4883 allowsMemoryAccess(
4884 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4885 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4886 IsFast) {
4887 bestOffset = ptrOffset / 8;
4888 bestMask = Mask.lshr(offset);
4889 bestWidth = width;
4890 break;
4891 }
4892 }
4893 newMask <<= 8;
4894 }
4895 if (bestWidth)
4896 break;
4897 }
4898 }
4899 if (bestWidth) {
4900 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4901 SDValue Ptr = Lod->getBasePtr();
4902 if (bestOffset != 0)
4903 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4904 SDValue NewLoad =
4905 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4906 Lod->getPointerInfo().getWithOffset(bestOffset),
4907 Lod->getBaseAlign());
4908 SDValue And =
4909 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4910 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4911 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4912 }
4913 }
4914
4915 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4916 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4917 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4918
4919 // If the comparison constant has bits in the upper part, the
4920 // zero-extended value could never match.
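// e.g. (i32 (zext i8 %x)) can never equal 0x100, so seteq folds to false
// and setne folds to true.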
4921 if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4922 C1.getBitWidth() - InSize))) {
4923 switch (Cond) {
4924 case ISD::SETUGT:
4925 case ISD::SETUGE:
4926 case ISD::SETEQ:
4927 return DAG.getConstant(0, dl, VT);
4928 case ISD::SETULT:
4929 case ISD::SETULE:
4930 case ISD::SETNE:
4931 return DAG.getConstant(1, dl, VT);
4932 case ISD::SETGT:
4933 case ISD::SETGE:
4934 // True if the sign bit of C1 is set.
4935 return DAG.getConstant(C1.isNegative(), dl, VT);
4936 case ISD::SETLT:
4937 case ISD::SETLE:
4938 // True if the sign bit of C1 isn't set.
4939 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4940 default:
4941 break;
4942 }
4943 }
4944
4945 // Otherwise, we can perform the comparison with the low bits.
4946 switch (Cond) {
4947 case ISD::SETEQ:
4948 case ISD::SETNE:
4949 case ISD::SETUGT:
4950 case ISD::SETUGE:
4951 case ISD::SETULT:
4952 case ISD::SETULE: {
4953 EVT newVT = N0.getOperand(0).getValueType();
4954 // FIXME: Should use isNarrowingProfitable.
4955 if (DCI.isBeforeLegalizeOps() ||
4956 (isOperationLegal(ISD::SETCC, newVT) &&
4957 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
4958 isTypeDesirableForOp(ISD::SETCC, newVT))) {
4959 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4960 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4961
4962 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4963 NewConst, Cond);
4964 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4965 }
4966 break;
4967 }
4968 default:
4969 break; // todo, be more careful with signed comparisons
4970 }
4971 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4972 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4973 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4974 OpVT)) {
4975 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4976 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4977 EVT ExtDstTy = N0.getValueType();
4978 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4979
4980 // If the constant doesn't fit into the number of bits for the source of
4981 // the sign extension, it is impossible for both sides to be equal.
4982 if (C1.getSignificantBits() > ExtSrcTyBits)
4983 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4984
4985 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4986 ExtDstTy != ExtSrcTy && "Unexpected types!");
4987 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4988 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4989 DAG.getConstant(Imm, dl, ExtDstTy));
4990 if (!DCI.isCalledByLegalizer())
4991 DCI.AddToWorklist(ZextOp.getNode());
4992 // Otherwise, make this a use of a zext.
4993 return DAG.getSetCC(dl, VT, ZextOp,
4994 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4995 } else if ((N1C->isZero() || N1C->isOne()) &&
4996 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4997 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
4998 // excluded as they are handled below whilst checking for foldBooleans.
4999 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
5000 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
5001 (N0.getValueType() == MVT::i1 ||
5002 getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
5003 DAG.MaskedValueIsZero(
5004 N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) {
5005 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5006 if (TrueWhenTrue)
5007 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
5008 // Invert the condition.
5009 if (N0.getOpcode() == ISD::SETCC) {
5010 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
5011 CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
5012 if (DCI.isBeforeLegalizeOps() ||
5013 isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
5014 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
5015 }
5016 }
5017
5018 if ((N0.getOpcode() == ISD::XOR ||
5019 (N0.getOpcode() == ISD::AND &&
5020 N0.getOperand(0).getOpcode() == ISD::XOR &&
5021 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
5022 isOneConstant(N0.getOperand(1))) {
5023 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5024 // can only do this if the top bits are known zero.
5025 unsigned BitWidth = N0.getValueSizeInBits();
5026 if (DAG.MaskedValueIsZero(N0,
5027 APInt::getHighBitsSet(BitWidth,
5028 BitWidth-1))) {
5029 // Okay, get the un-inverted input value.
5030 SDValue Val;
5031 if (N0.getOpcode() == ISD::XOR) {
5032 Val = N0.getOperand(0);
5033 } else {
5034 assert(N0.getOpcode() == ISD::AND &&
5035 N0.getOperand(0).getOpcode() == ISD::XOR);
5036 // ((X^1)&1)^1 -> X & 1
5037 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
5038 N0.getOperand(0).getOperand(0),
5039 N0.getOperand(1));
5040 }
5041
5042 return DAG.getSetCC(dl, VT, Val, N1,
5043 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5044 }
5045 } else if (N1C->isOne()) {
5046 SDValue Op0 = N0;
5047 if (Op0.getOpcode() == ISD::TRUNCATE)
5048 Op0 = Op0.getOperand(0);
5049
5050 if ((Op0.getOpcode() == ISD::XOR) &&
5051 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
5052 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
5053 SDValue XorLHS = Op0.getOperand(0);
5054 SDValue XorRHS = Op0.getOperand(1);
5055 // Ensure that the input setccs return an i1 type or 0/1 value.
5056 if (Op0.getValueType() == MVT::i1 ||
5057 (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
5058 ZeroOrOneBooleanContent &&
5059 getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
5060 ZeroOrOneBooleanContent)) {
5061 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5062 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
5063 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
5064 }
5065 }
5066 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
5067 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5068 if (Op0.getValueType().bitsGT(VT))
5069 Op0 = DAG.getNode(ISD::AND, dl, VT,
5070 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
5071 DAG.getConstant(1, dl, VT));
5072 else if (Op0.getValueType().bitsLT(VT))
5073 Op0 = DAG.getNode(ISD::AND, dl, VT,
5074 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
5075 DAG.getConstant(1, dl, VT));
5076
5077 return DAG.getSetCC(dl, VT, Op0,
5078 DAG.getConstant(0, dl, Op0.getValueType()),
5079 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5080 }
5081 if (Op0.getOpcode() == ISD::AssertZext &&
5082 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
5083 return DAG.getSetCC(dl, VT, Op0,
5084 DAG.getConstant(0, dl, Op0.getValueType()),
5085 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5086 }
5087 }
5088
5089 // Given:
5090 // icmp eq/ne (urem %x, %y), 0
5091 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5092 // icmp eq/ne %x, 0
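// A nonzero %x with at most one set bit is a power of two, and a %y with
// two or more set bits is not, so %y can only divide %x when %x is zero.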
5093 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5094 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5095 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
5096 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
5097 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5098 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
5099 }
5100
5101 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5102 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
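// (ashr X, BW-1) replicates the sign bit across the whole value, yielding
// -1 for negative X and 0 otherwise, so comparing it with -1 is a sign test.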
5103 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5104 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
5105 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
5106 N1C->isAllOnes()) {
5107 return DAG.getSetCC(dl, VT, N0.getOperand(0),
5108 DAG.getConstant(0, dl, OpVT),
5109 Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
5110 }
5111
5112 // fold (setcc (trunc x) c) -> (setcc x c)
5113 if (N0.getOpcode() == ISD::TRUNCATE &&
5114 ((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Cond)) ||
5115 (N0->getFlags().hasNoSignedWrap() &&
5116 !ISD::isUnsignedIntSetCC(Cond))) &&
5117 isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
5118 EVT NewVT = N0.getOperand(0).getValueType();
5119 SDValue NewConst = DAG.getConstant(
5120 (N0->getFlags().hasNoSignedWrap() && !ISD::isUnsignedIntSetCC(Cond))
5121 ? C1.sext(NewVT.getSizeInBits())
5122 : C1.zext(NewVT.getSizeInBits()),
5123 dl, NewVT);
5124 return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5125 }
5126
5127 if (SDValue V =
5128 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
5129 return V;
5130 }
5131
5132 // These simplifications apply to splat vectors as well.
5133 // TODO: Handle more splat vector cases.
5134 if (auto *N1C = isConstOrConstSplat(N1)) {
5135 const APInt &C1 = N1C->getAPIntValue();
5136
5137 APInt MinVal, MaxVal;
5138 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
5139 if (ISD::isSignedIntSetCC(Cond)) {
5140 MinVal = APInt::getSignedMinValue(OperandBitSize);
5141 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
5142 } else {
5143 MinVal = APInt::getMinValue(OperandBitSize);
5144 MaxVal = APInt::getMaxValue(OperandBitSize);
5145 }
5146
5147 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5148 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5149 // X >= MIN --> true
5150 if (C1 == MinVal)
5151 return DAG.getBoolConstant(true, dl, VT, OpVT);
5152
5153 if (!VT.isVector()) { // TODO: Support this for vectors.
5154 // X >= C0 --> X > (C0 - 1)
5155 APInt C = C1 - 1;
5156 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
5157 if ((DCI.isBeforeLegalizeOps() ||
5158 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5159 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5160 isLegalICmpImmediate(C.getSExtValue())))) {
5161 return DAG.getSetCC(dl, VT, N0,
5162 DAG.getConstant(C, dl, N1.getValueType()),
5163 NewCC);
5164 }
5165 }
5166 }
5167
5168 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5169 // X <= MAX --> true
5170 if (C1 == MaxVal)
5171 return DAG.getBoolConstant(true, dl, VT, OpVT);
5172
5173 // X <= C0 --> X < (C0 + 1)
5174 if (!VT.isVector()) { // TODO: Support this for vectors.
5175 APInt C = C1 + 1;
5176 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
5177 if ((DCI.isBeforeLegalizeOps() ||
5178 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5179 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5180 isLegalICmpImmediate(C.getSExtValue())))) {
5181 return DAG.getSetCC(dl, VT, N0,
5182 DAG.getConstant(C, dl, N1.getValueType()),
5183 NewCC);
5184 }
5185 }
5186 }
5187
5188 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5189 if (C1 == MinVal)
5190 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5191
5192 // TODO: Support this for vectors after legalize ops.
5193 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5194 // Canonicalize setlt X, Max --> setne X, Max
5195 if (C1 == MaxVal)
5196 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5197
5198 // If we have setult X, 1, turn it into seteq X, 0
5199 if (C1 == MinVal+1)
5200 return DAG.getSetCC(dl, VT, N0,
5201 DAG.getConstant(MinVal, dl, N0.getValueType()),
5202 ISD::SETEQ);
5203 }
5204 }
5205
5206 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5207 if (C1 == MaxVal)
5208 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5209
5210 // TODO: Support this for vectors after legalize ops.
5211 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5212 // Canonicalize setgt X, Min --> setne X, Min
5213 if (C1 == MinVal)
5214 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5215
5216 // If we have setugt X, Max-1, turn it into seteq X, Max
5217 if (C1 == MaxVal-1)
5218 return DAG.getSetCC(dl, VT, N0,
5219 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5220 ISD::SETEQ);
5221 }
5222 }
5223
5224 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5225 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5226 if (C1.isZero())
5227 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5228 VT, N0, N1, Cond, DCI, dl))
5229 return CC;
5230
5231 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5232 // For example, when high 32-bits of i64 X are known clear:
5233 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5234 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5235 bool CmpZero = N1C->isZero();
5236 bool CmpNegOne = N1C->isAllOnes();
5237 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5238 // Match or(lo,shl(hi,bw/2)) pattern.
5239 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5240 unsigned EltBits = V.getScalarValueSizeInBits();
5241 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5242 return false;
5243 SDValue LHS = V.getOperand(0);
5244 SDValue RHS = V.getOperand(1);
5245 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5246 // Unshifted element must have zero upperbits.
5247 if (RHS.getOpcode() == ISD::SHL &&
5248 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5249 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5250 DAG.MaskedValueIsZero(LHS, HiBits)) {
5251 Lo = LHS;
5252 Hi = RHS.getOperand(0);
5253 return true;
5254 }
5255 if (LHS.getOpcode() == ISD::SHL &&
5256 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5257 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5258 DAG.MaskedValueIsZero(RHS, HiBits)) {
5259 Lo = RHS;
5260 Hi = LHS.getOperand(0);
5261 return true;
5262 }
5263 return false;
5264 };
5265
5266 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5267 unsigned EltBits = N0.getScalarValueSizeInBits();
5268 unsigned HalfBits = EltBits / 2;
5269 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5270 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5271 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5272 SDValue NewN0 =
5273 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5274 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5275 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5276 };
5277
5278 SDValue Lo, Hi;
5279 if (IsConcat(N0, Lo, Hi))
5280 return MergeConcat(Lo, Hi);
5281
5282 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5283 SDValue Lo0, Lo1, Hi0, Hi1;
5284 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5285 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5286 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5287 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5288 }
5289 }
5290 }
5291 }
5292
5293 // If we have "setcc X, C0", check to see if we can shrink the immediate
5294 // by changing cc.
5295 // TODO: Support this for vectors after legalize ops.
5296 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5297 // SETUGT X, SINTMAX -> SETLT X, 0
5298 // SETUGE X, SINTMIN -> SETLT X, 0
5299 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5300 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5301 return DAG.getSetCC(dl, VT, N0,
5302 DAG.getConstant(0, dl, N1.getValueType()),
5303 ISD::SETLT);
5304
5305 // SETULT X, SINTMIN -> SETGT X, -1
5306 // SETULE X, SINTMAX -> SETGT X, -1
5307 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5308 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5309 return DAG.getSetCC(dl, VT, N0,
5310 DAG.getAllOnesConstant(dl, N1.getValueType()),
5311 ISD::SETGT);
5312 }
5313 }
5314
5315 // Back to non-vector simplifications.
5316 // TODO: Can we do these for vector splats?
5317 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5318 const APInt &C1 = N1C->getAPIntValue();
5319 EVT ShValTy = N0.getValueType();
5320
5321 // Fold bit comparisons when we can. This will result in an
5322 // incorrect value when boolean false is negative one, unless
5323 // the bitsize is 1 in which case the false value is the same
5324 // in practice regardless of the representation.
5325 if ((VT.getSizeInBits() == 1 ||
5326 getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
5327 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5328 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5329 N0.getOpcode() == ISD::AND) {
5330 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5331 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5332 // Perform the xform if the AND RHS is a single bit.
5333 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5334 if (AndRHS->getAPIntValue().isPowerOf2() &&
5335 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5336 return DAG.getNode(
5337 ISD::TRUNCATE, dl, VT,
5338 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5339 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5340 }
5341 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5342 // (X & 8) == 8 --> (X & 8) >> 3
5343 // Perform the xform if C1 is a single bit.
5344 unsigned ShCt = C1.logBase2();
5345 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5346 return DAG.getNode(
5347 ISD::TRUNCATE, dl, VT,
5348 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5349 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5350 }
5351 }
5352 }
5353 }
5354
5355 if (C1.getSignificantBits() <= 64 &&
5356 isLegalICmpImmediate(C1.getSExtValue())) {
5357 // (X & -256) == 256 -> (X >> 8) == 1
5358 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5359 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5360 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5361 const APInt &AndRHSC = AndRHS->getAPIntValue();
5362 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5363 unsigned ShiftBits = AndRHSC.countr_zero();
5364 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5365 // If using an unsigned shift doesn't yield a legal compare
5366 // immediate, try using sra instead.
5367 APInt NewC = C1.lshr(ShiftBits);
5368 if (NewC.getSignificantBits() <= 64 &&
5369 !isLegalICmpImmediate(NewC.getSExtValue())) {
5370 APInt SignedC = C1.ashr(ShiftBits);
5371 if (SignedC.getSignificantBits() <= 64 &&
5372 isLegalICmpImmediate(SignedC.getSExtValue())) {
5373 SDValue Shift = DAG.getNode(
5374 ISD::SRA, dl, ShValTy, N0.getOperand(0),
5375 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5376 SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
5377 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5378 }
5379 }
5380 SDValue Shift = DAG.getNode(
5381 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5382 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5383 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5384 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5385 }
5386 }
5387 }
5388 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5389 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5390 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5391 // X < 0x100000000 -> (X >> 32) < 1
5392 // X >= 0x100000000 -> (X >> 32) >= 1
5393 // X <= 0x0ffffffff -> (X >> 32) < 1
5394 // X > 0x0ffffffff -> (X >> 32) >= 1
5395 unsigned ShiftBits;
5396 APInt NewC = C1;
5397 ISD::CondCode NewCond = Cond;
5398 if (AdjOne) {
5399 ShiftBits = C1.countr_one();
5400 NewC = NewC + 1;
5401 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5402 } else {
5403 ShiftBits = C1.countr_zero();
5404 }
5405 NewC.lshrInPlace(ShiftBits);
5406 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5407 isLegalICmpImmediate(NewC.getSExtValue()) &&
5408 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5409 SDValue Shift =
5410 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5411 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5412 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5413 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5414 }
5415 }
5416 }
5417 }
5418
5419 if (isa<ConstantFPSDNode>(N1)) {
5420 auto *CFP = cast<ConstantFPSDNode>(N1);
5421 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5422
5423 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5424 // constant if knowing that the operand is non-nan is enough. We prefer to
5425 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5426 // materialize 0.0.
5427 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5428 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5429
5430 // setcc (fneg x), C -> setcc swap(pred) x, -C
5431 if (N0.getOpcode() == ISD::FNEG) {
5432 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
5433 if (DCI.isBeforeLegalizeOps() ||
5434 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5435 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5436 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5437 }
5438 }
5439
5440 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5441 if (isOperationLegal(ISD::IS_FPCLASS, N0.getValueType()) &&
5442 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5443 bool IsFabs = N0.getOpcode() == ISD::FABS;
5444 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
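// With fabs the sign of the constant decides the outcome: |x| can never be
// -Inf (fcNone), while +Inf under fabs matches either infinity (fcInf).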
5445 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5446 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5447 : (IsFabs ? fcInf : fcPosInf);
5448 if (Cond == ISD::SETUEQ)
5449 Flag |= fcNan;
5450 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5451 DAG.getTargetConstant(Flag, dl, MVT::i32));
5452 }
5453 }
5454
5455 // If the condition is not legal, see if we can find an equivalent one
5456 // which is legal.
5457 if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
5458 // If the comparison was an awkward floating-point == or != and one of
5459 // the comparison operands is infinity or negative infinity, convert the
5460 // condition to a less-awkward <= or >=.
5461 if (CFP->getValueAPF().isInfinity()) {
5462 bool IsNegInf = CFP->getValueAPF().isNegative();
5463 ISD::CondCode NewCond = ISD::SETCC_INVALID;
5464 switch (Cond) {
5465 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5466 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5467 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5468 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5469 default: break;
5470 }
5471 if (NewCond != ISD::SETCC_INVALID &&
5472 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5473 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5474 }
5475 }
5476 }
5477
5478 if (N0 == N1) {
5479 // The sext(setcc()) => setcc() optimization relies on the appropriate
5480 // constant being emitted.
5481 assert(!N0.getValueType().isInteger() &&
5482 "Integer types should be handled by FoldSetCC");
5483
5484 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5485 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5486 if (UOF == 2) // FP operators that are undefined on NaNs.
5487 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5488 if (UOF == unsigned(EqTrue))
5489 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5490 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5491 // if it is not already.
5492 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5493 if (NewCond != Cond &&
5494 (DCI.isBeforeLegalizeOps() ||
5495 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5496 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5497 }
5498
5499 // ~X > ~Y --> Y > X
5500 // ~X < ~Y --> Y < X
5501 // ~X < C --> X > ~C
5502 // ~X > C --> X < ~C
5503 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5504 N0.getValueType().isInteger()) {
5505 if (isBitwiseNot(N0)) {
5506 if (isBitwiseNot(N1))
5507 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5508
5509 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
5510 !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
5511 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5512 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5513 }
5514 }
5515 }
5516
5517 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5518 N0.getValueType().isInteger()) {
5519 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5520 N0.getOpcode() == ISD::XOR) {
5521 // Simplify (X+Y) == (X+Z) --> Y == Z
5522 if (N0.getOpcode() == N1.getOpcode()) {
5523 if (N0.getOperand(0) == N1.getOperand(0))
5524 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5525 if (N0.getOperand(1) == N1.getOperand(1))
5526 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5527 if (isCommutativeBinOp(N0.getOpcode())) {
5528 // If X op Y == Y op X, try other combinations.
5529 if (N0.getOperand(0) == N1.getOperand(1))
5530 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5531 Cond);
5532 if (N0.getOperand(1) == N1.getOperand(0))
5533 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5534 Cond);
5535 }
5536 }
5537
5538 // If RHS is a legal immediate value for a compare instruction, we need
5539 // to be careful about increasing register pressure needlessly.
5540 bool LegalRHSImm = false;
5541
5542 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5543 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5544 // Turn (X+C1) == C2 --> X == C2-C1
5545 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5546 return DAG.getSetCC(
5547 dl, VT, N0.getOperand(0),
5548 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5549 dl, N0.getValueType()),
5550 Cond);
5551
5552 // Turn (X^C1) == C2 --> X == C1^C2
5553 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5554 return DAG.getSetCC(
5555 dl, VT, N0.getOperand(0),
5556 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5557 dl, N0.getValueType()),
5558 Cond);
5559 }
5560
5561 // Turn (C1-X) == C2 --> X == C1-C2
5562 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5563 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5564 return DAG.getSetCC(
5565 dl, VT, N0.getOperand(1),
5566 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5567 dl, N0.getValueType()),
5568 Cond);
5569
5570 // Could RHSC fold directly into a compare?
5571 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5572 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5573 }
5574
5575 // (X+Y) == X --> Y == 0 and similar folds.
5576 // Don't do this if X is an immediate that can fold into a cmp
5577 // instruction and X+Y has other uses. It could be an induction variable
5578 // chain, and the transform would increase register pressure.
5579 if (!LegalRHSImm || N0.hasOneUse())
5580 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5581 return V;
5582 }
5583
5584 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5585 N1.getOpcode() == ISD::XOR)
5586 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5587 return V;
5588
5589 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5590 return V;
5591
5592 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
5593 return V;
5594 }
5595
5596 // Fold remainder of division by a constant.
5597 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5598 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5599 // When division is cheap or optimizing for minimum size,
5600 // fall through to DIVREM creation by skipping this fold.
5601 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5602 if (N0.getOpcode() == ISD::UREM) {
5603 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5604 return Folded;
5605 } else if (N0.getOpcode() == ISD::SREM) {
5606 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5607 return Folded;
5608 }
5609 }
5610 }
5611
5612 // Fold away ALL boolean setcc's.
5613 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5614 SDValue Temp;
5615 switch (Cond) {
5616 default: llvm_unreachable("Unknown integer setcc!");
5617 case ISD::SETEQ: // X == Y -> ~(X^Y)
5618 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5619 N0 = DAG.getNOT(dl, Temp, OpVT);
5620 if (!DCI.isCalledByLegalizer())
5621 DCI.AddToWorklist(Temp.getNode());
5622 break;
5623 case ISD::SETNE: // X != Y --> (X^Y)
5624 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5625 break;
5626 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5627 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5628 Temp = DAG.getNOT(dl, N0, OpVT);
5629 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5630 if (!DCI.isCalledByLegalizer())
5631 DCI.AddToWorklist(Temp.getNode());
5632 break;
5633 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5634 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5635 Temp = DAG.getNOT(dl, N1, OpVT);
5636 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5637 if (!DCI.isCalledByLegalizer())
5638 DCI.AddToWorklist(Temp.getNode());
5639 break;
5640 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5641 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5642 Temp = DAG.getNOT(dl, N0, OpVT);
5643 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5644 if (!DCI.isCalledByLegalizer())
5645 DCI.AddToWorklist(Temp.getNode());
5646 break;
5647 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5648 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5649 Temp = DAG.getNOT(dl, N1, OpVT);
5650 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5651 break;
5652 }
5653 if (VT.getScalarType() != MVT::i1) {
5654 if (!DCI.isCalledByLegalizer())
5655 DCI.AddToWorklist(N0.getNode());
5656 // FIXME: If running after legalize, we probably can't do this.
5657 ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
5658 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5659 }
5660 return N0;
5661 }
5662
5663 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5664 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5665 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
5666 ((!ISD::isSignedIntSetCC(Cond) && N0->getFlags().hasNoUnsignedWrap() &&
5667 N1->getFlags().hasNoUnsignedWrap()) ||
5668 (!ISD::isUnsignedIntSetCC(Cond) && N0->getFlags().hasNoSignedWrap() &&
5669 N1->getFlags().hasNoSignedWrap())) &&
5670 isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
5671 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5672 }
5673
5674 // Could not fold it.
5675 return SDValue();
5676}
5677
5678/// Returns true (and the GlobalValue and the offset) if the node is a
5679/// GlobalAddress + offset.
5680 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5681 int64_t &Offset) const {
5682
5683 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5684
5685 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5686 GA = GASD->getGlobal();
5687 Offset += GASD->getOffset();
5688 return true;
5689 }
5690
5691 if (N->isAnyAdd()) {
5692 SDValue N1 = N->getOperand(0);
5693 SDValue N2 = N->getOperand(1);
5694 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5695 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5696 Offset += V->getSExtValue();
5697 return true;
5698 }
5699 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5700 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5701 Offset += V->getSExtValue();
5702 return true;
5703 }
5704 }
5705 }
5706
5707 return false;
5708}
5709
5710 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5711 DAGCombinerInfo &DCI) const {
5712 // Default implementation: no optimization.
5713 return SDValue();
5714}
5715
5716//===----------------------------------------------------------------------===//
5717// Inline Assembler Implementation Methods
5718//===----------------------------------------------------------------------===//
5719
5720 TargetLowering::ConstraintType
5721 TargetLowering::getConstraintType(StringRef Constraint) const {
5722 unsigned S = Constraint.size();
5723
5724 if (S == 1) {
5725 switch (Constraint[0]) {
5726 default: break;
5727 case 'r':
5728 return C_RegisterClass;
5729 case 'm': // memory
5730 case 'o': // offsetable
5731 case 'V': // not offsetable
5732 return C_Memory;
5733 case 'p': // Address.
5734 return C_Address;
5735 case 'n': // Simple Integer
5736 case 'E': // Floating Point Constant
5737 case 'F': // Floating Point Constant
5738 return C_Immediate;
5739 case 'i': // Simple Integer or Relocatable Constant
5740 case 's': // Relocatable Constant
5741 case 'X': // Allow ANY value.
5742 case 'I': // Target registers.
5743 case 'J':
5744 case 'K':
5745 case 'L':
5746 case 'M':
5747 case 'N':
5748 case 'O':
5749 case 'P':
5750 case '<':
5751 case '>':
5752 return C_Other;
5753 }
5754 }
5755
5756 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5757 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5758 return C_Memory;
5759 return C_Register;
5760 }
5761 return C_Unknown;
5762}
5763
5764/// Try to replace an X constraint, which matches anything, with another that
5765/// has more specific requirements based on the type of the corresponding
5766/// operand.
5767const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5768 if (ConstraintVT.isInteger())
5769 return "r";
5770 if (ConstraintVT.isFloatingPoint())
5771 return "f"; // works for many targets
5772 return nullptr;
5773}
5774
5775 SDValue TargetLowering::LowerAsmOutputForConstraint(
5776 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5777 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5778 return SDValue();
5779}
5780
5781/// Lower the specified operand into the Ops vector.
5782/// If it is invalid, don't add anything to Ops.
5783 void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5784 StringRef Constraint,
5785 std::vector<SDValue> &Ops,
5786 SelectionDAG &DAG) const {
5787
5788 if (Constraint.size() > 1)
5789 return;
5790
5791 char ConstraintLetter = Constraint[0];
5792 switch (ConstraintLetter) {
5793 default: break;
5794 case 'X': // Allows any operand
5795 case 'i': // Simple Integer or Relocatable Constant
5796 case 'n': // Simple Integer
5797 case 's': { // Relocatable Constant
5798
5799 ConstantSDNode *C;
5800 uint64_t Offset = 0;
5801
5802 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5803 // etc., since getelementptr is variadic. We can't use
5804 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5805 // while in this case the GA may be furthest from the root node which is
5806 // likely an ISD::ADD.
5807 while (true) {
5808 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5809 // gcc prints these as sign extended. Sign extend value to 64 bits
5810 // now; without this it would get ZExt'd later in
5811 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5812 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5813 BooleanContent BCont = getBooleanContents(MVT::i64);
5814 ISD::NodeType ExtOpc =
5815 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5816 int64_t ExtVal =
5817 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5818 Ops.push_back(
5819 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5820 return;
5821 }
5822 if (ConstraintLetter != 'n') {
5823 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5824 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5825 GA->getValueType(0),
5826 Offset + GA->getOffset()));
5827 return;
5828 }
5829 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5830 Ops.push_back(DAG.getTargetBlockAddress(
5831 BA->getBlockAddress(), BA->getValueType(0),
5832 Offset + BA->getOffset(), BA->getTargetFlags()));
5833 return;
5834 }
5835 if (isa<BasicBlockSDNode>(Op)) {
5836 Ops.push_back(Op);
5837 return;
5838 }
5839 }
5840 const unsigned OpCode = Op.getOpcode();
5841 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5842 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5843 Op = Op.getOperand(1);
5844 // Subtraction is not commutative.
5845 else if (OpCode == ISD::ADD &&
5846 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5847 Op = Op.getOperand(0);
5848 else
5849 return;
5850 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5851 continue;
5852 }
5853 return;
5854 }
5855 break;
5856 }
5857 }
5858}
5859
5860 void TargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
5861 SmallVectorImpl<SDValue> &Ops,
5862 SelectionDAG &DAG) const {}
5863
5864 std::pair<unsigned, const TargetRegisterClass *>
5865 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5866 StringRef Constraint,
5867 MVT VT) const {
5868 if (!Constraint.starts_with("{"))
5869 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5870 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5871
5872 // Remove the braces from around the name.
5873 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5874
5875 std::pair<unsigned, const TargetRegisterClass *> R =
5876 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5877
5878 // Figure out which register class contains this reg.
5879 for (const TargetRegisterClass *RC : RI->regclasses()) {
5880 // If none of the value types for this register class are valid, we
5881 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5882 if (!isLegalRC(*RI, *RC))
5883 continue;
5884
5885 for (const MCPhysReg &PR : *RC) {
5886 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5887 std::pair<unsigned, const TargetRegisterClass *> S =
5888 std::make_pair(PR, RC);
5889
5890 // If this register class has the requested value type, return it,
5891 // otherwise keep searching and return the first class found
5892 // if no other is found which explicitly has the requested type.
5893 if (RI->isTypeLegalForClass(*RC, VT))
5894 return S;
5895 if (!R.second)
5896 R = S;
5897 }
5898 }
5899 }
5900
5901 return R;
5902}
5903
5904//===----------------------------------------------------------------------===//
5905// Constraint Selection.
5906
5907 /// Return true if this is an input operand that is a matching constraint like
5908/// "4".
5909 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5910 assert(!ConstraintCode.empty() && "No known constraint!");
5911 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5912}
5913
5914/// If this is an input matching constraint, this method returns the output
5915/// operand it matches.
5916 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5917 assert(!ConstraintCode.empty() && "No known constraint!");
5918 return atoi(ConstraintCode.c_str());
5919}
5920
5921/// Split up the constraint string from the inline assembly value into the
5922/// specific constraints and their prefixes, and also tie in the associated
5923/// operand values.
5924/// If this returns an empty vector, and if the constraint string itself
5925/// isn't empty, there was an error parsing.
5926 TargetLowering::AsmOperandInfoVector
5927 TargetLowering::ParseConstraints(const DataLayout &DL,
5928 const TargetRegisterInfo *TRI,
5929 const CallBase &Call) const {
5930 /// Information about all of the constraints.
5931 AsmOperandInfoVector ConstraintOperands;
5932 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
5933 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5934
5935 // Do a prepass over the constraints, canonicalizing them, and building up the
5936 // ConstraintOperands list.
5937 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5938 unsigned ResNo = 0; // ResNo - The result number of the next output.
5939 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5940
5941 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5942 ConstraintOperands.emplace_back(std::move(CI));
5943 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5944
5945 // Update multiple alternative constraint count.
5946 if (OpInfo.multipleAlternatives.size() > maCount)
5947 maCount = OpInfo.multipleAlternatives.size();
5948
5949 OpInfo.ConstraintVT = MVT::Other;
5950
5951 // Compute the value type for each operand.
5952 switch (OpInfo.Type) {
5953 case InlineAsm::isOutput:
5954 // Indirect outputs just consume an argument.
5955 if (OpInfo.isIndirect) {
5956 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5957 break;
5958 }
5959
5960 // The return value of the call is this value. As such, there is no
5961 // corresponding argument.
5962 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5963 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
5964 OpInfo.ConstraintVT =
5965 getAsmOperandValueType(DL, STy->getElementType(ResNo))
5966 .getSimpleVT();
5967 } else {
5968 assert(ResNo == 0 && "Asm only has one result!");
5969 OpInfo.ConstraintVT =
5970 getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
5971 }
5972 ++ResNo;
5973 break;
5974 case InlineAsm::isInput:
5975 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5976 break;
5977 case InlineAsm::isLabel:
5978 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
5979 ++LabelNo;
5980 continue;
5981 case InlineAsm::isClobber:
5982 // Nothing to do.
5983 break;
5984 }
5985
5986 if (OpInfo.CallOperandVal) {
5987 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5988 if (OpInfo.isIndirect) {
5989 OpTy = Call.getParamElementType(ArgNo);
5990 assert(OpTy && "Indirect operand must have elementtype attribute");
5991 }
5992
5993 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
5994 if (StructType *STy = dyn_cast<StructType>(OpTy))
5995 if (STy->getNumElements() == 1)
5996 OpTy = STy->getElementType(0);
5997
5998 // If OpTy is not a single value, it may be a struct/union that we
5999 // can tile with integers.
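// e.g. a struct of two 32-bit fields (64 bits in total) is handed to the
// asm as a single i64.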
6000 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
6001 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
6002 switch (BitSize) {
6003 default: break;
6004 case 1:
6005 case 8:
6006 case 16:
6007 case 32:
6008 case 64:
6009 case 128:
6010 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
6011 break;
6012 }
6013 }
6014
6015 EVT VT = getAsmOperandValueType(DL, OpTy, true);
6016 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6017 ArgNo++;
6018 }
6019 }
6020
6021 // If we have multiple alternative constraints, select the best alternative.
6022 if (!ConstraintOperands.empty()) {
6023 if (maCount) {
6024 unsigned bestMAIndex = 0;
6025 int bestWeight = -1;
6026 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
6027 int weight = -1;
6028 unsigned maIndex;
6029 // Compute the sums of the weights for each alternative, keeping track
6030 // of the best (highest weight) one so far.
6031 for (maIndex = 0; maIndex < maCount; ++maIndex) {
6032 int weightSum = 0;
6033 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6034 cIndex != eIndex; ++cIndex) {
6035 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6036 if (OpInfo.Type == InlineAsm::isClobber)
6037 continue;
6038
6039 // If this is an output operand with a matching input operand,
6040 // look up the matching input. If their types mismatch, e.g. one
6041 // is an integer, the other is floating point, or their sizes are
6042 // different, flag it as an maCantMatch.
6043 if (OpInfo.hasMatchingInput()) {
6044 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6045 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6046 if ((OpInfo.ConstraintVT.isInteger() !=
6047 Input.ConstraintVT.isInteger()) ||
6048 (OpInfo.ConstraintVT.getSizeInBits() !=
6049 Input.ConstraintVT.getSizeInBits())) {
6050 weightSum = -1; // Can't match.
6051 break;
6052 }
6053 }
6054 }
6055 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
6056 if (weight == -1) {
6057 weightSum = -1;
6058 break;
6059 }
6060 weightSum += weight;
6061 }
6062 // Update best.
6063 if (weightSum > bestWeight) {
6064 bestWeight = weightSum;
6065 bestMAIndex = maIndex;
6066 }
6067 }
6068
6069 // Now select chosen alternative in each constraint.
6070 for (AsmOperandInfo &cInfo : ConstraintOperands)
6071 if (cInfo.Type != InlineAsm::isClobber)
6072 cInfo.selectAlternative(bestMAIndex);
6073 }
6074 }
6075
6076 // Check and hook up tied operands, choose constraint code to use.
6077 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6078 cIndex != eIndex; ++cIndex) {
6079 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6080
6081 // If this is an output operand with a matching input operand, look up the
6082 // matching input. If their types mismatch, e.g. one is an integer, the
6083 // other is floating point, or their sizes are different, flag it as an
6084 // error.
6085 if (OpInfo.hasMatchingInput()) {
6086 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6087
6088 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6089 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6090 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
6091 OpInfo.ConstraintVT);
6092 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6093 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
6094 Input.ConstraintVT);
6095 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
6096 OpInfo.ConstraintVT.isFloatingPoint();
6097 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
6098 Input.ConstraintVT.isFloatingPoint();
6099 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
6100 (MatchRC.second != InputRC.second)) {
6101 report_fatal_error("Unsupported asm: input constraint"
6102 " with a matching output constraint of"
6103 " incompatible type!");
6104 }
6105 }
6106 }
6107 }
6108
6109 return ConstraintOperands;
6110}
6111
6112 /// Return a number indicating our preference for choosing a type of constraint
6113/// over another, for the purpose of sorting them. Immediates are almost always
6114 /// preferable (when they can be emitted). A higher return value means a
6115/// stronger preference for one constraint type relative to another.
6116/// FIXME: We should prefer registers over memory but doing so may lead to
6117/// unrecoverable register exhaustion later.
6118/// https://github.com/llvm/llvm-project/issues/20571
6119 static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
6120 switch (CT) {
6121 case TargetLowering::C_Immediate:
6122 case TargetLowering::C_Other:
6123 return 4;
6124 case TargetLowering::C_Memory:
6125 case TargetLowering::C_Address:
6126 return 3;
6127 case TargetLowering::C_RegisterClass:
6128 return 2;
6129 case TargetLowering::C_Register:
6130 return 1;
6131 case TargetLowering::C_Unknown:
6132 return 0;
6133 }
6134 llvm_unreachable("Invalid constraint type");
6135}
6136
6137/// Examine constraint type and operand type and determine a weight value.
6138/// This object must already have been set up with the operand type
6139/// and the current alternative constraint selected.
6140 TargetLowering::ConstraintWeight
6141 TargetLowering::getMultipleConstraintMatchWeight(
6142 AsmOperandInfo &info, int maIndex) const {
6143 InlineAsm::ConstraintCodeVector *rCodes;
6144 if (maIndex >= (int)info.multipleAlternatives.size())
6145 rCodes = &info.Codes;
6146 else
6147 rCodes = &info.multipleAlternatives[maIndex].Codes;
6148 ConstraintWeight BestWeight = CW_Invalid;
6149
6150 // Loop over the options, keeping track of the most general one.
6151 for (const std::string &rCode : *rCodes) {
6152 ConstraintWeight weight =
6153 getSingleConstraintMatchWeight(info, rCode.c_str());
6154 if (weight > BestWeight)
6155 BestWeight = weight;
6156 }
6157
6158 return BestWeight;
6159}
6160
6161/// Examine constraint type and operand type and determine a weight value.
6162/// This object must already have been set up with the operand type
6163/// and the current alternative constraint selected.
6164 TargetLowering::ConstraintWeight
6165 TargetLowering::getSingleConstraintMatchWeight(
6166 AsmOperandInfo &info, const char *constraint) const {
6167 ConstraintWeight weight = CW_Invalid;
6168 Value *CallOperandVal = info.CallOperandVal;
6169 // If we don't have a value, we can't do a match,
6170 // but allow it at the lowest weight.
6171 if (!CallOperandVal)
6172 return CW_Default;
6173 // Look at the constraint type.
6174 switch (*constraint) {
6175 case 'i': // immediate integer.
6176 case 'n': // immediate integer with a known value.
6177 if (isa<ConstantInt>(CallOperandVal))
6178 weight = CW_Constant;
6179 break;
6180 case 's': // non-explicit integral immediate.
6181 if (isa<GlobalValue>(CallOperandVal))
6182 weight = CW_Constant;
6183 break;
6184 case 'E': // immediate float if host format.
6185 case 'F': // immediate float.
6186 if (isa<ConstantFP>(CallOperandVal))
6187 weight = CW_Constant;
6188 break;
6189 case '<': // memory operand with autodecrement.
6190 case '>': // memory operand with autoincrement.
6191 case 'm': // memory operand.
6192 case 'o': // offsettable memory operand
6193 case 'V': // non-offsettable memory operand
6194 weight = CW_Memory;
6195 break;
6196 case 'r': // general register.
6197 case 'g': // general register, memory operand or immediate integer.
6198 // note: Clang converts "g" to "imr".
6199 if (CallOperandVal->getType()->isIntegerTy())
6200 weight = CW_Register;
6201 break;
6202 case 'X': // any operand.
6203 default:
6204 weight = CW_Default;
6205 break;
6206 }
6207 return weight;
6208}
6209
6210/// If there are multiple different constraints that we could pick for this
6211/// operand (e.g. "imr") try to pick the 'best' one.
6212/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6213/// into seven classes:
6214/// Register -> one specific register
6215/// RegisterClass -> a group of regs
6216/// Memory -> memory
6217/// Address -> a symbolic memory reference
6218/// Immediate -> immediate values
6219/// Other -> magic values (such as "Flag Output Operands")
6220/// Unknown -> something we don't recognize yet and can't handle
6221/// Ideally, we would pick the most specific constraint possible: if we have
6222/// something that fits into a register, we would pick it. The problem here
6223/// is that if we have something that could either be in a register or in
6224 /// memory, the use of the register could cause selection of *other*
6225/// operands to fail: they might only succeed if we pick memory. Because of
6226/// this the heuristic we use is:
6227///
6228/// 1) If there is an 'other' constraint, and if the operand is valid for
6229/// that constraint, use it. This makes us take advantage of 'i'
6230/// constraints when available.
6231/// 2) Otherwise, pick the most general constraint present. This prefers
6232/// 'm' over 'r', for example.
6233///
6234 TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
6235 TargetLowering::AsmOperandInfo &OpInfo) const {
6236 ConstraintGroup Ret;
6237
6238 Ret.reserve(OpInfo.Codes.size());
6239 for (StringRef Code : OpInfo.Codes) {
6240 TargetLowering::ConstraintType CType = getConstraintType(Code);
6241
6242 // Indirect 'other' or 'immediate' constraints are not allowed.
6243 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6244 CType == TargetLowering::C_Register ||
6245 CType == TargetLowering::C_RegisterClass))
6246 continue;
6247
6248 // Things with matching constraints can only be registers, per gcc
6249 // documentation. This mainly affects "g" constraints.
6250 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6251 continue;
6252
6253 Ret.emplace_back(Code, CType);
6254 }
6255
6256 llvm::stable_sort(Ret, [](ConstraintPair a, ConstraintPair b) {
6257 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6258 });
6259
6260 return Ret;
6261}
6262
6263/// If we have an immediate, see if we can lower it. Return true if we can,
6264/// false otherwise.
6265 static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6266 SDValue Op, SelectionDAG *DAG,
6267 const TargetLowering &TLI) {
6268
6269 assert((P.second == TargetLowering::C_Other ||
6270 P.second == TargetLowering::C_Immediate) &&
6271 "need immediate or other");
6272
6273 if (!Op.getNode())
6274 return false;
6275
6276 std::vector<SDValue> ResultOps;
6277 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6278 return !ResultOps.empty();
6279}
6280
6281/// Determines the constraint code and constraint type to use for the specific
6282/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6283 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
6284 SDValue Op,
6285 SelectionDAG *DAG) const {
6286 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6287
6288 // Single-letter constraints ('r') are very common.
6289 if (OpInfo.Codes.size() == 1) {
6290 OpInfo.ConstraintCode = OpInfo.Codes[0];
6291 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6292 } else {
6293 ConstraintGroup G = getConstraintPreferences(OpInfo);
6294 if (G.empty())
6295 return;
6296
6297 unsigned BestIdx = 0;
6298 for (const unsigned E = G.size();
6299 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6300 G[BestIdx].second == TargetLowering::C_Immediate);
6301 ++BestIdx) {
6302 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6303 break;
6304 // If we're out of constraints, just pick the first one.
6305 if (BestIdx + 1 == E) {
6306 BestIdx = 0;
6307 break;
6308 }
6309 }
6310
6311 OpInfo.ConstraintCode = G[BestIdx].first;
6312 OpInfo.ConstraintType = G[BestIdx].second;
6313 }
6314
6315 // 'X' matches anything.
6316 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6317 // Constants are handled elsewhere. For Functions, the type here is the
6318 // type of the result, which is not what we want to look at; leave them
6319 // alone.
6320 Value *v = OpInfo.CallOperandVal;
6321 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6322 return;
6323 }
6324
6325 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6326 OpInfo.ConstraintCode = "i";
6327 return;
6328 }
6329
6330 // Otherwise, try to resolve it to something we know about by looking at
6331 // the actual operand type.
6332 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6333 OpInfo.ConstraintCode = Repl;
6334 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6335 }
6336 }
6337}
6338
6339/// Given an exact SDIV by a constant, create a multiplication
6340/// with the multiplicative inverse of the constant.
6341/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
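/// For example, an exact i32 divide by 6 becomes (X >>exact 1) * 0xAAAAAAAB,
/// where 0xAAAAAAAB is the multiplicative inverse of 3 modulo 2^32
/// (3 * 0xAAAAAAAB == 1 mod 2^32).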
6342 static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6343 const SDLoc &dl, SelectionDAG &DAG,
6344 SmallVectorImpl<SDNode *> &Created) {
6345 SDValue Op0 = N->getOperand(0);
6346 SDValue Op1 = N->getOperand(1);
6347 EVT VT = N->getValueType(0);
6348 EVT SVT = VT.getScalarType();
6349 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6350 EVT ShSVT = ShVT.getScalarType();
6351
6352 bool UseSRA = false;
6353 SmallVector<SDValue, 16> Shifts, Factors;
6354
6355 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6356 if (C->isZero())
6357 return false;
6358 APInt Divisor = C->getAPIntValue();
6359 unsigned Shift = Divisor.countr_zero();
6360 if (Shift) {
6361 Divisor.ashrInPlace(Shift);
6362 UseSRA = true;
6363 }
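// After the shift the divisor is odd, and every odd value is a unit modulo
// 2^BW, so the multiplicative inverse taken below always exists.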
6364 APInt Factor = Divisor.multiplicativeInverse();
6365 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6366 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6367 return true;
6368 };
6369
6370 // Collect all magic values from the build vector.
6371 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6372 return SDValue();
6373
6374 SDValue Shift, Factor;
6375 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6376 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6377 Factor = DAG.getBuildVector(VT, dl, Factors);
6378 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6379 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6380 "Expected matchUnaryPredicate to return one element for scalable "
6381 "vectors");
6382 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6383 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6384 } else {
6385 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6386 Shift = Shifts[0];
6387 Factor = Factors[0];
6388 }
6389
6390 SDValue Res = Op0;
6391 if (UseSRA) {
6392 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
6393 Created.push_back(Res.getNode());
6394 }
6395
6396 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6397}
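// Illustrative aside (not from this file): a standalone scalar model of the
// exact-SDIV transform above for 32-bit values, assuming arithmetic right
// shift and the GCC/Clang __builtin_ctz builtin. The inverse is found with
// Newton's iteration, which doubles the number of correct low bits per step.

#include <cstdint>

static uint32_t MulInverse32(uint32_t D0) { // D0 must be odd
  uint32_t Inv = D0;            // correct to 3 bits (D0 * D0 == 1 mod 8)
  for (int I = 0; I < 4; ++I)
    Inv *= 2 - D0 * Inv;        // 3 -> 6 -> 12 -> 24 -> 48 correct bits
  return Inv;
}

static int32_t ExactSDiv32(int32_t X, int32_t D) { // requires X % D == 0
  unsigned K = __builtin_ctz((uint32_t)D);
  X >>= K;                                         // exact: no bits lost
  return (int32_t)((uint32_t)X * MulInverse32((uint32_t)(D >> K)));
}
// e.g. ExactSDiv32(-24, -12) == 2 and ExactSDiv32(21, 7) == 3.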
6398
6399/// Given an exact UDIV by a constant, create a multiplication
6400/// with the multiplicative inverse of the constant.
6401/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6402static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
6403 const SDLoc &dl, SelectionDAG &DAG,
6404 SmallVectorImpl<SDNode *> &Created) {
6405 EVT VT = N->getValueType(0);
6406 EVT SVT = VT.getScalarType();
6407 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6408 EVT ShSVT = ShVT.getScalarType();
6409
6410 bool UseSRL = false;
6411 SmallVector<SDValue, 16> Shifts, Factors;
6412
6413 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6414 if (C->isZero())
6415 return false;
6416 APInt Divisor = C->getAPIntValue();
6417 unsigned Shift = Divisor.countr_zero();
6418 if (Shift) {
6419 Divisor.lshrInPlace(Shift);
6420 UseSRL = true;
6421 }
6422 // Calculate the multiplicative inverse modulo BW.
6423 APInt Factor = Divisor.multiplicativeInverse();
6424 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6425 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6426 return true;
6427 };
6428
6429 SDValue Op1 = N->getOperand(1);
6430
6431 // Collect all magic values from the build vector.
6432 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6433 return SDValue();
6434
6435 SDValue Shift, Factor;
6436 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6437 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6438 Factor = DAG.getBuildVector(VT, dl, Factors);
6439 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6440 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6441 "Expected matchUnaryPredicate to return one element for scalable "
6442 "vectors");
6443 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6444 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6445 } else {
6446 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6447 Shift = Shifts[0];
6448 Factor = Factors[0];
6449 }
6450
6451 SDValue Res = N->getOperand(0);
6452 if (UseSRL) {
6453 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
6454 Created.push_back(Res.getNode());
6455 }
6456
6457 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6458}
6459
6460SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6461 SelectionDAG &DAG,
6462 SmallVectorImpl<SDNode *> &Created) const {
6463 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6464 if (isIntDivCheap(N->getValueType(0), Attr))
6465 return SDValue(N, 0); // Lower SDIV as SDIV
6466 return SDValue();
6467}
6468
6469SDValue
6470TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6471 SelectionDAG &DAG,
6472 SmallVectorImpl<SDNode *> &Created) const {
6473 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6474 if (isIntDivCheap(N->getValueType(0), Attr))
6475 return SDValue(N, 0); // Lower SREM as SREM
6476 return SDValue();
6477}
6478
6479/// Build sdiv by power-of-2 with conditional move instructions
6480/// Ref: "Hacker's Delight" by Henry Warren 10-1
6481/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6482/// bgez x, label
6483/// add x, x, 2**k-1
6484/// label:
6485/// sra res, x, k
6486/// neg res, res (when the divisor is negative)
6487SDValue TargetLowering::buildSDIVPow2WithCMov(
6488 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6489 SmallVectorImpl<SDNode *> &Created) const {
6490 unsigned Lg2 = Divisor.countr_zero();
6491 EVT VT = N->getValueType(0);
6492
6493 SDLoc DL(N);
6494 SDValue N0 = N->getOperand(0);
6495 SDValue Zero = DAG.getConstant(0, DL, VT);
6496 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6497 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6498
6499 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6500 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6501 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6502 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6503 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6504
6505 Created.push_back(Cmp.getNode());
6506 Created.push_back(Add.getNode());
6507 Created.push_back(CMov.getNode());
6508
6509 // Divide by pow2.
6510 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
6511 DAG.getShiftAmountConstant(Lg2, VT, DL));
6512
6513 // If we're dividing by a positive value, we're done. Otherwise, we must
6514 // negate the result.
6515 if (Divisor.isNonNegative())
6516 return SRA;
6517
6518 Created.push_back(SRA.getNode());
6519 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6520}
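// Illustrative aside (not from this file): the scalar shape of the select
// sequence built above, for a divisor of magnitude 2^K. Biasing negative
// dividends by 2^K - 1 makes the arithmetic shift round toward zero, as
// signed division requires.
static int32_t SDivByPow2(int32_t X, unsigned K, bool DivisorIsNegative) {
  int32_t Mask = (int32_t)((1u << K) - 1);   // Pow2 - 1
  int32_t Biased = X < 0 ? X + Mask : X;     // the SELECT/CMov above
  int32_t Q = Biased >> K;                   // SRA (arithmetic shift assumed)
  return DivisorIsNegative ? -Q : Q;         // SUB 0, Q when divisor < 0
}
// e.g. SDivByPow2(-7, 1, false) == -3, matching -7 / 2 in C.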
6521
6522/// Given an ISD::SDIV node expressing a divide by constant,
6523/// return a DAG expression to select that will generate the same value by
6524/// multiplying by a magic number.
6525/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6526SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6527 bool IsAfterLegalization,
6528 bool IsAfterLegalTypes,
6529 SmallVectorImpl<SDNode *> &Created) const {
6530 SDLoc dl(N);
6531 EVT VT = N->getValueType(0);
6532 EVT SVT = VT.getScalarType();
6533 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6534 EVT ShSVT = ShVT.getScalarType();
6535 unsigned EltBits = VT.getScalarSizeInBits();
6536 EVT MulVT;
6537
6538 // Check to see if we can do this.
6539 // FIXME: We should be more aggressive here.
6540 if (!isTypeLegal(VT)) {
6541 // Limit this to simple scalars for now.
6542 if (VT.isVector() || !VT.isSimple())
6543 return SDValue();
6544
6545 // If this type will be promoted to a large enough type with a legal
6546 // multiply operation, we can go ahead and do this transform.
6547 if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
6548 return SDValue();
6549
6550 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6551 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6552 !isOperationLegal(ISD::MUL, MulVT))
6553 return SDValue();
6554 }
6555
6556 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6557 if (N->getFlags().hasExact())
6558 return BuildExactSDIV(*this, N, dl, DAG, Created);
6559
6560 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6561
6562 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6563 if (C->isZero())
6564 return false;
6565
6566 const APInt &Divisor = C->getAPIntValue();
6567 SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
6568 int NumeratorFactor = 0;
6569 int ShiftMask = -1;
6570
6571 if (Divisor.isOne() || Divisor.isAllOnes()) {
6572 // If d is +1/-1, we just multiply the numerator by +1/-1.
6573 NumeratorFactor = Divisor.getSExtValue();
6574 magics.Magic = 0;
6575 magics.ShiftAmount = 0;
6576 ShiftMask = 0;
6577 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6578 // If d > 0 and m < 0, add the numerator.
6579 NumeratorFactor = 1;
6580 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6581 // If d < 0 and m > 0, subtract the numerator.
6582 NumeratorFactor = -1;
6583 }
6584
6585 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6586 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6587 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6588 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6589 return true;
6590 };
6591
6592 SDValue N0 = N->getOperand(0);
6593 SDValue N1 = N->getOperand(1);
6594
6595 // Collect the shifts / magic values from each element.
6596 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
6597 return SDValue();
6598
6599 SDValue MagicFactor, Factor, Shift, ShiftMask;
6600 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6601 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6602 Factor = DAG.getBuildVector(VT, dl, Factors);
6603 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6604 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6605 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6606 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6607 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6608 "Expected matchUnaryPredicate to return one element for scalable "
6609 "vectors");
6610 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6611 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6612 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6613 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6614 } else {
6615 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6616 MagicFactor = MagicFactors[0];
6617 Factor = Factors[0];
6618 Shift = Shifts[0];
6619 ShiftMask = ShiftMasks[0];
6620 }
6621
6622 // Multiply the numerator (operand 0) by the magic value.
6623 // FIXME: We should support doing a MUL in a wider type.
6624 auto GetMULHS = [&](SDValue X, SDValue Y) {
6625 // If the type isn't legal, use a wider mul of the type calculated
6626 // earlier.
6627 if (!isTypeLegal(VT)) {
6628 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6629 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6630 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6631 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6632 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6633 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6634 }
6635
6636 if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6637 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6638 if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6639 SDValue LoHi =
6640 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6641 return SDValue(LoHi.getNode(), 1);
6642 }
6643 // If a type twice as wide is legal, widen and use a mul plus a shift.
6644 unsigned Size = VT.getScalarSizeInBits();
6645 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6646 if (VT.isVector())
6647 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6648 VT.getVectorElementCount());
6649 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6650 // custom lowered. This is very expensive so avoid it at all costs for
6651 // constant divisors.
6652 if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
6653 isOperationCustom(ISD::SDIVREM, VT.getScalarType())) ||
6654 isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6655 X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
6656 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6657 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6658 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6659 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6660 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6661 }
6662 return SDValue();
6663 };
6664
6665 SDValue Q = GetMULHS(N0, MagicFactor);
6666 if (!Q)
6667 return SDValue();
6668
6669 Created.push_back(Q.getNode());
6670
6671 // (Optionally) Add/subtract the numerator using Factor.
6672 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6673 Created.push_back(Factor.getNode());
6674 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6675 Created.push_back(Q.getNode());
6676
6677 // Shift right algebraic by shift value.
6678 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6679 Created.push_back(Q.getNode());
6680
6681 // Extract the sign bit, mask it and add it to the quotient.
6682 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6683 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6684 Created.push_back(T.getNode());
6685 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6686 Created.push_back(T.getNode());
6687 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6688}
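// Illustrative aside (not from this file): the scalar shape of the sequence
// built above, specialized to W=32, d=7, with the Hacker's Delight constants
// (magic 0x92492493, shift amount 2). The magic is negative while d > 0, so
// the numerator is added back (NumeratorFactor == 1), and the sign bit of
// the shifted quotient is added in (ShiftMask == -1) to round toward zero.
// Arithmetic right shift of negative values is assumed.

#include <cstdint>

static int32_t SignedMagicDiv7(int32_t N) {
  int64_t Magic = (int32_t)0x92492493;                // sign-extended magic
  int32_t Q = (int32_t)(((int64_t)N * Magic) >> 32);  // MULHS
  Q += N;                                             // add numerator
  Q >>= 2;                                            // SRA by ShiftAmount
  Q += (int32_t)((uint32_t)Q >> 31);                  // add the sign bit
  return Q;                                           // == N / 7
}
// e.g. SignedMagicDiv7(100) == 14 and SignedMagicDiv7(-100) == -14.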
6689
6690/// Given an ISD::UDIV node expressing a divide by constant,
6691/// return a DAG expression to select that will generate the same value by
6692/// multiplying by a magic number.
6693/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6694SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6695 bool IsAfterLegalization,
6696 bool IsAfterLegalTypes,
6697 SmallVectorImpl<SDNode *> &Created) const {
6698 SDLoc dl(N);
6699 EVT VT = N->getValueType(0);
6700 EVT SVT = VT.getScalarType();
6701 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6702 EVT ShSVT = ShVT.getScalarType();
6703 unsigned EltBits = VT.getScalarSizeInBits();
6704 EVT MulVT;
6705
6706 // Check to see if we can do this.
6707 // FIXME: We should be more aggressive here.
6708 if (!isTypeLegal(VT)) {
6709 // Limit this to simple scalars for now.
6710 if (VT.isVector() || !VT.isSimple())
6711 return SDValue();
6712
6713 // If this type will be promoted to a large enough type with a legal
6714 // multiply operation, we can go ahead and do this transform.
6715 if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
6716 return SDValue();
6717
6718 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6719 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6720 !isOperationLegal(ISD::MUL, MulVT))
6721 return SDValue();
6722 }
6723
6724 // If the udiv has an 'exact' bit we can use a simpler lowering.
6725 if (N->getFlags().hasExact())
6726 return BuildExactUDIV(*this, N, dl, DAG, Created);
6727
6728 SDValue N0 = N->getOperand(0);
6729 SDValue N1 = N->getOperand(1);
6730
6731 // Try to use leading zeros of the dividend to reduce the multiplier and
6732 // avoid expensive fixups.
6733 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6734
6735 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6736 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6737
6738 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6739 if (C->isZero())
6740 return false;
6741 const APInt& Divisor = C->getAPIntValue();
6742
6743 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6744
6745 // Magic algorithm doesn't work for division by 1. We need to emit a select
6746 // at the end.
6747 if (Divisor.isOne()) {
6748 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6749 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6750 } else {
6751 UnsignedDivisionByConstantInfo magics =
6752 UnsignedDivisionByConstantInfo::get(
6753 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
6754
6755 MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
6756
6757 assert(magics.PreShift < Divisor.getBitWidth() &&
6758 "We shouldn't generate an undefined shift!");
6759 assert(magics.PostShift < Divisor.getBitWidth() &&
6760 "We shouldn't generate an undefined shift!");
6761 assert((!magics.IsAdd || magics.PreShift == 0) &&
6762 "Unexpected pre-shift");
6763 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6764 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
6765 NPQFactor = DAG.getConstant(
6766 magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
6767 : APInt::getZero(EltBits),
6768 dl, SVT);
6769 UseNPQ |= magics.IsAdd;
6770 UsePreShift |= magics.PreShift != 0;
6771 UsePostShift |= magics.PostShift != 0;
6772 }
6773
6774 PreShifts.push_back(PreShift);
6775 MagicFactors.push_back(MagicFactor);
6776 NPQFactors.push_back(NPQFactor);
6777 PostShifts.push_back(PostShift);
6778 return true;
6779 };
6780
6781 // Collect the shifts/magic values from each element.
6782 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
6783 return SDValue();
6784
6785 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6786 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6787 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6788 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6789 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6790 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6791 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6792 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6793 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6794 "Expected matchUnaryPredicate to return one for scalable vectors");
6795 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6796 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6797 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6798 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6799 } else {
6800 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6801 PreShift = PreShifts[0];
6802 MagicFactor = MagicFactors[0];
6803 PostShift = PostShifts[0];
6804 }
6805
6806 SDValue Q = N0;
6807 if (UsePreShift) {
6808 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6809 Created.push_back(Q.getNode());
6810 }
6811
6812 // FIXME: We should support doing a MUL in a wider type.
6813 auto GetMULHU = [&](SDValue X, SDValue Y) {
6814 // If the type isn't legal, use a wider mul of the type calculated
6815 // earlier.
6816 if (!isTypeLegal(VT)) {
6817 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
6818 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
6819 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6820 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6821 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6822 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6823 }
6824
6825 if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
6826 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6827 if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
6828 SDValue LoHi =
6829 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6830 return SDValue(LoHi.getNode(), 1);
6831 }
6832 // If a type twice as wide is legal, widen and use a mul plus a shift.
6833 unsigned Size = VT.getScalarSizeInBits();
6834 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6835 if (VT.isVector())
6836 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6837 VT.getVectorElementCount());
6838 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6839 // custom lowered. This is very expensive so avoid it at all costs for
6840 // constant divisors.
6841 if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
6842 isOperationCustom(ISD::UDIVREM, VT.getScalarType())) ||
6843 isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6844 X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
6845 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
6846 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6847 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6848 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6849 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6850 }
6851 return SDValue(); // No mulhu or equivalent
6852 };
6853
6854 // Multiply the numerator (operand 0) by the magic value.
6855 Q = GetMULHU(Q, MagicFactor);
6856 if (!Q)
6857 return SDValue();
6858
6859 Created.push_back(Q.getNode());
6860
6861 if (UseNPQ) {
6862 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6863 Created.push_back(NPQ.getNode());
6864
6865 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6866 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6867 if (VT.isVector())
6868 NPQ = GetMULHU(NPQ, NPQFactor);
6869 else
6870 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6871
6872 Created.push_back(NPQ.getNode());
6873
6874 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6875 Created.push_back(Q.getNode());
6876 }
6877
6878 if (UsePostShift) {
6879 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
6880 Created.push_back(Q.getNode());
6881 }
6882
6883 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6884
6885 SDValue One = DAG.getConstant(1, dl, VT);
6886 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
6887 return DAG.getSelect(dl, VT, IsOne, N0, Q);
6888}
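// Illustrative aside (not from this file): the scalar shape of the unsigned
// sequence above for W=32, d=7, where the magic needs 33 bits (magics.IsAdd):
// the lost top bit is recovered through the NPQ fixup N - Q before the final
// post-shift.

#include <cstdint>

static uint32_t UnsignedMagicDiv7(uint32_t N) {
  uint32_t Q = (uint32_t)(((uint64_t)N * 0x24924925u) >> 32); // MULHU
  uint32_t NPQ = N - Q;                                       // NPQ = N - Q
  NPQ >>= 1;                                                  // SRL by 1
  return (NPQ + Q) >> 2;                                      // PostShift
}
// e.g. UnsignedMagicDiv7(0xFFFFFFFFu) == 613566756u.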
6889
6890/// If all values in Values that *don't* match the predicate are the same
6891/// 'splat' value, then replace all values with that splat value.
6892/// Else, if AlternativeReplacement was provided, then replace all values that
6893/// do match predicate with AlternativeReplacement value.
6894static void
6895turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6896 std::function<bool(SDValue)> Predicate,
6897 SDValue AlternativeReplacement = SDValue()) {
6898 SDValue Replacement;
6899 // Is there a value for which the Predicate does *NOT* match? What is it?
6900 auto SplatValue = llvm::find_if_not(Values, Predicate);
6901 if (SplatValue != Values.end()) {
6902 // Does Values consist only of SplatValue's and values matching Predicate?
6903 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6904 return Value == *SplatValue || Predicate(Value);
6905 })) // Then we shall replace values matching predicate with SplatValue.
6906 Replacement = *SplatValue;
6907 }
6908 if (!Replacement) {
6909 // Oops, we did not find the "baseline" splat value.
6910 if (!AlternativeReplacement)
6911 return; // Nothing to do.
6912 // Let's replace with provided value then.
6913 Replacement = AlternativeReplacement;
6914 }
6915 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6916}
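// Illustrative aside (not from this file): the same splat-recovery idea on
// plain integers, using the standard-library analogues of the calls above.
//
//   std::vector<int> Lanes = {5, 0, 5, 0};             // 0 marks "don't care"
//   auto IsDontCare = [](int V) { return V == 0; };
//   auto Splat = std::find_if_not(Lanes.begin(), Lanes.end(), IsDontCare);
//   if (Splat != Lanes.end() &&
//       std::all_of(Lanes.begin(), Lanes.end(),
//                   [&](int V) { return V == *Splat || IsDontCare(V); }))
//     std::replace_if(Lanes.begin(), Lanes.end(), IsDontCare, *Splat);
//   // Lanes is now {5, 5, 5, 5}.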
6917
6918/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6919/// where the divisor is constant and the comparison target is zero,
6920/// return a DAG expression that will generate the same comparison result
6921/// using only multiplications, additions and shifts/rotations.
6922/// Ref: "Hacker's Delight" 10-17.
6923SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6924 SDValue CompTargetNode,
6925 ISD::CondCode Cond,
6926 DAGCombinerInfo &DCI,
6927 const SDLoc &DL) const {
6928 SmallVector<SDNode *, 16> Built;
6929 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6930 DCI, DL, Built)) {
6931 for (SDNode *N : Built)
6932 DCI.AddToWorklist(N);
6933 return Folded;
6934 }
6935
6936 return SDValue();
6937}
6938
6939SDValue
6940TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6941 SDValue CompTargetNode, ISD::CondCode Cond,
6942 DAGCombinerInfo &DCI, const SDLoc &DL,
6943 SmallVectorImpl<SDNode *> &Created) const {
6944 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6945 // - D must be constant, with D = D0 * 2^K where D0 is odd
6946 // - P is the multiplicative inverse of D0 modulo 2^W
6947 // - Q = floor(((2^W) - 1) / D)
6948 // where W is the width of the common type of N and D.
6949 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6950 "Only applicable for (in)equality comparisons.");
6951
6952 SelectionDAG &DAG = DCI.DAG;
6953
6954 EVT VT = REMNode.getValueType();
6955 EVT SVT = VT.getScalarType();
6956 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6957 EVT ShSVT = ShVT.getScalarType();
6958
6959 // If MUL is unavailable, we cannot proceed in any case.
6960 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6961 return SDValue();
6962
6963 bool ComparingWithAllZeros = true;
6964 bool AllComparisonsWithNonZerosAreTautological = true;
6965 bool HadTautologicalLanes = false;
6966 bool AllLanesAreTautological = true;
6967 bool HadEvenDivisor = false;
6968 bool AllDivisorsArePowerOfTwo = true;
6969 bool HadTautologicalInvertedLanes = false;
6970 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
6971
6972 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6973 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6974 if (CDiv->isZero())
6975 return false;
6976
6977 const APInt &D = CDiv->getAPIntValue();
6978 const APInt &Cmp = CCmp->getAPIntValue();
6979
6980 ComparingWithAllZeros &= Cmp.isZero();
6981
6982 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6983 // if C2 is not less than C1, the comparison is always false.
6984 // But we will only be able to produce the comparison that will give the
6985 // opposite tautological answer. So this lane would need to be fixed up.
6986 bool TautologicalInvertedLane = D.ule(Cmp);
6987 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6988
6989 // If all lanes are tautological (either all divisors are ones, or divisor
6990 // is not greater than the constant we are comparing with),
6991 // we will prefer to avoid the fold.
6992 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6993 HadTautologicalLanes |= TautologicalLane;
6994 AllLanesAreTautological &= TautologicalLane;
6995
6996 // If we are comparing with non-zero, we'll need to subtract said
6997 // comparison value from the LHS. But there is no point in doing that if
6998 // every lane where we are comparing with non-zero is tautological.
6999 if (!Cmp.isZero())
7000 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
7001
7002 // Decompose D into D0 * 2^K
7003 unsigned K = D.countr_zero();
7004 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7005 APInt D0 = D.lshr(K);
7006
7007 // D is even if it has trailing zeros.
7008 HadEvenDivisor |= (K != 0);
7009 // D is a power-of-two if D0 is one.
7010 // If all divisors are power-of-two, we will prefer to avoid the fold.
7011 AllDivisorsArePowerOfTwo &= D0.isOne();
7012
7013 // P = inv(D0, 2^W)
7014 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7015 unsigned W = D.getBitWidth();
7016 APInt P = D0.multiplicativeInverse();
7017 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7018
7019 // Q = floor((2^W - 1) u/ D)
7020 // R = ((2^W - 1) u% D)
7021 APInt Q, R;
7022 APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
7023
7024 // If we are comparing with zero, then that comparison constant is okay,
7025 // else it may need to be one less than that.
7026 if (Cmp.ugt(R))
7027 Q -= 1;
7028
7029 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7030 "We are expecting that K is always less than all-ones for ShSVT");
7031
7032 // If the lane is tautological the result can be constant-folded.
7033 if (TautologicalLane) {
7034 // Set P and K to bogus values so we can try to splat them.
7035 P = 0;
7036 K = -1;
7037 // And ensure that comparison constant is tautological,
7038 // it will always compare true/false.
7039 Q = -1;
7040 }
7041
7042 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7043 KAmts.push_back(
7044 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7045 /*implicitTrunc=*/true),
7046 DL, ShSVT));
7047 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7048 return true;
7049 };
7050
7051 SDValue N = REMNode.getOperand(0);
7052 SDValue D = REMNode.getOperand(1);
7053
7054 // Collect the values from each element.
7055 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
7056 return SDValue();
7057
7058 // If all lanes are tautological, the result can be constant-folded.
7059 if (AllLanesAreTautological)
7060 return SDValue();
7061
7062 // If this is a urem by a powers-of-two, avoid the fold since it can be
7063 // best implemented as a bit test.
7064 if (AllDivisorsArePowerOfTwo)
7065 return SDValue();
7066
7067 SDValue PVal, KVal, QVal;
7068 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7069 if (HadTautologicalLanes) {
7070 // Try to turn PAmts into a splat, since we don't care about the values
7071 // that are currently '0'. If we can't, just keep '0's.
7072 turnVectorIntoSplatVector(PAmts, isNullConstant);
7073 // Try to turn KAmts into a splat, since we don't care about the values
7074 // that are currently '-1'. If we can't, change them to '0's.
7075 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7076 DAG.getConstant(0, DL, ShSVT));
7077 }
7078
7079 PVal = DAG.getBuildVector(VT, DL, PAmts);
7080 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7081 QVal = DAG.getBuildVector(VT, DL, QAmts);
7082 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7083 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
7084 "Expected matchBinaryPredicate to return one element for "
7085 "SPLAT_VECTORs");
7086 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7087 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7088 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7089 } else {
7090 PVal = PAmts[0];
7091 KVal = KAmts[0];
7092 QVal = QAmts[0];
7093 }
7094
7095 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7096 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
7097 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
7098 assert(CompTargetNode.getValueType() == N.getValueType() &&
7099 "Expecting that the types on LHS and RHS of comparisons match.");
7100 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
7101 }
7102
7103 // (mul N, P)
7104 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7105 Created.push_back(Op0.getNode());
7106
7107 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7108 // divisors as a performance improvement, since rotating by 0 is a no-op.
7109 if (HadEvenDivisor) {
7110 // We need ROTR to do this.
7111 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7112 return SDValue();
7113 // UREM: (rotr (mul N, P), K)
7114 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7115 Created.push_back(Op0.getNode());
7116 }
7117
7118 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
7119 SDValue NewCC =
7120 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7121 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7122 if (!HadTautologicalInvertedLanes)
7123 return NewCC;
7124
7125 // If any lanes previously compared always-false, the NewCC will give
7126 // always-true result for them, so we need to fixup those lanes.
7127 // Or the other way around for inequality predicate.
7128 assert(VT.isVector() && "Can/should only get here for vectors.");
7129 Created.push_back(NewCC.getNode());
7130
7131 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7132 // if C2 is not less than C1, the comparison is always false.
7133 // But we have produced the comparison that will give the
7134 // opposite tautological answer. So these lanes would need to be fixed up.
7135 SDValue TautologicalInvertedChannels =
7136 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
7137 Created.push_back(TautologicalInvertedChannels.getNode());
7138
7139 // NOTE: we avoid letting illegal types through even if we're before legalize
7140 // ops; legalization has a hard time producing good code for this.
7141 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
7142 // If we have a vector select, let's replace the comparison results in the
7143 // affected lanes with the correct tautological result.
7144 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
7145 DL, SETCCVT, SETCCVT);
7146 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
7147 Replacement, NewCC);
7148 }
7149
7150 // Else, we can just invert the comparison result in the appropriate lanes.
7151 //
7152 // NOTE: see the VSELECT note above.
7153 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
7154 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
7155 TautologicalInvertedChannels);
7156
7157 return SDValue(); // Don't know how to lower.
7158}
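// Illustrative aside (not from this file): a brute-force check of the UREM
// fold above for W=32 and D = 6 (so D0 = 3, K = 1), with P and Q derived
// exactly as in the comment at the top of the function.

#include <cassert>
#include <cstdint>

static uint32_t UREMFoldInv32(uint32_t D0) { // inv(D0, 2^32), D0 odd
  uint32_t P = D0;
  for (int I = 0; I < 4; ++I)
    P *= 2 - D0 * P;                         // Newton: doubles correct bits
  return P;
}
static uint32_t UREMFoldRotR32(uint32_t V, unsigned S) {
  return S ? (V >> S) | (V << (32 - S)) : V;
}
static void CheckUREMFoldD6() {
  uint32_t P = UREMFoldInv32(3);             // D = 6 = 3 * 2^1
  uint32_t Q = 0xFFFFFFFFu / 6;              // floor((2^W - 1) / D)
  for (uint32_t N = 0; N != 1000000; ++N)
    assert((N % 6 == 0) == (UREMFoldRotR32(N * P, 1) <= Q));
}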
7159
7160/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7161/// where the divisor is constant and the comparison target is zero,
7162/// return a DAG expression that will generate the same comparison result
7163/// using only multiplications, additions and shifts/rotations.
7164/// Ref: "Hacker's Delight" 10-17.
7165SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7166 SDValue CompTargetNode,
7167 ISD::CondCode Cond,
7168 DAGCombinerInfo &DCI,
7169 const SDLoc &DL) const {
7170 SmallVector<SDNode *, 16> Built;
7171 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7172 DCI, DL, Built)) {
7173 assert(Built.size() <= 7 && "Max size prediction failed.");
7174 for (SDNode *N : Built)
7175 DCI.AddToWorklist(N);
7176 return Folded;
7177 }
7178
7179 return SDValue();
7180}
7181
7182SDValue
7183TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7184 SDValue CompTargetNode, ISD::CondCode Cond,
7185 DAGCombinerInfo &DCI, const SDLoc &DL,
7186 SmallVectorImpl<SDNode *> &Created) const {
7187 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7188 // Fold:
7189 // (seteq/ne (srem N, D), 0)
7190 // To:
7191 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7192 //
7193 // - D must be constant, with D = D0 * 2^K where D0 is odd
7194 // - P is the multiplicative inverse of D0 modulo 2^W
7195 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7196 // - Q = floor((2 * A) / (2^K))
7197 // where W is the width of the common type of N and D.
7198 //
7199 // When D is a power of two (and thus D0 is 1), the normal
7200 // formula for A and Q don't apply, because the derivation
7201 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7202 // does not apply. This specifically fails when N = INT_MIN.
7203 //
7204 // Instead, for power-of-two D, we use:
7205 // - A = 2^(W-1)
7206 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
7207 // - Q = 2^(W-K) - 1
7208 // |-> Test that the top K bits are zero after rotation
7209 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7210 "Only applicable for (in)equality comparisons.");
7211
7212 SelectionDAG &DAG = DCI.DAG;
7213
7214 EVT VT = REMNode.getValueType();
7215 EVT SVT = VT.getScalarType();
7216 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7217 EVT ShSVT = ShVT.getScalarType();
7218
7219 // If we are after ops legalization, and MUL is unavailable, we cannot
7220 // proceed.
7221 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7222 return SDValue();
7223
7224 // TODO: Could support comparing with non-zero too.
7225 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7226 if (!CompTarget || !CompTarget->isZero())
7227 return SDValue();
7228
7229 bool HadIntMinDivisor = false;
7230 bool HadOneDivisor = false;
7231 bool AllDivisorsAreOnes = true;
7232 bool HadEvenDivisor = false;
7233 bool NeedToApplyOffset = false;
7234 bool AllDivisorsArePowerOfTwo = true;
7235 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7236
7237 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7238 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7239 if (C->isZero())
7240 return false;
7241
7242 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7243
7244 // WARNING: this fold is only valid for positive divisors!
7245 APInt D = C->getAPIntValue();
7246 if (D.isNegative())
7247 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
7248
7249 HadIntMinDivisor |= D.isMinSignedValue();
7250
7251 // If all divisors are ones, we will prefer to avoid the fold.
7252 HadOneDivisor |= D.isOne();
7253 AllDivisorsAreOnes &= D.isOne();
7254
7255 // Decompose D into D0 * 2^K
7256 unsigned K = D.countr_zero();
7257 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7258 APInt D0 = D.lshr(K);
7259
7260 if (!D.isMinSignedValue()) {
7261 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7262 // we don't care about this lane in this fold, we'll special-handle it.
7263 HadEvenDivisor |= (K != 0);
7264 }
7265
7266 // D is a power-of-two if D0 is one. This includes INT_MIN.
7267 // If all divisors are power-of-two, we will prefer to avoid the fold.
7268 AllDivisorsArePowerOfTwo &= D0.isOne();
7269
7270 // P = inv(D0, 2^W)
7271 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7272 unsigned W = D.getBitWidth();
7273 APInt P = D0.multiplicativeInverse();
7274 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7275
7276 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7277 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7278 A.clearLowBits(K);
7279
7280 if (!D.isMinSignedValue()) {
7281 // If divisor INT_MIN, then we don't care about this lane in this fold,
7282 // we'll special-handle it.
7283 NeedToApplyOffset |= A != 0;
7284 }
7285
7286 // Q = floor((2 * A) / (2^K))
7287 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7288
7289 assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
7290 "We are expecting that A is always less than all-ones for SVT");
7291 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7292 "We are expecting that K is always less than all-ones for ShSVT");
7293
7294 // If D was a power of two, apply the alternate constant derivation.
7295 if (D0.isOne()) {
7296 // A = 2^(W-1)
7297 A = APInt::getSignedMinValue(W);
7298 // - Q = 2^(W-K) - 1
7299 Q = APInt::getAllOnes(W - K).zext(W);
7300 }
7301
7302 // If the divisor is 1 the result can be constant-folded. Likewise, we
7303 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7304 if (D.isOne()) {
7305 // Set P, A and K to bogus values so we can try to splat them.
7306 P = 0;
7307 A = -1;
7308 K = -1;
7309
7310 // x ?% 1 == 0 <--> true <--> x u<= -1
7311 Q = -1;
7312 }
7313
7314 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7315 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7316 KAmts.push_back(
7317 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7318 /*implicitTrunc=*/true),
7319 DL, ShSVT));
7320 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7321 return true;
7322 };
7323
7324 SDValue N = REMNode.getOperand(0);
7325 SDValue D = REMNode.getOperand(1);
7326
7327 // Collect the values from each element.
7328 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7329 return SDValue();
7330
7331 // If this is a srem by a one, avoid the fold since it can be constant-folded.
7332 if (AllDivisorsAreOnes)
7333 return SDValue();
7334
7335 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7336 // since it can be best implemented as a bit test.
7337 if (AllDivisorsArePowerOfTwo)
7338 return SDValue();
7339
7340 SDValue PVal, AVal, KVal, QVal;
7341 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7342 if (HadOneDivisor) {
7343 // Try to turn PAmts into a splat, since we don't care about the values
7344 // that are currently '0'. If we can't, just keep '0's.
7345 turnVectorIntoSplatVector(PAmts, isNullConstant);
7346 // Try to turn AAmts into a splat, since we don't care about the
7347 // values that are currently '-1'. If we can't, change them to '0's.
7348 turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
7349 DAG.getConstant(0, DL, SVT));
7350 // Try to turn KAmts into a splat, since we don't care about the values
7351 // that are currently '-1'. If we can't, change them to '0's.
7352 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7353 DAG.getConstant(0, DL, ShSVT));
7354 }
7355
7356 PVal = DAG.getBuildVector(VT, DL, PAmts);
7357 AVal = DAG.getBuildVector(VT, DL, AAmts);
7358 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7359 QVal = DAG.getBuildVector(VT, DL, QAmts);
7360 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7361 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7362 QAmts.size() == 1 &&
7363 "Expected matchUnaryPredicate to return one element for scalable "
7364 "vectors");
7365 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7366 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7367 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7368 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7369 } else {
7370 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7371 PVal = PAmts[0];
7372 AVal = AAmts[0];
7373 KVal = KAmts[0];
7374 QVal = QAmts[0];
7375 }
7376
7377 // (mul N, P)
7378 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7379 Created.push_back(Op0.getNode());
7380
7381 if (NeedToApplyOffset) {
7382 // We need ADD to do this.
7383 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7384 return SDValue();
7385
7386 // (add (mul N, P), A)
7387 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7388 Created.push_back(Op0.getNode());
7389 }
7390
7391 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7392 // divisors as a performance improvement, since rotating by 0 is a no-op.
7393 if (HadEvenDivisor) {
7394 // We need ROTR to do this.
7395 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7396 return SDValue();
7397 // SREM: (rotr (add (mul N, P), A), K)
7398 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7399 Created.push_back(Op0.getNode());
7400 }
7401
7402 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7403 SDValue Fold =
7404 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7405 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7406
7407 // If we didn't have lanes with INT_MIN divisor, then we're done.
7408 if (!HadIntMinDivisor)
7409 return Fold;
7410
7411 // That fold is only valid for positive divisors. Which effectively means,
7412 // it is invalid for INT_MIN divisors. So if we have such a lane,
7413 // we must fix-up results for said lanes.
7414 assert(VT.isVector() && "Can/should only get here for vectors.");
7415
7416 // NOTE: we avoid letting illegal types through even if we're before legalize
7417 // ops; legalization has a hard time producing good code for the code that
7418 // follows.
7419 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7420 !isOperationLegalOrCustom(ISD::AND, VT) ||
7421 !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
7422 !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
7423 return SDValue();
7424
7425 Created.push_back(Fold.getNode());
7426
7427 SDValue IntMin = DAG.getConstant(
7428 APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
7429 SDValue IntMax = DAG.getConstant(
7430 APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
7431 SDValue Zero =
7432 DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
7433
7434 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7435 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7436 Created.push_back(DivisorIsIntMin.getNode());
7437
7438 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7439 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7440 Created.push_back(Masked.getNode());
7441 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7442 Created.push_back(MaskedIsZero.getNode());
7443
7444 // To produce final result we need to blend 2 vectors: 'SetCC' and
7445 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7446 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7447 // constant-folded, select can get lowered to a shuffle with constant mask.
7448 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7449 MaskedIsZero, Fold);
7450
7451 return Blended;
7452}
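// Illustrative aside (not from this file): the same kind of brute-force
// check for the signed fold above, W=32, D = 6 (D0 = 3, K = 1), with A and Q
// taken from the formulas in the comment block at the top of the function.
// UREMFoldInv32/UREMFoldRotR32 are the helpers from the UREM sketch above.
static void CheckSREMFoldD6() {
  uint32_t P = UREMFoldInv32(3);              // inv(D0, 2^32)
  uint32_t A = (0x7FFFFFFFu / 3) & ~1u;       // floor((2^31 - 1) / D0) & -2^K
  uint32_t Q = (2 * A) >> 1;                  // floor((2 * A) / 2^K)
  for (int32_t N = -1000000; N != 1000000; ++N)
    assert((N % 6 == 0) == (UREMFoldRotR32((uint32_t)N * P + A, 1) <= Q));
}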
7453
7454SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7455 const DenormalMode &Mode) const {
7456 SDLoc DL(Op);
7457 EVT VT = Op.getValueType();
7458 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7459 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7460
7461 // This is specifically a check for the handling of denormal inputs, not the
7462 // result.
7463 if (Mode.Input == DenormalMode::PreserveSign ||
7464 Mode.Input == DenormalMode::PositiveZero) {
7465 // Test = X == 0.0
7466 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7467 }
7468
7469 // Test with denormal inputs to avoid a wrong estimate.
7470 //
7471 // Test = fabs(X) < SmallestNormal
7472 const fltSemantics &FltSem = VT.getFltSemantics();
7473 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7474 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7475 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7476 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7477}
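// Illustrative aside (not from this file): the scalar predicate built above.
// When denormal inputs are flushed (PreserveSign/PositiveZero) only exact
// zero needs the guard; otherwise anything below the smallest normalized
// value does.

#include <cmath>
#include <limits>

static bool NeedsSqrtGuard(float X, bool InputsAreFlushed) {
  if (InputsAreFlushed)
    return X == 0.0f;                                  // Test = (X == 0.0)
  return std::fabs(X) <
         std::numeric_limits<float>::min();  // fabs(X) < SmallestNormal
}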
7478
7479SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7480 bool LegalOps, bool OptForSize,
7481 NegatibleCost &Cost,
7482 unsigned Depth) const {
7483 // fneg is removable even if it has multiple uses.
7484 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7485 Cost = NegatibleCost::Cheaper;
7486 return Op.getOperand(0);
7487 }
7488
7489 // Don't recurse exponentially.
7490 if (Depth > SelectionDAG::MaxRecursionDepth)
7491 return SDValue();
7492
7493 // Pre-increment recursion depth for use in recursive calls.
7494 ++Depth;
7495 const SDNodeFlags Flags = Op->getFlags();
7496 EVT VT = Op.getValueType();
7497 unsigned Opcode = Op.getOpcode();
7498
7499 // Don't allow anything with multiple uses unless we know it is free.
7500 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7501 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7502 isFPExtFree(VT, Op.getOperand(0).getValueType());
7503 if (!IsFreeExtend)
7504 return SDValue();
7505 }
7506
7507 auto RemoveDeadNode = [&](SDValue N) {
7508 if (N && N.getNode()->use_empty())
7509 DAG.RemoveDeadNode(N.getNode());
7510 };
7511
7512 SDLoc DL(Op);
7513
7514 // Because getNegatedExpression can delete nodes we need a handle to keep
7515 // temporary nodes alive in case the recursion manages to create an identical
7516 // node.
7517 std::list<HandleSDNode> Handles;
7518
7519 switch (Opcode) {
7520 case ISD::ConstantFP: {
7521 // Don't invert constant FP values after legalization unless the target says
7522 // the negated constant is legal.
7523 bool IsOpLegal =
7524 !LegalOps || isOperationLegal(ISD::ConstantFP, VT) ||
7525 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7526 OptForSize);
7527
7528 if (LegalOps && !IsOpLegal)
7529 break;
7530
7531 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7532 V.changeSign();
7533 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7534
7535 // If we already have the use of the negated floating constant, it is free
7536 // to negate it even if it has multiple uses.
7537 if (!Op.hasOneUse() && CFP.use_empty())
7538 break;
7539 Cost = NegatibleCost::Neutral;
7540 return CFP;
7541 }
7542 case ISD::BUILD_VECTOR: {
7543 // Only permit BUILD_VECTOR of constants.
7544 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7545 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7546 }))
7547 break;
7548
7549 bool IsOpLegal =
7550 (isOperationLegal(ISD::ConstantFP, VT) &&
7551 isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
7552 llvm::all_of(Op->op_values(), [&](SDValue N) {
7553 return N.isUndef() ||
7554 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7555 OptForSize);
7556 });
7557
7558 if (LegalOps && !IsOpLegal)
7559 break;
7560
7561 SmallVector<SDValue, 4> Ops;
7562 for (SDValue C : Op->op_values()) {
7563 if (C.isUndef()) {
7564 Ops.push_back(C);
7565 continue;
7566 }
7567 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7568 V.changeSign();
7569 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7570 }
7571 Cost = NegatibleCost::Neutral;
7572 return DAG.getBuildVector(VT, DL, Ops);
7573 }
7574 case ISD::FADD: {
7575 if (!Flags.hasNoSignedZeros())
7576 break;
7577
7578 // After operation legalization, it might not be legal to create new FSUBs.
7579 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7580 break;
7581 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7582
7583 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7584 NegatibleCost CostX = NegatibleCost::Expensive;
7585 SDValue NegX =
7586 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7587 // Prevent this node from being deleted by the next call.
7588 if (NegX)
7589 Handles.emplace_back(NegX);
7590
7591 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7592 NegatibleCost CostY = NegatibleCost::Expensive;
7593 SDValue NegY =
7594 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7595
7596 // We're done with the handles.
7597 Handles.clear();
7598
7599 // Negate X if its cost is less than or equal to the cost of Y.
7600 if (NegX && (CostX <= CostY)) {
7601 Cost = CostX;
7602 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7603 if (NegY != N)
7604 RemoveDeadNode(NegY);
7605 return N;
7606 }
7607
7608 // Negate the Y if it is not expensive.
7609 if (NegY) {
7610 Cost = CostY;
7611 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7612 if (NegX != N)
7613 RemoveDeadNode(NegX);
7614 return N;
7615 }
7616 break;
7617 }
7618 case ISD::FSUB: {
7619 // We can't turn -(A-B) into B-A when we honor signed zeros.
7620 if (!Flags.hasNoSignedZeros())
7621 break;
7622
7623 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7624 // fold (fneg (fsub 0, Y)) -> Y
7625 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7626 if (C->isZero()) {
7627 Cost = NegatibleCost::Cheaper;
7628 return Y;
7629 }
7630
7631 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7632 Cost = NegatibleCost::Neutral;
7633 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7634 }
7635 case ISD::FMUL:
7636 case ISD::FDIV: {
7637 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7638
7639 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7640 NegatibleCost CostX = NegatibleCost::Expensive;
7641 SDValue NegX =
7642 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7643 // Prevent this node from being deleted by the next call.
7644 if (NegX)
7645 Handles.emplace_back(NegX);
7646
7647 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7648 NegatibleCost CostY = NegatibleCost::Expensive;
7649 SDValue NegY =
7650 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7651
7652 // We're done with the handles.
7653 Handles.clear();
7654
7655 // Negate the X if its cost is less or equal than Y.
7656 if (NegX && (CostX <= CostY)) {
7657 Cost = CostX;
7658 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7659 if (NegY != N)
7660 RemoveDeadNode(NegY);
7661 return N;
7662 }
7663
7664 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7665 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7666 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7667 break;
7668
7669 // Negate the Y if it is not expensive.
7670 if (NegY) {
7671 Cost = CostY;
7672 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7673 if (NegX != N)
7674 RemoveDeadNode(NegX);
7675 return N;
7676 }
7677 break;
7678 }
7679 case ISD::FMA:
7680 case ISD::FMULADD:
7681 case ISD::FMAD: {
7682 if (!Flags.hasNoSignedZeros())
7683 break;
7684
7685 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7686 NegatibleCost CostZ = NegatibleCost::Expensive;
7687 SDValue NegZ =
7688 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7689 // Give up if fail to negate the Z.
7690 if (!NegZ)
7691 break;
7692
7693 // Prevent this node from being deleted by the next two calls.
7694 Handles.emplace_back(NegZ);
7695
7696 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7698 SDValue NegX =
7699 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7700 // Prevent this node from being deleted by the next call.
7701 if (NegX)
7702 Handles.emplace_back(NegX);
7703
7704 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7705 NegatibleCost CostY = NegatibleCost::Expensive;
7706 SDValue NegY =
7707 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7708
7709 // We're done with the handles.
7710 Handles.clear();
7711
7712 // Negate X if its cost is less than or equal to the cost of Y.
7713 if (NegX && (CostX <= CostY)) {
7714 Cost = std::min(CostX, CostZ);
7715 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7716 if (NegY != N)
7717 RemoveDeadNode(NegY);
7718 return N;
7719 }
7720
7721 // Negate the Y if it is not expensive.
7722 if (NegY) {
7723 Cost = std::min(CostY, CostZ);
7724 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7725 if (NegX != N)
7726 RemoveDeadNode(NegX);
7727 return N;
7728 }
7729 break;
7730 }
7731
7732 case ISD::FP_EXTEND:
7733 case ISD::FSIN:
7734 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7735 OptForSize, Cost, Depth))
7736 return DAG.getNode(Opcode, DL, VT, NegV);
7737 break;
7738 case ISD::FP_ROUND:
7739 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7740 OptForSize, Cost, Depth))
7741 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7742 break;
7743 case ISD::SELECT:
7744 case ISD::VSELECT: {
7745 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7746 // iff at least one cost is cheaper and the other is neutral/cheaper
7747 SDValue LHS = Op.getOperand(1);
7748 NegatibleCost CostLHS = NegatibleCost::Expensive;
7749 SDValue NegLHS =
7750 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7751 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7752 RemoveDeadNode(NegLHS);
7753 break;
7754 }
7755
7756 // Prevent this node from being deleted by the next call.
7757 Handles.emplace_back(NegLHS);
7758
7759 SDValue RHS = Op.getOperand(2);
7760 NegatibleCost CostRHS = NegatibleCost::Expensive;
7761 SDValue NegRHS =
7762 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7763
7764 // We're done with the handles.
7765 Handles.clear();
7766
7767 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7768 (CostLHS != NegatibleCost::Cheaper &&
7769 CostRHS != NegatibleCost::Cheaper)) {
7770 RemoveDeadNode(NegLHS);
7771 RemoveDeadNode(NegRHS);
7772 break;
7773 }
7774
7775 Cost = std::min(CostLHS, CostRHS);
7776 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7777 }
7778 }
7779
7780 return SDValue();
7781}
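// Illustrative aside (not from this file): the algebraic identities the
// cases above rely on, stated on scalars. The mul/div forms hold exactly in
// IEEE arithmetic; the add/sub and fma forms can flip the sign of a zero
// result, which is why those cases check Flags.hasNoSignedZeros() first.
//
//   -(x + y) == (-x) - y == (-y) - x       // ISD::FADD
//   -(x - y) ==   y - x                    // ISD::FSUB
//   -(x * y) == (-x) * y == x * (-y)       // ISD::FMUL / ISD::FDIV
//   -(x * y + z) == (-x) * y + (-z)        // ISD::FMA / ISD::FMAD
//                == x * (-y) + (-z)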
7782
7783//===----------------------------------------------------------------------===//
7784// Legalization Utilities
7785//===----------------------------------------------------------------------===//
7786
7787bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7788 SDValue LHS, SDValue RHS,
7789 SmallVectorImpl<SDValue> &Result,
7790 EVT HiLoVT, SelectionDAG &DAG,
7791 MulExpansionKind Kind, SDValue LL,
7792 SDValue LH, SDValue RL, SDValue RH) const {
7793 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7794 Opcode == ISD::SMUL_LOHI);
7795
7796 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7797 isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
7798 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7799 isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
7800 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7801 isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
7802 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7803 isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
7804
7805 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7806 return false;
7807
7808 unsigned OuterBitSize = VT.getScalarSizeInBits();
7809 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7810
7811 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7812 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7813 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7814
7815 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7816 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7817 bool Signed) -> bool {
7818 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7819 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7820 Hi = SDValue(Lo.getNode(), 1);
7821 return true;
7822 }
7823 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7824 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7825 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7826 return true;
7827 }
7828 return false;
7829 };
7830
7831 SDValue Lo, Hi;
7832
7833 if (!LL.getNode() && !RL.getNode() &&
7834 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7835 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7836 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7837 }
7838
7839 if (!LL.getNode())
7840 return false;
7841
7842 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7843 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7844 DAG.MaskedValueIsZero(RHS, HighMask)) {
7845 // The inputs are both zero-extended.
7846 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7847 Result.push_back(Lo);
7848 Result.push_back(Hi);
7849 if (Opcode != ISD::MUL) {
7850 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7851 Result.push_back(Zero);
7852 Result.push_back(Zero);
7853 }
7854 return true;
7855 }
7856 }
7857
7858 if (!VT.isVector() && Opcode == ISD::MUL &&
7859 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7860 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7861 // The input values are both sign-extended.
7862 // TODO non-MUL case?
7863 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7864 Result.push_back(Lo);
7865 Result.push_back(Hi);
7866 return true;
7867 }
7868 }
7869
7870 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7871 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7872
7873 if (!LH.getNode() && !RH.getNode() &&
7874 isOperationLegalOrCustom(ISD::SRL, VT) &&
7875 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7876 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7877 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7878 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7879 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7880 }
7881
7882 if (!LH.getNode())
7883 return false;
7884
7885 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7886 return false;
7887
7888 Result.push_back(Lo);
7889
7890 if (Opcode == ISD::MUL) {
7891 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7892 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7893 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7894 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7895 Result.push_back(Hi);
7896 return true;
7897 }
7898
7899 // Compute the full width result.
7900 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7901 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
7902 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7903 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
7904 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
7905 };
7906
7907 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7908 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7909 return false;
7910
7911 // This is effectively the add part of a multiply-add of half-sized operands,
7912 // so it cannot overflow.
7913 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7914
7915 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7916 return false;
7917
7918 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7919 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7920
7921 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7922 isOperationLegalOrCustom(ISD::ADDE, VT));
7923 if (UseGlue)
7924 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7925 Merge(Lo, Hi));
7926 else
7927 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
7928 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
7929
7930 SDValue Carry = Next.getValue(1);
7931 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7932 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7933
7934 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7935 return false;
7936
7937 if (UseGlue)
7938 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7939 Carry);
7940 else
7941 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
7942 Zero, Carry);
7943
7944 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7945
7946 if (Opcode == ISD::SMUL_LOHI) {
7947 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7948 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
7949 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
7950
7951 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7952 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
7953 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
7954 }
7955
7956 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7957 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7958 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7959 return true;
7960}
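// Illustrative aside (not from this file): the scalar shape of the expansion
// above for an unsigned 64x64->128 multiply built out of 32-bit halves
// (LL/LH/RL/RH); the carry out of the cross terms flows into the high half
// just like the UADDO_CARRY/ADDE chain.

#include <cstdint>

static void UMul64Wide(uint64_t L, uint64_t R, uint64_t &Lo, uint64_t &Hi) {
  uint64_t LL = (uint32_t)L, LH = L >> 32;   // the MakeMUL_LOHI inputs
  uint64_t RL = (uint32_t)R, RH = R >> 32;
  uint64_t T = LL * RL;                      // LL x RL
  uint64_t Cross = LH * RL + (T >> 32);      // first cross term; cannot wrap
  uint64_t Mid = (uint32_t)Cross + LL * RH;  // second cross term + low carry
  Hi = LH * RH + (Cross >> 32) + (Mid >> 32);
  Lo = (Mid << 32) | (uint32_t)T;
}
// e.g. L = R = 0xFFFFFFFF gives Hi == 0, Lo == 0xFFFFFFFE00000001.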
7961
7962bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7963 SelectionDAG &DAG, MulExpansionKind Kind,
7964 SDValue LL, SDValue LH, SDValue RL,
7965 SDValue RH) const {
7966 SmallVector<SDValue, 2> Result;
7967 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7968 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7969 DAG, Kind, LL, LH, RL, RH);
7970 if (Ok) {
7971 assert(Result.size() == 2);
7972 Lo = Result[0];
7973 Hi = Result[1];
7974 }
7975 return Ok;
7976}
7977
7978// Optimize unsigned division or remainder by constants for types twice as large
7979// as a legal VT.
7980//
7981// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7982// can be computed as:
7984// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7985// Remainder = Sum % Constant
7986// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7987//
7988// For division, we can compute the remainder using the algorithm described
7989// above and subtract it from the dividend to get an exact multiple of Constant.
7990// Then multiply that exact multiple by the multiplicative inverse modulo
7991// (1 << BitWidth) to get the quotient.
7992
7993// If Constant is even, we can shift right the dividend and the divisor by the
7994// number of trailing zeros in Constant before applying the remainder algorithm.
7995// If we're after the quotient, we can subtract this value from the shifted
7996// dividend and multiply by the multiplicative inverse of the shifted divisor.
7997// If we want the remainder, we shift the value left by the number of trailing
7998// zeros and add the bits that were shifted out of the dividend.
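// A worked instance (illustrative only) of the remainder identity the comment
// above relies on: with BitWidth == 32 and Constant == 3, (1 << 16) % 3 == 1,
// so each 16-bit "digit" contributes its own value mod 3 and
// N % 3 == ((N & 0xFFFF) + (N >> 16)) % 3. A standalone sketch:
static inline unsigned urem3ViaHalvesSketch(unsigned N) {
  unsigned Lo = N & 0xFFFFu, Hi = N >> 16;
  return (Lo + Hi) % 3; // Lo + Hi <= 0x1FFFE, so the sum cannot wrap.
}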
7999bool TargetLowering::expandDIVREMByConstant(SDNode *N,
8000 SmallVectorImpl<SDValue> &Result,
8001 EVT HiLoVT, SelectionDAG &DAG,
8002 SDValue LL, SDValue LH) const {
8003 unsigned Opcode = N->getOpcode();
8004 EVT VT = N->getValueType(0);
8005
8006 // TODO: Support signed division/remainder.
8007 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
8008 return false;
8009 assert(
8010 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
8011 "Unexpected opcode");
8012
8013 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
8014 if (!CN)
8015 return false;
8016
8017 APInt Divisor = CN->getAPIntValue();
8018 unsigned BitWidth = Divisor.getBitWidth();
8019 unsigned HBitWidth = BitWidth / 2;
8021 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8022
8023 // Divisor needs to be less than (1 << HBitWidth).
8024 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
8025 if (Divisor.uge(HalfMaxPlus1))
8026 return false;
8027
8028 // We depend on the UREM by constant optimization in DAGCombiner that requires
8029 // high multiply.
8030 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
8031 !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
8032 return false;
8033
8034 // Don't expand if optimizing for size.
8035 if (DAG.shouldOptForSize())
8036 return false;
8037
8038 // Early out for 0 or 1 divisors.
8039 if (Divisor.ule(1))
8040 return false;
8041
8042 // If the divisor is even, shift it until it becomes odd.
8043 unsigned TrailingZeros = 0;
8044 if (!Divisor[0]) {
8045 TrailingZeros = Divisor.countr_zero();
8046 Divisor.lshrInPlace(TrailingZeros);
8047 }
8048
8049 SDLoc dl(N);
8050 SDValue Sum;
8051 SDValue PartialRem;
8052
8053 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
8054 // then add in the carry.
8055 // TODO: If we can't split it in half, we might be able to split into 3 or
8056 // more pieces using a smaller bit width.
8057 if (HalfMaxPlus1.urem(Divisor).isOne()) {
8058 assert(!LL == !LH && "Expected both input halves or no input halves!");
8059 if (!LL)
8060 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
8061
8062 // Shift the input by the number of TrailingZeros in the divisor. The
8063 // shifted out bits will be added to the remainder later.
8064 if (TrailingZeros) {
8065 // Save the shifted off bits if we need the remainder.
8066 if (Opcode != ISD::UDIV) {
8067 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
8068 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
8069 DAG.getConstant(Mask, dl, HiLoVT));
8070 }
8071
8072 LL = DAG.getNode(
8073 ISD::OR, dl, HiLoVT,
8074 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
8075 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
8076 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
8077 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
8078 HiLoVT, dl)));
8079 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
8080 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8081 }
8082
8083 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8084 EVT SetCCType =
8085 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
8086 if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
8087 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
8088 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
8089 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
8090 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
8091 } else {
8092 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
8093 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
8094 // If the boolean for the target is 0 or 1, we can add the setcc result
8095 // directly.
8096 if (getBooleanContents(HiLoVT) ==
8097 TargetLoweringBase::ZeroOrOneBooleanContent)
8098 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
8099 else
8100 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
8101 DAG.getConstant(0, dl, HiLoVT));
8102 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
8103 }
8104 }
8105
8106 // If we didn't find a sum, we can't do the expansion.
8107 if (!Sum)
8108 return false;
8109
8110 // Perform a HiLoVT urem on the Sum using truncated divisor.
8111 SDValue RemL =
8112 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
8113 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
8114 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
8115
8116 if (Opcode != ISD::UREM) {
8117 // Subtract the remainder from the shifted dividend.
8118 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
8119 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
8120
8121 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
8122
8123 // Multiply by the multiplicative inverse of the divisor modulo
8124 // (1 << BitWidth).
8125 APInt MulFactor = Divisor.multiplicativeInverse();
8126
8127 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
8128 DAG.getConstant(MulFactor, dl, VT));
8129
8130 // Split the quotient into low and high parts.
8131 SDValue QuotL, QuotH;
8132 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
8133 Result.push_back(QuotL);
8134 Result.push_back(QuotH);
8135 }
8136
8137 if (Opcode != ISD::UDIV) {
8138 // If we shifted the input, shift the remainder left and add the bits we
8139 // shifted off the input.
8140 if (TrailingZeros) {
8141 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
8142 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8143 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
8144 }
8145 Result.push_back(RemL);
8146 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
8147 }
8148
8149 return true;
8150}
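// A standalone sketch (illustrative only) of the quotient step used above,
// specialized to a division by 5: N - (N % 5) is an exact multiple of 5, and
// multiplying it by 5's multiplicative inverse modulo 2^32 (0xCCCCCCCD, since
// 5 * 0xCCCCCCCD == 1 (mod 2^32)) recovers the quotient with one multiply.
static inline unsigned udiv5ViaInverseSketch(unsigned N) {
  unsigned Rem = N % 5; // Computed with the summing trick in the real code.
  return (N - Rem) * 0xCCCCCCCDu;
}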
8151
8152// Check that (every element of) Z is undef or not an exact multiple of BW.
8153static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8154 return ISD::matchUnaryPredicate(
8155 Z,
8156 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
8157 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8158}
8159
8160SDValue TargetLowering::expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) const {
8161 EVT VT = Node->getValueType(0);
8162 SDValue ShX, ShY;
8163 SDValue ShAmt, InvShAmt;
8164 SDValue X = Node->getOperand(0);
8165 SDValue Y = Node->getOperand(1);
8166 SDValue Z = Node->getOperand(2);
8167 SDValue Mask = Node->getOperand(3);
8168 SDValue VL = Node->getOperand(4);
8169
8170 unsigned BW = VT.getScalarSizeInBits();
8171 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8172 SDLoc DL(SDValue(Node, 0));
8173
8174 EVT ShVT = Z.getValueType();
8175 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8176 // fshl: X << C | Y >> (BW - C)
8177 // fshr: X << (BW - C) | Y >> C
8178 // where C = Z % BW is not zero
8179 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8180 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8181 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
8182 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8183 VL);
8184 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8185 VL);
8186 } else {
8187 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8188 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8189 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8190 if (isPowerOf2_32(BW)) {
8191 // Z % BW -> Z & (BW - 1)
8192 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8193 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8194 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8195 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8196 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8197 } else {
8198 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8199 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8200 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8201 }
8202
8203 SDValue One = DAG.getConstant(1, DL, ShVT);
8204 if (IsFSHL) {
8205 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8206 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8207 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8208 } else {
8209 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8210 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8211 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8212 }
8213 }
8214 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8215}
8216
8218SDValue TargetLowering::expandFunnelShift(SDNode *Node,
8219 SelectionDAG &DAG) const {
8219 if (Node->isVPOpcode())
8220 return expandVPFunnelShift(Node, DAG);
8221
8222 EVT VT = Node->getValueType(0);
8223
8224 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8225 !isOperationLegalOrCustom(ISD::SRL, VT) ||
8226 !isOperationLegalOrCustom(ISD::SUB, VT) ||
8227 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
8228 return SDValue();
8229
8230 SDValue X = Node->getOperand(0);
8231 SDValue Y = Node->getOperand(1);
8232 SDValue Z = Node->getOperand(2);
8233
8234 unsigned BW = VT.getScalarSizeInBits();
8235 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8236 SDLoc DL(SDValue(Node, 0));
8237
8238 EVT ShVT = Z.getValueType();
8239
8240 // If a funnel shift in the other direction is better supported, use it.
8241 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8242 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8243 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8244 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8245 // fshl X, Y, Z -> fshr X, Y, -Z
8246 // fshr X, Y, Z -> fshl X, Y, -Z
8247 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8248 Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
8249 } else {
8250 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8251 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8252 SDValue One = DAG.getConstant(1, DL, ShVT);
8253 if (IsFSHL) {
8254 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8255 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8256 } else {
8257 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8258 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8259 }
8260 Z = DAG.getNOT(DL, Z, ShVT);
8261 }
8262 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8263 }
8264
8265 SDValue ShX, ShY;
8266 SDValue ShAmt, InvShAmt;
8267 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8268 // fshl: X << C | Y >> (BW - C)
8269 // fshr: X << (BW - C) | Y >> C
8270 // where C = Z % BW is not zero
8271 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8272 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8273 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8274 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8275 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8276 } else {
8277 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8278 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8279 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8280 if (isPowerOf2_32(BW)) {
8281 // Z % BW -> Z & (BW - 1)
8282 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8283 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8284 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8285 } else {
8286 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8287 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8288 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8289 }
8290
8291 SDValue One = DAG.getConstant(1, DL, ShVT);
8292 if (IsFSHL) {
8293 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8294 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8295 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8296 } else {
8297 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8298 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8299 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8300 }
8301 }
8302 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8303}
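// A standalone sketch (illustrative only) of the power-of-two branch above
// for scalar fshl at a bit width of 32: the "shift Y right by one first"
// step keeps both shift amounts strictly below the width, so the Z % 32 == 0
// case needs no separate test.
static inline unsigned fshl32Sketch(unsigned X, unsigned Y, unsigned Z) {
  unsigned ShAmt = Z & 31;     // Z % BW
  unsigned InvShAmt = ~Z & 31; // (BW - 1) - (Z % BW)
  return (X << ShAmt) | ((Y >> 1) >> InvShAmt);
}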
8304
8305// TODO: Merge with expandFunnelShift.
8306SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8307 SelectionDAG &DAG) const {
8308 EVT VT = Node->getValueType(0);
8309 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8310 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8311 SDValue Op0 = Node->getOperand(0);
8312 SDValue Op1 = Node->getOperand(1);
8313 SDLoc DL(SDValue(Node, 0));
8314
8315 EVT ShVT = Op1.getValueType();
8316 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8317
8318 // If a rotate in the other direction is better supported, use it.
8319 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8320 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8321 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8322 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8323 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8324 }
8325
8326 if (!AllowVectorOps && VT.isVector() &&
8327 (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8328 !isOperationLegalOrCustom(ISD::SRL, VT) ||
8329 !isOperationLegalOrCustom(ISD::SUB, VT) ||
8330 !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
8331 !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
8332 return SDValue();
8333
8334 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8335 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8336 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8337 SDValue ShVal;
8338 SDValue HsVal;
8339 if (isPowerOf2_32(EltSizeInBits)) {
8340 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8341 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8342 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8343 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8344 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8345 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8346 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8347 } else {
8348 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8349 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8350 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8351 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8352 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8353 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8354 SDValue One = DAG.getConstant(1, DL, ShVT);
8355 HsVal =
8356 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8357 }
8358 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8359}
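// A standalone sketch (illustrative only) of the power-of-two rotate
// expansion above: masking both amounts with w - 1 keeps each shift strictly
// in range, and c == 0 degenerates to (x << 0) | (x >> 0) == x.
static inline unsigned rotl32Sketch(unsigned X, unsigned C) {
  return (X << (C & 31)) | (X >> (-C & 31));
}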
8360
8361void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
8362 SelectionDAG &DAG) const {
8363 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8364 EVT VT = Node->getValueType(0);
8365 unsigned VTBits = VT.getScalarSizeInBits();
8366 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8367
8368 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8369 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8370 SDValue ShOpLo = Node->getOperand(0);
8371 SDValue ShOpHi = Node->getOperand(1);
8372 SDValue ShAmt = Node->getOperand(2);
8373 EVT ShAmtVT = ShAmt.getValueType();
8374 EVT ShAmtCCVT =
8375 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8376 SDLoc dl(Node);
8377
8378 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8379 // ISD::SRA/L nodes don't. Insert an AND to be safe; it's usually optimized
8380 // away during isel.
8381 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8382 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
8383 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8384 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8385 : DAG.getConstant(0, dl, VT);
8386
8387 SDValue Tmp2, Tmp3;
8388 if (IsSHL) {
8389 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8390 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8391 } else {
8392 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8393 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8394 }
8395
8396 // If the shift amount is greater than or equal to the width of a part, we
8397 // don't use the result from the FSHL/FSHR. Insert a test and select the
8398 // appropriate values for large shift amounts.
8399 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8400 DAG.getConstant(VTBits, dl, ShAmtVT));
8401 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8402 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8403
8404 if (IsSHL) {
8405 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8406 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8407 } else {
8408 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8409 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8410 }
8411}
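// A standalone sketch (illustrative only) of the SHL_PARTS case above for a
// 64-bit value held in two 32-bit parts; FshHi plays the role of Tmp2 and the
// Big test mirrors the (ShAmt & VTBits) != 0 select.
static inline void shl64PartsSketch(unsigned Lo, unsigned Hi, unsigned Amt,
                                    unsigned &OutLo, unsigned &OutHi) {
  unsigned Safe = Amt & 31;                                   // SafeShAmt
  unsigned FshHi = (Hi << Safe) | ((Lo >> 1) >> (31 - Safe)); // fshl(Hi, Lo)
  unsigned ShLo = Lo << Safe;                                 // Tmp3
  bool Big = (Amt & 32) != 0; // Shift amount reaches into the high part.
  OutHi = Big ? ShLo : FshHi;
  OutLo = Big ? 0u : ShLo;
}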
8412
8413bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
8414 SelectionDAG &DAG) const {
8415 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8416 SDValue Src = Node->getOperand(OpNo);
8417 EVT SrcVT = Src.getValueType();
8418 EVT DstVT = Node->getValueType(0);
8419 SDLoc dl(SDValue(Node, 0));
8420
8421 // FIXME: Only f32 to i64 conversions are supported.
8422 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8423 return false;
8424
8425 if (Node->isStrictFPOpcode())
8426 // When a NaN is converted to an integer a trap is allowed. We can't
8427 // use this expansion here because it would eliminate that trap. Other
8428 // traps are also allowed and cannot be eliminated. See
8429 // IEEE 754-2008 sec 5.8.
8430 return false;
8431
8432 // Expand f32 -> i64 conversion
8433 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8434 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8435 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8436 EVT IntVT = SrcVT.changeTypeToInteger();
8437 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8438
8439 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8440 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8441 SDValue Bias = DAG.getConstant(127, dl, IntVT);
8442 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8443 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8444 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8445
8446 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8447
8448 SDValue ExponentBits = DAG.getNode(
8449 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8450 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8451 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8452
8453 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8454 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8455 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8456 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8457
8458 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8459 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8460 DAG.getConstant(0x00800000, dl, IntVT));
8461
8462 R = DAG.getZExtOrTrunc(R, dl, DstVT);
8463
8464 R = DAG.getSelectCC(
8465 dl, Exponent, ExponentLoBit,
8466 DAG.getNode(ISD::SHL, dl, DstVT, R,
8467 DAG.getZExtOrTrunc(
8468 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8469 dl, IntShVT)),
8470 DAG.getNode(ISD::SRL, dl, DstVT, R,
8471 DAG.getZExtOrTrunc(
8472 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8473 dl, IntShVT)),
8474 ISD::SETGT);
8475
8476 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8477 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8478
8479 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8480 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8481 return true;
8482}
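// A worked instance (illustrative only) of the expansion above: for
// Src == 4.5f the bits are 0x40900000, so ExponentBits == 0x81 and
// Exponent == 129 - 127 == 2; R == mantissa | implicit bit == 0x00900000.
// Since Exponent (2) is not greater than ExponentLoBit (23), the SETGT select
// takes the SRL arm, giving R >> (23 - 2) == 4; Sign == 0, so Ret == 4, and
// because Exponent >= 0 the final select yields Result == 4 == fptosi(4.5f).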
8483
8484bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
8485 SDValue &Chain,
8486 SelectionDAG &DAG) const {
8487 SDLoc dl(SDValue(Node, 0));
8488 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8489 SDValue Src = Node->getOperand(OpNo);
8490
8491 EVT SrcVT = Src.getValueType();
8492 EVT DstVT = Node->getValueType(0);
8493 EVT SetCCVT =
8494 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8495 EVT DstSetCCVT =
8496 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8497
8498 // Only expand vector types if we have the appropriate vector bit operations.
8499 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8500 ISD::FP_TO_SINT;
8501 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8502 !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
8503 return false;
8504
8505 // If the maximum float value is smaller than the signed integer range,
8506 // the destination signmask can't be represented by the float, so we can
8507 // just use FP_TO_SINT directly.
8508 const fltSemantics &APFSem = SrcVT.getFltSemantics();
8509 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8510 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
8511 if (APFloat::opOverflow &
8512 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
8513 if (Node->isStrictFPOpcode()) {
8514 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8515 { Node->getOperand(0), Src });
8516 Chain = Result.getValue(1);
8517 } else
8518 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8519 return true;
8520 }
8521
8522 // Don't expand it if there isn't a cheap fsub instruction.
8523 if (!isOperationLegalOrCustom(
8524 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
8525 return false;
8526
8527 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8528 SDValue Sel;
8529
8530 if (Node->isStrictFPOpcode()) {
8531 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8532 Node->getOperand(0), /*IsSignaling*/ true);
8533 Chain = Sel.getValue(1);
8534 } else {
8535 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
8536 }
8537
8538 bool Strict = Node->isStrictFPOpcode() ||
8539 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
8540
8541 if (Strict) {
8542 // Expand based on maximum range of FP_TO_SINT; if the value exceeds the
8543 // signmask, offset it first (the result is then fully representable).
8544 // Sel = Src < 0x8000000000000000
8545 // FltOfs = select Sel, 0, 0x8000000000000000
8546 // IntOfs = select Sel, 0, 0x8000000000000000
8547 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8548
8549 // TODO: Should any fast-math-flags be set for the FSUB?
8550 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
8551 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8552 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8553 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
8554 DAG.getConstant(0, dl, DstVT),
8555 DAG.getConstant(SignMask, dl, DstVT));
8556 SDValue SInt;
8557 if (Node->isStrictFPOpcode()) {
8558 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8559 { Chain, Src, FltOfs });
8560 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8561 { Val.getValue(1), Val });
8562 Chain = SInt.getValue(1);
8563 } else {
8564 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
8565 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
8566 }
8567 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8568 } else {
8569 // Expand based on maximum range of FP_TO_SINT:
8570 // True = fp_to_sint(Src)
8571 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8572 // Result = select (Src < 0x8000000000000000), True, False
8573
8574 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8575 // TODO: Should any fast-math-flags be set for the FSUB?
8576 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
8577 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
8578 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
8579 DAG.getConstant(SignMask, dl, DstVT));
8580 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8581 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
8582 }
8583 return true;
8584}
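// A standalone sketch (illustrative only) of the non-strict branch above,
// specialized to f64 -> u64: small values take a plain signed conversion,
// values at or above 2^63 are rebased and the sign bit is restored by xor.
static inline unsigned long long fpToUint64Sketch(double Src) {
  const double Cst = 9223372036854775808.0; // 2^63, the u64 signmask as f64
  if (Src < Cst)                            // The "True" arm.
    return (unsigned long long)(long long)Src;
  long long Off = (long long)(Src - Cst);   // The "False" arm.
  return (unsigned long long)Off ^ 0x8000000000000000ull;
}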
8585
8586bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8587 SDValue &Chain, SelectionDAG &DAG) const {
8588 // This transform is not correct for converting 0 when rounding mode is set
8589 // to round toward negative infinity, which will produce -0.0. So disable
8590 // under strictfp.
8591 if (Node->isStrictFPOpcode())
8592 return false;
8593
8594 SDValue Src = Node->getOperand(0);
8595 EVT SrcVT = Src.getValueType();
8596 EVT DstVT = Node->getValueType(0);
8597
8598 // If the input is known to be non-negative and SINT_TO_FP is legal then use
8599 // it.
8600 if (Node->getFlags().hasNonNeg() &&
8601 isOperationLegalOrCustom(ISD::SINT_TO_FP, DstVT)) {
8602 Result =
8603 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
8604 return true;
8605 }
8606
8607 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8608 return false;
8609
8610 // Only expand vector types if we have the appropriate vector bit
8611 // operations.
8612 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8613 !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
8614 !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
8615 !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
8616 !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
8617 return false;
8618
8619 SDLoc dl(SDValue(Node, 0));
8620
8621 // Implementation of unsigned i64 to f64 following the algorithm in
8622 // __floatundidf in compiler_rt. This implementation performs rounding
8623 // correctly in all rounding modes with the exception of converting 0
8624 // when rounding toward negative infinity. In that case the fsub will
8625 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
8626 // incorrect.
8627 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8628 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8629 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8630 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8631 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8632 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
8633
8634 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8635 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8636 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8637 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8638 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8639 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
8640 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8641 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8642 return true;
8643}
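// A standalone sketch (illustrative only, using a C++17 hex-float literal and
// clang/gcc builtins for the bitcasts) of the __floatundidf scheme above:
// each 32-bit half is deposited into the mantissa of a scaled double, the
// known constant 2^84 + 2^52 is subtracted exactly, and a single fadd then
// performs the one rounding step.
static inline double uint64ToF64Sketch(unsigned long long N) {
  unsigned long long LoBits = (N & 0xFFFFFFFFull) | 0x4330000000000000ull;
  unsigned long long HiBits = (N >> 32) | 0x4530000000000000ull;
  double Lo, Hi;
  __builtin_memcpy(&Lo, &LoBits, 8); // Lo == 2^52 + (N & 0xFFFFFFFF)
  __builtin_memcpy(&Hi, &HiBits, 8); // Hi == 2^84 + 2^32 * (N >> 32)
  return (Hi - 0x1.00000001p+84) + Lo; // 0x1.00000001p+84 == 2^84 + 2^52
}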
8644
8645SDValue
8646TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8647 SelectionDAG &DAG) const {
8648 unsigned Opcode = Node->getOpcode();
8649 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8650 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8651 "Wrong opcode");
8652
8653 if (Node->getFlags().hasNoNaNs()) {
8654 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8655 EVT VT = Node->getValueType(0);
8656 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8657 !isOperationLegalOrCustom(ISD::SELECT_CC, VT)) &&
8658 VT.isVector())
8659 return SDValue();
8660 SDValue Op1 = Node->getOperand(0);
8661 SDValue Op2 = Node->getOperand(1);
8662 return DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred,
8663 Node->getFlags());
8664 }
8665
8666 return SDValue();
8667}
8668
8669SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
8670 SelectionDAG &DAG) const {
8671 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
8672 return Expanded;
8673
8674 EVT VT = Node->getValueType(0);
8675 if (VT.isScalableVector())
8677 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8678
8679 SDLoc dl(Node);
8680 unsigned NewOp =
8681 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8682
8683 if (isOperationLegalOrCustom(NewOp, VT)) {
8684 SDValue Quiet0 = Node->getOperand(0);
8685 SDValue Quiet1 = Node->getOperand(1);
8686
8687 if (!Node->getFlags().hasNoNaNs()) {
8688 // Insert canonicalizes if it's possible we need to quiet to get correct
8689 // sNaN behavior.
8690 if (!DAG.isKnownNeverSNaN(Quiet0)) {
8691 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8692 Node->getFlags());
8693 }
8694 if (!DAG.isKnownNeverSNaN(Quiet1)) {
8695 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8696 Node->getFlags());
8697 }
8698 }
8699
8700 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8701 }
8702
8703 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8704 // instead if there are no NaNs and there can't be an incompatible zero
8705 // compare: at least one operand isn't +/-0, or there are no signed-zeros.
8706 if ((Node->getFlags().hasNoNaNs() ||
8707 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8708 DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
8709 (Node->getFlags().hasNoSignedZeros() ||
8710 DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
8711 DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
8712 unsigned IEEE2018Op =
8713 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8714 if (isOperationLegalOrCustom(IEEE2018Op, VT))
8715 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8716 Node->getOperand(1), Node->getFlags());
8717 }
8718
8719 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8720 return SelCC;
8721
8722 return SDValue();
8723}
8724
8725SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
8726 SelectionDAG &DAG) const {
8727 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
8728 return Expanded;
8729
8730 SDLoc DL(N);
8731 SDValue LHS = N->getOperand(0);
8732 SDValue RHS = N->getOperand(1);
8733 unsigned Opc = N->getOpcode();
8734 EVT VT = N->getValueType(0);
8735 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8736 bool IsMax = Opc == ISD::FMAXIMUM;
8737 SDNodeFlags Flags = N->getFlags();
8738
8739 // First, implement comparison not propagating NaN. If no native fmin or fmax
8740 // is available, use a plain select with setcc instead.
8741 SDValue MinMax;
8742 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8743 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8744
8745 // FIXME: We should probably define fminnum/fmaxnum variants with correct
8746 // signed zero behavior.
8747 bool MinMaxMustRespectOrderedZero = false;
8748
8749 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
8750 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
8751 MinMaxMustRespectOrderedZero = true;
8752 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
8753 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
8754 } else {
8755 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
8756 return DAG.UnrollVectorOp(N);
8757
8757
8758 // NaN (if exists) will be propagated later, so orderness doesn't matter.
8759 SDValue Compare =
8760 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
8761 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
8762 }
8763
8764 // Propagate any NaN of both operands
8765 if (!N->getFlags().hasNoNaNs() &&
8766 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
8767 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
8768 APFloat::getNaN(VT.getFltSemantics()));
8769 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
8770 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
8771 }
8772
8773 // fminimum/fmaximum require -0.0 to compare less than +0.0
8774 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8775 !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
8776 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8777 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
8778 SDValue TestZero =
8779 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8780 SDValue LCmp = DAG.getSelect(
8781 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8782 MinMax, Flags);
8783 SDValue RCmp = DAG.getSelect(
8784 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
8785 LCmp, Flags);
8786 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8787 }
8788
8789 return MinMax;
8790}
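// A worked instance (illustrative only) of the signed-zero fixup above:
// fmaximum(+0.0, -0.0) must return +0.0, but the underlying min/max or select
// may have picked either zero. When MinMax compares ordered-equal to 0.0, the
// two IS_FPCLASS selects re-pick the operand whose class is fcPosZero (for
// max) or fcNegZero (for min), making the result well-defined.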
8791
8792SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
8793 SelectionDAG &DAG) const {
8794 SDLoc DL(Node);
8795 SDValue LHS = Node->getOperand(0);
8796 SDValue RHS = Node->getOperand(1);
8797 unsigned Opc = Node->getOpcode();
8798 EVT VT = Node->getValueType(0);
8799 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8800 bool IsMax = Opc == ISD::FMAXIMUMNUM;
8801 SDNodeFlags Flags = Node->getFlags();
8802
8803 unsigned NewOp =
8804 Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8805
8806 if (isOperationLegalOrCustom(NewOp, VT)) {
8807 if (!Flags.hasNoNaNs()) {
8808 // Insert canonicalizes if it's possible we need to quiet to get correct
8809 // sNaN behavior.
8810 if (!DAG.isKnownNeverSNaN(LHS)) {
8811 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
8812 }
8813 if (!DAG.isKnownNeverSNaN(RHS)) {
8814 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
8815 }
8816 }
8817
8818 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
8819 }
8820
8821 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has the
8822 // same behavior in all other cases, +0.0 vs -0.0 included.
8823 if (Flags.hasNoNaNs() ||
8824 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
8825 unsigned IEEE2019Op =
8826 Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8827 if (isOperationLegalOrCustom(IEEE2019Op, VT))
8828 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
8829 }
8830
8831 // FMINNUM/FMAXNUM return qNaN if either operand is sNaN, and they may
8832 // return either one for +0.0 vs -0.0.
8833 if ((Flags.hasNoNaNs() ||
8834 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
8835 (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
8836 DAG.isKnownNeverZeroFloat(RHS))) {
8837 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
8838 if (isOperationLegalOrCustom(IEEE2008Op, VT))
8839 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
8840 }
8841
8842 if (VT.isVector() &&
8843 (!isOperationLegalOrCustom(ISD::VSELECT, VT) ||
8844 !isOperationLegalOrCustom(ISD::IS_FPCLASS, VT)))
8845 return DAG.UnrollVectorOp(Node);
8846
8847 // If only one operand is NaN, override it with the other operand.
8848 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
8849 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
8850 }
8851 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
8852 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
8853 }
8854
8855 SDValue MinMax =
8856 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
8857
8858 // TODO: We need to quiet sNaNs under strictfp.
8859
8860 // Fixup signed zero behavior.
8861 if (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
8862 DAG.isKnownNeverZeroFloat(RHS)) {
8863 return MinMax;
8864 }
8865 SDValue TestZero =
8866 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8867 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8868 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
8869 SDValue LCmp = DAG.getSelect(
8870 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8871 MinMax, Flags);
8872 SDValue RCmp = DAG.getSelect(
8873 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
8874 Flags);
8875 return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8876}
8877
8878/// Returns a true value if this FPClassTest can be performed with an ordered
8879/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8880/// std::nullopt if it cannot be performed as a compare with 0.
8881static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8882 const fltSemantics &Semantics,
8883 const MachineFunction &MF) {
8884 FPClassTest OrderedMask = Test & ~fcNan;
8885 FPClassTest NanTest = Test & fcNan;
8886 bool IsOrdered = NanTest == fcNone;
8887 bool IsUnordered = NanTest == fcNan;
8888
8889 // Skip cases that are testing for only a qnan or snan.
8890 if (!IsOrdered && !IsUnordered)
8891 return std::nullopt;
8892
8893 if (OrderedMask == fcZero &&
8894 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8895 return IsOrdered;
8896 if (OrderedMask == (fcZero | fcSubnormal) &&
8897 MF.getDenormalMode(Semantics).inputsAreZero())
8898 return IsOrdered;
8899 return std::nullopt;
8900}
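// A minimal sketch (illustrative only, assuming IEEE denormal inputs) of what
// the fcZero mapping above produces: the class test folds to one ordered
// equality against zero, true for both +0.0 and -0.0 and false for NaN.
static inline bool isZeroClassSketch(double X) {
  return X == 0.0; // Ordered SETOEQ; with fcNan added it would be SETUEQ.
}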
8901
8903 const FPClassTest OrigTestMask,
8904 SDNodeFlags Flags, const SDLoc &DL,
8905 SelectionDAG &DAG) const {
8906 EVT OperandVT = Op.getValueType();
8907 assert(OperandVT.isFloatingPoint());
8908 FPClassTest Test = OrigTestMask;
8909
8910 // Degenerate cases.
8911 if (Test == fcNone)
8912 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8913 if (Test == fcAllFlags)
8914 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
8915
8916 // PPC double double is a pair of doubles, of which the higher part determines
8917 // the value class.
8918 if (OperandVT == MVT::ppcf128) {
8919 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8920 DAG.getConstant(1, DL, MVT::i32));
8921 OperandVT = MVT::f64;
8922 }
8923
8924 // Floating-point type properties.
8925 EVT ScalarFloatVT = OperandVT.getScalarType();
8926 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
8927 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8928 bool IsF80 = (ScalarFloatVT == MVT::f80);
8929
8930 // Some checks can be implemented using float comparisons, if floating point
8931 // exceptions are ignored.
8932 if (Flags.hasNoFPExcept() &&
8933 isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
8934 FPClassTest FPTestMask = Test;
8935 bool IsInvertedFP = false;
8936
8937 if (FPClassTest InvertedFPCheck =
8938 invertFPClassTestIfSimpler(FPTestMask, true)) {
8939 FPTestMask = InvertedFPCheck;
8940 IsInvertedFP = true;
8941 }
8942
8943 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
8944 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
8945
8946 // See if we can fold an | fcNan into an unordered compare.
8947 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
8948
8949 // Can't fold the ordered check if we're only testing for snan or qnan
8950 // individually.
8951 if ((FPTestMask & fcNan) != fcNan)
8952 OrderedFPTestMask = FPTestMask;
8953
8954 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
8955
8956 if (std::optional<bool> IsCmp0 =
8957 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
8958 IsCmp0 && (isCondCodeLegalOrCustom(
8959 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8960 OperandVT.getScalarType().getSimpleVT()))) {
8961
8962 // If denormals could be implicitly treated as 0, this is not equivalent
8963 // to a compare with 0 since it will also be true for denormals.
8964 return DAG.getSetCC(DL, ResultVT, Op,
8965 DAG.getConstantFP(0.0, DL, OperandVT),
8966 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8967 }
8968
8969 if (FPTestMask == fcNan &&
8970 isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
8971 OperandVT.getScalarType().getSimpleVT()))
8972 return DAG.getSetCC(DL, ResultVT, Op, Op,
8973 IsInvertedFP ? ISD::SETO : ISD::SETUO);
8974
8975 bool IsOrderedInf = FPTestMask == fcInf;
8976 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
8977 isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
8978 : UnorderedCmpOpcode,
8979 OperandVT.getScalarType().getSimpleVT()) &&
8980 isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType()) &&
8981 (!OperandVT.isVector() ||
8982 (OperandVT.isVector() &&
8983 isOperationLegalOrCustom(ISD::FABS, OperandVT)))) {
8984 // isinf(x) --> fabs(x) == inf
8985 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8986 SDValue Inf =
8987 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
8988 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8989 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
8990 }
8991
8992 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
8993 isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
8994 : UnorderedCmpOpcode,
8995 OperandVT.getSimpleVT())) {
8996 // isposinf(x) --> x == inf
8997 // isneginf(x) --> x == -inf
8998 // isposinf(x) || nan --> x u== inf
8999 // isneginf(x) || nan --> x u== -inf
9000
9001 SDValue Inf = DAG.getConstantFP(
9002 APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
9003 OperandVT);
9004 return DAG.getSetCC(DL, ResultVT, Op, Inf,
9005 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
9006 }
9007
9008 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
9009 // TODO: Could handle ordered case, but it produces worse code for
9010 // x86. Maybe handle ordered if fabs is free?
9011
9012 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9013 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
9014
9015 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
9016 OperandVT.getScalarType().getSimpleVT())) {
9017 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
9018
9019 // TODO: Maybe only makes sense if fabs is free. Integer test of
9020 // exponent bits seems better for x86.
9021 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9022 SDValue SmallestNormal = DAG.getConstantFP(
9023 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9024 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
9025 IsOrdered ? OrderedOp : UnorderedOp);
9026 }
9027 }
9028
9029 if (FPTestMask == fcNormal) {
9030 // TODO: Handle unordered
9031 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9032 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9033
9034 if (isCondCodeLegalOrCustom(IsFiniteOp,
9035 OperandVT.getScalarType().getSimpleVT()) &&
9036 isCondCodeLegalOrCustom(IsNormalOp,
9037 OperandVT.getScalarType().getSimpleVT()) &&
9038 isFAbsFree(OperandVT)) {
9039 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9040 SDValue Inf =
9041 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9042 SDValue SmallestNormal = DAG.getConstantFP(
9043 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9044
9045 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9046 SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
9047 SDValue IsNormal =
9048 DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
9049 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9050 return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
9051 }
9052 }
9053 }
9054
9055 // Some checks may be represented as the inversion of a simpler check; for example
9056 // "inf|normal|subnormal|zero" => !"nan".
9057 bool IsInverted = false;
9058
9059 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
9060 Test = InvertedCheck;
9061 IsInverted = true;
9062 }
9063
9064 // In the general case use integer operations.
9065 unsigned BitSize = OperandVT.getScalarSizeInBits();
9066 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
9067 if (OperandVT.isVector())
9068 IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
9069 OperandVT.getVectorElementCount());
9070 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
9071
9072 // Various masks.
9073 APInt SignBit = APInt::getSignMask(BitSize);
9074 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9075 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9076 const unsigned ExplicitIntBitInF80 = 63;
9077 APInt ExpMask = Inf;
9078 if (IsF80)
9079 ExpMask.clearBit(ExplicitIntBitInF80);
9080 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9081 APInt QNaNBitMask =
9082 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9083 APInt InversionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
9084
9085 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
9086 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
9087 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
9088 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
9089 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
9090 SDValue ResultInversionMask = DAG.getConstant(InversionMask, DL, ResultVT);
9091
9092 SDValue Res;
9093 const auto appendResult = [&](SDValue PartialRes) {
9094 if (PartialRes) {
9095 if (Res)
9096 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
9097 else
9098 Res = PartialRes;
9099 }
9100 };
9101
9102 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
9103 const auto getIntBitIsSet = [&]() -> SDValue {
9104 if (!IntBitIsSetV) {
9105 APInt IntBitMask(BitSize, 0);
9106 IntBitMask.setBit(ExplicitIntBitInF80);
9107 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
9108 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
9109 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
9110 }
9111 return IntBitIsSetV;
9112 };
9113
9114 // Split the value into sign bit and absolute value.
9115 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
9116 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
9117 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
9118
9119 // Tests that involve more than one class should be processed first.
9120 SDValue PartialRes;
9121
9122 if (IsF80)
9123 ; // Detect finite numbers of f80 by checking individual classes because
9124 // they have different settings of the explicit integer bit.
9125 else if ((Test & fcFinite) == fcFinite) {
9126 // finite(V) ==> abs(V) < exp_mask
9127 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9128 Test &= ~fcFinite;
9129 } else if ((Test & fcFinite) == fcPosFinite) {
9130 // finite(V) && V > 0 ==> V < exp_mask
9131 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
9132 Test &= ~fcPosFinite;
9133 } else if ((Test & fcFinite) == fcNegFinite) {
9134 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9135 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9136 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9137 Test &= ~fcNegFinite;
9138 }
9139 appendResult(PartialRes);
9140
9141 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
9142 // fcZero | fcSubnormal => test all exponent bits are 0
9143 // TODO: Handle sign bit specific cases
9144 if (PartialCheck == (fcZero | fcSubnormal)) {
9145 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
9146 SDValue ExpIsZero =
9147 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9148 appendResult(ExpIsZero);
9149 Test &= ~PartialCheck & fcAllFlags;
9150 }
9151 }
9152
9153 // Check for individual classes.
9154
9155 if (unsigned PartialCheck = Test & fcZero) {
9156 if (PartialCheck == fcPosZero)
9157 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
9158 else if (PartialCheck == fcZero)
9159 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
9160 else // ISD::fcNegZero
9161 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
9162 appendResult(PartialRes);
9163 }
9164
9165 if (unsigned PartialCheck = Test & fcSubnormal) {
9166 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
9167 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
9168 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
9169 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
9170 SDValue VMinusOneV =
9171 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
9172 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
9173 if (PartialCheck == fcNegSubnormal)
9174 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9175 appendResult(PartialRes);
9176 }
9177
9178 if (unsigned PartialCheck = Test & fcInf) {
9179 if (PartialCheck == fcPosInf)
9180 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
9181 else if (PartialCheck == fcInf)
9182 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
9183 else { // ISD::fcNegInf
9184 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9185 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
9186 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
9187 }
9188 appendResult(PartialRes);
9189 }
9190
9191 if (unsigned PartialCheck = Test & fcNan) {
9192 APInt InfWithQnanBit = Inf | QNaNBitMask;
9193 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
9194 if (PartialCheck == fcNan) {
9195 // isnan(V) ==> abs(V) > int(inf)
9196 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9197 if (IsF80) {
9198 // Recognize unsupported values as NaNs for compatibility with glibc.
9199 // In them (exp(V)==0) == int_bit.
9200 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
9201 SDValue ExpIsZero =
9202 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9203 SDValue IsPseudo =
9204 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
9205 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
9206 }
9207 } else if (PartialCheck == fcQNan) {
9208 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
9209 PartialRes =
9210 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
9211 } else { // ISD::fcSNan
9212 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
9213 // abs(V) < (unsigned(Inf) | quiet_bit)
9214 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9215 SDValue IsNotQnan =
9216 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
9217 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
9218 }
9219 appendResult(PartialRes);
9220 }
9221
9222 if (unsigned PartialCheck = Test & fcNormal) {
9223 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
9224 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9225 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
9226 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
9227 APInt ExpLimit = ExpMask - ExpLSB;
9228 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
9229 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
9230 if (PartialCheck == fcNegNormal)
9231 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9232 else if (PartialCheck == fcPosNormal) {
9233 SDValue PosSignV =
9234 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInversionMask);
9235 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
9236 }
9237 if (IsF80)
9238 PartialRes =
9239 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
9240 appendResult(PartialRes);
9241 }
9242
9243 if (!Res)
9244 return DAG.getConstant(IsInverted, DL, ResultVT);
9245 if (IsInverted)
9246 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInversionMask);
9247 return Res;
9248}
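// A standalone sketch (illustrative only) of the integer path above for f32:
// masking off the sign bit gives AbsV, infinity is the exact exponent
// pattern, and every payload strictly above it is a NaN.
static inline bool isInfOrNanF32Sketch(unsigned Bits, bool &IsNan) {
  unsigned AbsV = Bits & 0x7FFFFFFFu; // ValueMask: all bits but the sign.
  IsNan = AbsV > 0x7F800000u;         // abs(V) > int(inf)  ==>  NaN.
  return AbsV >= 0x7F800000u;         // Inf or NaN.
}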
9249
9250// Only expand vector types if we have the appropriate vector bit operations.
9251static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9252 assert(VT.isVector() && "Expected vector type");
9253 unsigned Len = VT.getScalarSizeInBits();
9254 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9255 TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
9256 TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
9257 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9258 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
9259}
9260
9261SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9262 SDLoc dl(Node);
9263 EVT VT = Node->getValueType(0);
9264 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9265 SDValue Op = Node->getOperand(0);
9266 unsigned Len = VT.getScalarSizeInBits();
9267 assert(VT.isInteger() && "CTPOP not implemented for this type.");
9268
9269 // TODO: Add support for irregular type lengths.
9270 if (!(Len <= 128 && Len % 8 == 0))
9271 return SDValue();
9272
9273 // Only expand vector types if we have the appropriate vector bit operations.
9274 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
9275 return SDValue();
9276
9277 // This is the "best" algorithm from
9278 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9279 SDValue Mask55 =
9280 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9281 SDValue Mask33 =
9282 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9283 SDValue Mask0F =
9284 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9285
9286 // v = v - ((v >> 1) & 0x55555555...)
9287 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
9288 DAG.getNode(ISD::AND, dl, VT,
9289 DAG.getNode(ISD::SRL, dl, VT, Op,
9290 DAG.getConstant(1, dl, ShVT)),
9291 Mask55));
9292 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9293 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
9294 DAG.getNode(ISD::AND, dl, VT,
9295 DAG.getNode(ISD::SRL, dl, VT, Op,
9296 DAG.getConstant(2, dl, ShVT)),
9297 Mask33));
9298 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9299 Op = DAG.getNode(ISD::AND, dl, VT,
9300 DAG.getNode(ISD::ADD, dl, VT, Op,
9301 DAG.getNode(ISD::SRL, dl, VT, Op,
9302 DAG.getConstant(4, dl, ShVT))),
9303 Mask0F);
9304
9305 if (Len <= 8)
9306 return Op;
9307
9308 // Avoid the multiply if we only have 2 bytes to add.
9309 // TODO: Only doing this for scalars because vectors weren't as obviously
9310 // improved.
9311 if (Len == 16 && !VT.isVector()) {
9312 // v = (v + (v >> 8)) & 0x00FF;
9313 return DAG.getNode(ISD::AND, dl, VT,
9314 DAG.getNode(ISD::ADD, dl, VT, Op,
9315 DAG.getNode(ISD::SRL, dl, VT, Op,
9316 DAG.getConstant(8, dl, ShVT))),
9317 DAG.getConstant(0xFF, dl, VT));
9318 }
9319
9320 // v = (v * 0x01010101...) >> (Len - 8)
9321 SDValue V;
9322 if (isOperationLegalOrCustom(
9323 ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9324 SDValue Mask01 =
9325 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9326 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
9327 } else {
9328 V = Op;
9329 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9330 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9331 V = DAG.getNode(ISD::ADD, dl, VT, V,
9332 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
9333 }
9334 }
9335 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
9336}
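// A standalone sketch (illustrative only) of the exact sequence built above,
// written for a plain 32-bit integer: adjacent 1-, 2- and 4-bit fields are
// summed in parallel, then the 0x01010101 multiply accumulates the four byte
// counts into the top byte.
static inline unsigned popcount32Sketch(unsigned V) {
  V = V - ((V >> 1) & 0x55555555u);
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u);
  V = (V + (V >> 4)) & 0x0F0F0F0Fu;
  return (V * 0x01010101u) >> 24; // Len - 8 == 24 for a 32-bit value.
}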
9337
9338SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9339 SDLoc dl(Node);
9340 EVT VT = Node->getValueType(0);
9341 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9342 SDValue Op = Node->getOperand(0);
9343 SDValue Mask = Node->getOperand(1);
9344 SDValue VL = Node->getOperand(2);
9345 unsigned Len = VT.getScalarSizeInBits();
9346 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9347
9348 // TODO: Add support for irregular type lengths.
9349 if (!(Len <= 128 && Len % 8 == 0))
9350 return SDValue();
9351
9352 // This is the same algorithm as in expandCTPOP, from
9353 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9354 SDValue Mask55 =
9355 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9356 SDValue Mask33 =
9357 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9358 SDValue Mask0F =
9359 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9360
9361 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9362
9363 // v = v - ((v >> 1) & 0x55555555...)
9364 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9365 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9366 DAG.getConstant(1, dl, ShVT), Mask, VL),
9367 Mask55, Mask, VL);
9368 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9369
9370 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9371 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9372 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9373 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9374 DAG.getConstant(2, dl, ShVT), Mask, VL),
9375 Mask33, Mask, VL);
9376 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9377
9378 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9379 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9380 Mask, VL),
9381 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9382 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9383
9384 if (Len <= 8)
9385 return Op;
9386
9387 // v = (v * 0x01010101...) >> (Len - 8)
9388 SDValue V;
9389 if (isOperationLegalOrCustom(
9390 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9391 SDValue Mask01 =
9392 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9393 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9394 } else {
9395 V = Op;
9396 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9397 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9398 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9399 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9400 Mask, VL);
9401 }
9402 }
9403 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9404 Mask, VL);
9405}
9406
9407SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9408 SDLoc dl(Node);
9409 EVT VT = Node->getValueType(0);
9410 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9411 SDValue Op = Node->getOperand(0);
9412 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9413
9414 // If the non-ZERO_UNDEF version is supported we can use that instead.
9415 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
9416 isOperationLegalOrCustom(ISD::CTLZ, VT))
9417 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
9418
9419 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9420 if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
9421 EVT SetCCVT =
9422 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9423 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
9424 SDValue Zero = DAG.getConstant(0, dl, VT);
9425 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9426 return DAG.getSelect(dl, VT, SrcIsZero,
9427 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
9428 }
9429
9430 // Only expand vector types if we have the appropriate vector bit operations.
9431 // This includes the operations needed to expand CTPOP if it isn't supported.
9432 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9433 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
9434 !canExpandVectorCTPOP(*this, VT)) ||
9435 !isOperationLegalOrCustom(ISD::SRL, VT) ||
9436 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
9437 return SDValue();
9438
9439 // for now, we do this:
9440 // x = x | (x >> 1);
9441 // x = x | (x >> 2);
9442 // ...
9443 // x = x | (x >>16);
9444 // x = x | (x >>32); // for 64-bit input
9445 // return popcount(~x);
9446 //
9447 // Ref: "Hacker's Delight" by Henry Warren
9448 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9449 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9450 Op = DAG.getNode(ISD::OR, dl, VT, Op,
9451 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
9452 }
9453 Op = DAG.getNOT(dl, Op, VT);
9454 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
9455}
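// A standalone sketch (illustrative only) of the smearing expansion above:
// OR-ing every bit into the positions below it leaves exactly the leading
// zeros unset, so ctlz becomes a popcount of the complement.
static inline unsigned ctlz32Sketch(unsigned X) {
  X |= X >> 1; X |= X >> 2; X |= X >> 4; X |= X >> 8; X |= X >> 16;
  return popcount32Sketch(~X); // popcount32Sketch as sketched above.
}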
9456
9457SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9458 SDLoc dl(Node);
9459 EVT VT = Node->getValueType(0);
9460 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9461 SDValue Op = Node->getOperand(0);
9462 SDValue Mask = Node->getOperand(1);
9463 SDValue VL = Node->getOperand(2);
9464 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9465
9466 // do this:
9467 // x = x | (x >> 1);
9468 // x = x | (x >> 2);
9469 // ...
9470 // x = x | (x >>16);
9471 // x = x | (x >>32); // for 64-bit input
9472 // return popcount(~x);
9473 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9474 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9475 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9476 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9477 VL);
9478 }
9479 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9480 Mask, VL);
9481 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9482}
9483
9484SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
9485 const SDLoc &DL, EVT VT, SDValue Op,
9486 unsigned BitWidth) const {
9487 if (BitWidth != 32 && BitWidth != 64)
9488 return SDValue();
9489 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
9490 : APInt(64, 0x0218A392CD3D5DBFULL);
9491 const DataLayout &TD = DAG.getDataLayout();
9492 MachinePointerInfo PtrInfo =
9493 MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
9494 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
9495 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
9496 SDValue Lookup = DAG.getNode(
9497 ISD::SRL, DL, VT,
9498 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
9499 DAG.getConstant(DeBruijn, DL, VT)),
9500 DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
9501 Lookup = DAG.getZExtOrTrunc(Lookup, DL, getPointerTy(TD));
9502
9503 SmallVector<uint8_t> Table(BitWidth, 0);
9504 for (unsigned i = 0; i < BitWidth; i++) {
9505 APInt Shl = DeBruijn.shl(i);
9506 APInt Lshr = Shl.lshr(ShiftAmt);
9507 Table[Lshr.getZExtValue()] = i;
9508 }
9509
9510 // Create a ConstantArray in Constant Pool
9511 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
9512 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
9513 TD.getPrefTypeAlign(CA->getType()));
9514 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
9515 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
9516 PtrInfo, MVT::i8);
9517 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9518 return ExtLoad;
9519
9520 EVT SetCCVT =
9521 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9522 SDValue Zero = DAG.getConstant(0, DL, VT);
9523 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
9524 return DAG.getSelect(DL, VT, SrcIsZero,
9525 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
9526}
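// Illustrative scalar sketch of the 32-bit table lookup built above
// (hypothetical helper). Isolating the lowest set bit and multiplying by the
// De Bruijn constant places a unique 5-bit code in the top bits; shifting by
// 32 - 5 = 27 recovers it, and the table maps each code back to its
// trailing-zero count. Requires x != 0, matching CTTZ_ZERO_UNDEF.
static unsigned cttz32_debruijn(uint32_t x) {
  static const uint8_t Table[32] = {
      0,  1,  28, 2,  29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4,  8,
      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6,  11, 5,  10, 9};
  return Table[((x & (~x + 1u)) * 0x077CB531u) >> 27];
}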
9527
9528SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9529 SDLoc dl(Node);
9530 EVT VT = Node->getValueType(0);
9531 SDValue Op = Node->getOperand(0);
9532 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9533
9534 // If the non-ZERO_UNDEF version is supported we can use that instead.
9535 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9536 isOperationLegalOrCustom(ISD::CTTZ, VT))
9537 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
9538
9539 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9540 if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
9541 EVT SetCCVT =
9542 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9543 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
9544 SDValue Zero = DAG.getConstant(0, dl, VT);
9545 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9546 return DAG.getSelect(dl, VT, SrcIsZero,
9547 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
9548 }
9549
9550 // Only expand vector types if we have the appropriate vector bit operations.
9551 // This includes the operations needed to expand CTPOP if it isn't supported.
9552 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9553 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
9554 !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
9555 !canExpandVectorCTPOP(*this, VT)) ||
9556 !isOperationLegalOrCustom(ISD::SUB, VT) ||
9557 !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
9558 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9559 return SDValue();
9560
9561 // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
9562 // to be expanded or converted to a libcall.
9565 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
9566 return V;
9567
9568 // for now, we use: { return popcount(~x & (x - 1)); }
9569 // unless the target has ctlz but not ctpop, in which case we use:
9570 // { return 32 - nlz(~x & (x-1)); }
9571 // Ref: "Hacker's Delight" by Henry Warren
9572 SDValue Tmp = DAG.getNode(
9573 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
9574 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
9575
9576 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
9577 if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
9578 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
9579 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
9580 }
9581
9582 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
9583}
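// Illustrative scalar sketch of the fallback above (hypothetical helper,
// assumes C++20 <bit>): ~x & (x - 1) keeps exactly the bits below the lowest
// set bit, so its population count is the trailing-zero count.
static unsigned cttz32(uint32_t x) {
  return std::popcount(~x & (x - 1)); // 32 when x == 0
}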
9584
9585SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9586 SDValue Op = Node->getOperand(0);
9587 SDValue Mask = Node->getOperand(1);
9588 SDValue VL = Node->getOperand(2);
9589 SDLoc dl(Node);
9590 EVT VT = Node->getValueType(0);
9591
9592 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9593 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9594 DAG.getAllOnesConstant(dl, VT), Mask, VL);
9595 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9596 DAG.getConstant(1, dl, VT), Mask, VL);
9597 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9598 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9599}
9600
9601SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
9602 SelectionDAG &DAG) const {
9603 // %cond = to_bool_vec %source
9604 // %splat = splat /*val=*/VL
9605 // %tz = step_vector
9606 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
9607 // %r = vp.reduce.umin %v
9608 SDLoc DL(N);
9609 SDValue Source = N->getOperand(0);
9610 SDValue Mask = N->getOperand(1);
9611 SDValue EVL = N->getOperand(2);
9612 EVT SrcVT = Source.getValueType();
9613 EVT ResVT = N->getValueType(0);
9614 EVT ResVecVT =
9615 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
9616
9617 // Convert to boolean vector.
9618 if (SrcVT.getScalarType() != MVT::i1) {
9619 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
9620 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
9621 SrcVT.getVectorElementCount());
9622 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
9623 DAG.getCondCode(ISD::SETNE), Mask, EVL);
9624 }
9625
9626 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
9627 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
9628 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
9629 SDValue Select =
9630 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
9631 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
9632}
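// Illustrative loop form of the lowering above (hypothetical helper, assumes
// <algorithm>): active true lanes contribute their index, everything else
// contributes the sentinel EVL, and the reduction takes the minimum.
static unsigned cttzElements(const bool *Cond, unsigned EVL) {
  unsigned Min = EVL;
  for (unsigned I = 0; I < EVL; ++I)
    if (Cond[I])
      Min = std::min(Min, I);
  return Min;
}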
9633
9634SDValue TargetLowering::expandVectorFindLastActive(SDNode *N,
9635 SelectionDAG &DAG) const {
9636 SDLoc DL(N);
9637 SDValue Mask = N->getOperand(0);
9638 EVT MaskVT = Mask.getValueType();
9639 EVT BoolVT = MaskVT.getScalarType();
9640
9641 // Find a suitable type for a stepvector.
9642 ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
9643 if (MaskVT.isScalableVector())
9644 VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
9645 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9646 unsigned EltWidth = TLI.getBitWidthForCttzElements(
9647 BoolVT.getTypeForEVT(*DAG.getContext()), MaskVT.getVectorElementCount(),
9648 /*ZeroIsPoison=*/true, &VScaleRange);
9649 EVT StepVT = MVT::getIntegerVT(EltWidth);
9650 EVT StepVecVT = MaskVT.changeVectorElementType(StepVT);
9651
9652 // If promotion is required to make the type legal, do it here; promotion
9653 // of integers within LegalizeVectorOps is looking for types of the same
9654 // size but with a smaller number of larger elements, not the usual larger
9655 // size with the same number of larger elements.
9656 if (TLI.getTypeAction(StepVecVT.getSimpleVT()) ==
9657 TargetLowering::TypePromoteInteger) {
9658 StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
9659 StepVT = StepVecVT.getVectorElementType();
9660 }
9661
9662 // Zero out lanes with inactive elements, then find the highest remaining
9663 // value from the stepvector.
9664 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
9665 SDValue StepVec = DAG.getStepVector(DL, StepVecVT);
9666 SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
9667 SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
9668 return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
9669}
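// Illustrative loop form of the select + VECREDUCE_UMAX scheme above
// (hypothetical helper): inactive lanes are zeroed, so the reduction yields
// the index of the last active lane, or 0 if none is active.
static unsigned findLastActive(const bool *Mask, unsigned NumElts) {
  unsigned Highest = 0;
  for (unsigned I = 0; I < NumElts; ++I)
    if (Mask[I])
      Highest = I;
  return Highest;
}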
9670
9671SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
9672 bool IsNegative) const {
9673 SDLoc dl(N);
9674 EVT VT = N->getValueType(0);
9675 SDValue Op = N->getOperand(0);
9676
9677 // abs(x) -> smax(x,sub(0,x))
9678 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9679 isOperationLegal(ISD::SMAX, VT)) {
9680 SDValue Zero = DAG.getConstant(0, dl, VT);
9681 Op = DAG.getFreeze(Op);
9682 return DAG.getNode(ISD::SMAX, dl, VT, Op,
9683 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9684 }
9685
9686 // abs(x) -> umin(x,sub(0,x))
9687 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9688 isOperationLegal(ISD::UMIN, VT)) {
9689 SDValue Zero = DAG.getConstant(0, dl, VT);
9690 Op = DAG.getFreeze(Op);
9691 return DAG.getNode(ISD::UMIN, dl, VT, Op,
9692 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9693 }
9694
9695 // 0 - abs(x) -> smin(x, sub(0,x))
9696 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
9697 isOperationLegal(ISD::SMIN, VT)) {
9698 SDValue Zero = DAG.getConstant(0, dl, VT);
9699 Op = DAG.getFreeze(Op);
9700 return DAG.getNode(ISD::SMIN, dl, VT, Op,
9701 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9702 }
9703
9704 // Only expand vector types if we have the appropriate vector operations.
9705 if (VT.isVector() &&
9706 (!isOperationLegalOrCustom(ISD::SRA, VT) ||
9707 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9708 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9709 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9710 return SDValue();
9711
9712 Op = DAG.getFreeze(Op);
9713 SDValue Shift = DAG.getNode(
9714 ISD::SRA, dl, VT, Op,
9715 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9716 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9717
9718 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9719 if (!IsNegative)
9720 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9721
9722 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9723 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9724}
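// Illustrative scalar sketch of the sra/xor/sub expansion above (hypothetical
// helper): Y is 0 for non-negative x and -1 for negative x, so (x ^ Y) - Y
// conditionally negates x. Assumes arithmetic right shift on signed values
// (guaranteed since C++20).
static int32_t abs32(int32_t x) {
  int32_t y = x >> 31; // 0 or -1
  return (x ^ y) - y;  // like ISD::ABS, INT32_MIN maps to itself
}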
9725
9726SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
9727 SDLoc dl(N);
9728 EVT VT = N->getValueType(0);
9729 SDValue LHS = N->getOperand(0);
9730 SDValue RHS = N->getOperand(1);
9731 bool IsSigned = N->getOpcode() == ISD::ABDS;
9732
9733 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9734 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9735 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9736 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9737 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
9738 LHS = DAG.getFreeze(LHS);
9739 RHS = DAG.getFreeze(RHS);
9740 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
9741 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
9742 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9743 }
9744
9745 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9746 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) {
9747 LHS = DAG.getFreeze(LHS);
9748 RHS = DAG.getFreeze(RHS);
9749 return DAG.getNode(ISD::OR, dl, VT,
9750 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
9751 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
9752 }
9753
9754 // If the subtract doesn't overflow then just use abs(sub())
9755 bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
9756
9757 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
9758 return DAG.getNode(ISD::ABS, dl, VT,
9759 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
9760
9761 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
9762 return DAG.getNode(ISD::ABS, dl, VT,
9763 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9764
9765 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9766 ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
9767 LHS = DAG.getFreeze(LHS);
9768 RHS = DAG.getFreeze(RHS);
9769 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
9770
9771 // Branchless expansion iff cmp result is allbits:
9772 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
9773 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
9774 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
9775 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
9776 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
9777 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
9778 }
9779
9780 // Similar to the branchless expansion, if we don't prefer selects, use the
9781 // (sign-extended) usubo overflow flag if the (scalar) type is illegal as this
9782 // is more likely to legalize cleanly: abdu(lhs, rhs) -> sub(xor(sub(lhs,
9783 // rhs), uof(lhs, rhs)), uof(lhs, rhs))
9784 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
9786 SDValue USubO =
9787 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
9788 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
9789 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
9790 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
9791 }
9792
9793 // FIXME: Should really try to split the vector in case it's legal on a
9794 // subvector.
9795 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
9796 return DAG.UnrollVectorOp(N);
9797
9798 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9799 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9800 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9801 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9802}
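// Illustrative scalar sketch of the preferred max/min form above
// (hypothetical helper, assumes <algorithm>):
// abdu(a, b) == umax(a, b) - umin(a, b).
static uint32_t abdu32(uint32_t a, uint32_t b) {
  return std::max(a, b) - std::min(a, b);
}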
9803
9804SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
9805 SDLoc dl(N);
9806 EVT VT = N->getValueType(0);
9807 SDValue LHS = N->getOperand(0);
9808 SDValue RHS = N->getOperand(1);
9809
9810 unsigned Opc = N->getOpcode();
9811 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
9812 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
9813 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9814 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
9815 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
9816 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9817 assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
9818 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
9819 "Unknown AVG node");
9820
9821 // If the operands are already extended, we can add+shift.
9822 bool IsExt =
9823 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
9824 DAG.ComputeNumSignBits(RHS) >= 2) ||
9825 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
9826 DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
9827 if (IsExt) {
9828 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
9829 if (!IsFloor)
9830 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
9831 return DAG.getNode(ShiftOpc, dl, VT, Sum,
9832 DAG.getShiftAmountConstant(1, VT, dl));
9833 }
9834
9835 // For scalars, see if we can efficiently extend/truncate to use add+shift.
9836 if (VT.isScalarInteger()) {
9837 unsigned BW = VT.getScalarSizeInBits();
9838 EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
9839 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
9840 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
9841 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
9842 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
9843 if (!IsFloor)
9844 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
9845 DAG.getConstant(1, dl, ExtVT));
9846 // Just use SRL as we will be truncating away the extended sign bits.
9847 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
9848 DAG.getShiftAmountConstant(1, ExtVT, dl));
9849 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
9850 }
9851 }
9852
9853 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
9854 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
9855 SDValue UAddWithOverflow =
9856 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
9857
9858 SDValue Sum = UAddWithOverflow.getValue(0);
9859 SDValue Overflow = UAddWithOverflow.getValue(1);
9860
9861 // Right shift the sum by 1
9862 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
9863 DAG.getShiftAmountConstant(1, VT, dl));
9864
9865 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
9866 SDValue OverflowShl = DAG.getNode(
9867 ISD::SHL, dl, VT, ZeroExtOverflow,
9868 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9869
9870 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
9871 }
9872
9873 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
9874 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
9875 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
9876 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
9877 LHS = DAG.getFreeze(LHS);
9878 RHS = DAG.getFreeze(RHS);
9879 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
9880 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
9881 SDValue Shift =
9882 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
9883 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
9884}
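// Illustrative scalar sketch of the final identities above (hypothetical
// helpers). They rely on a + b == 2*(a & b) + (a ^ b) and
// a + b == 2*(a | b) - (a ^ b), so the halved forms never overflow.
static uint32_t avgFloorU(uint32_t a, uint32_t b) {
  return (a & b) + ((a ^ b) >> 1);
}
static uint32_t avgCeilU(uint32_t a, uint32_t b) {
  return (a | b) - ((a ^ b) >> 1);
}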
9885
9886SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
9887 SDLoc dl(N);
9888 EVT VT = N->getValueType(0);
9889 SDValue Op = N->getOperand(0);
9890
9891 if (!VT.isSimple())
9892 return SDValue();
9893
9894 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9895 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9896 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9897 default:
9898 return SDValue();
9899 case MVT::i16:
9900 // Use a rotate by 8. This can be further expanded if necessary.
9901 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9902 case MVT::i32:
9903 // This is meant for ARM specifically, which has ROTR but no ROTL.
9904 if (isOperationLegalOrCustom(ISD::ROTR, VT)) {
9905 SDValue Mask = DAG.getConstant(0x00FF00FF, dl, VT);
9906 // (x & 0x00FF00FF) rotr 8 | (x rotl 8) & 0x00FF00FF
9907 SDValue And = DAG.getNode(ISD::AND, dl, VT, Op, Mask);
9908 SDValue Rotr =
9909 DAG.getNode(ISD::ROTR, dl, VT, And, DAG.getConstant(8, dl, SHVT));
9910 SDValue Rotl =
9911 DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9912 SDValue And2 = DAG.getNode(ISD::AND, dl, VT, Rotl, Mask);
9913 return DAG.getNode(ISD::OR, dl, VT, Rotr, And2);
9914 }
9915 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9916 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
9917 DAG.getConstant(0xFF00, dl, VT));
9918 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
9919 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9920 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
9921 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9922 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9923 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9924 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9925 case MVT::i64:
9926 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9927 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
9928 DAG.getConstant(255ULL<<8, dl, VT));
9929 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
9930 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
9931 DAG.getConstant(255ULL<<16, dl, VT));
9932 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
9933 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
9934 DAG.getConstant(255ULL<<24, dl, VT));
9935 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
9936 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9937 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
9938 DAG.getConstant(255ULL<<24, dl, VT));
9939 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9940 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
9941 DAG.getConstant(255ULL<<16, dl, VT));
9942 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
9943 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
9944 DAG.getConstant(255ULL<<8, dl, VT));
9945 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9946 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
9947 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
9948 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9949 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9950 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
9951 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9952 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
9953 }
9954}
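// Illustrative scalar sketch of the i32 shift-and-mask path above
// (hypothetical helper).
static uint32_t bswap32(uint32_t x) {
  return (x << 24) | ((x & 0xFF00u) << 8) | ((x >> 8) & 0xFF00u) | (x >> 24);
}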
9955
9956SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
9957 SDLoc dl(N);
9958 EVT VT = N->getValueType(0);
9959 SDValue Op = N->getOperand(0);
9960 SDValue Mask = N->getOperand(1);
9961 SDValue EVL = N->getOperand(2);
9962
9963 if (!VT.isSimple())
9964 return SDValue();
9965
9966 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9967 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9968 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9969 default:
9970 return SDValue();
9971 case MVT::i16:
9972 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9973 Mask, EVL);
9974 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9975 Mask, EVL);
9976 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
9977 case MVT::i32:
9978 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9979 Mask, EVL);
9980 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
9981 Mask, EVL);
9982 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
9983 Mask, EVL);
9984 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9985 Mask, EVL);
9986 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9987 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
9988 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9989 Mask, EVL);
9990 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9991 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9992 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9993 case MVT::i64:
9994 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9995 Mask, EVL);
9996 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9997 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9998 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
9999 Mask, EVL);
10000 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10001 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10002 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
10003 Mask, EVL);
10004 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10005 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10006 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
10007 Mask, EVL);
10008 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10009 Mask, EVL);
10010 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
10011 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10012 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10013 Mask, EVL);
10014 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
10015 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10016 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
10017 Mask, EVL);
10018 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10019 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10020 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10021 Mask, EVL);
10022 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
10023 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
10024 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10025 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10026 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
10027 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10028 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
10029 }
10030}
10031
10032SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
10033 SDLoc dl(N);
10034 EVT VT = N->getValueType(0);
10035 SDValue Op = N->getOperand(0);
10036 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10037 unsigned Sz = VT.getScalarSizeInBits();
10038
10039 SDValue Tmp, Tmp2, Tmp3;
10040
10041 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10042 // and finally the i1 pairs.
10043 // TODO: We can easily support i4/i2 legal types if any target ever does.
10044 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10045 // Create the masks - repeating the pattern every byte.
10046 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10047 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10048 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10049
10050 // BSWAP if the type is wider than a single byte.
10051 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
10052
10053 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10054 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
10055 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
10056 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
10057 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
10058 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10059
10060 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10061 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
10062 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
10063 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
10064 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
10065 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10066
10067 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10068 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
10069 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
10070 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
10071 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
10072 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10073 return Tmp;
10074 }
10075
10076 Tmp = DAG.getConstant(0, dl, VT);
10077 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
10078 if (I < J)
10079 Tmp2 =
10080 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
10081 else
10082 Tmp2 =
10083 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
10084
10085 APInt Shift = APInt::getOneBitSet(Sz, J);
10086 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
10087 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
10088 }
10089
10090 return Tmp;
10091}
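// Illustrative scalar sketch of the scheme above (hypothetical helper;
// __builtin_bswap32 is the GCC/Clang builtin): swap bytes first, then swap
// nibbles, bit pairs, and single bits within each byte.
static uint32_t bitreverse32(uint32_t v) {
  v = __builtin_bswap32(v);
  v = ((v >> 4) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4); // swap i4 halves
  v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2); // swap i2 pairs
  v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1); // swap i1 pairs
  return v;
}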
10092
10093SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
10094 assert(N->getOpcode() == ISD::VP_BITREVERSE);
10095
10096 SDLoc dl(N);
10097 EVT VT = N->getValueType(0);
10098 SDValue Op = N->getOperand(0);
10099 SDValue Mask = N->getOperand(1);
10100 SDValue EVL = N->getOperand(2);
10101 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10102 unsigned Sz = VT.getScalarSizeInBits();
10103
10104 SDValue Tmp, Tmp2, Tmp3;
10105
10106 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10107 // and finally the i1 pairs.
10108 // TODO: We can easily support i4/i2 legal types if any target ever does.
10109 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10110 // Create the masks - repeating the pattern every byte.
10111 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10112 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10113 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10114
10115 // BSWAP if the type is wider than a single byte.
10116 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
10117
10118 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10119 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
10120 Mask, EVL);
10121 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10122 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
10123 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
10124 Mask, EVL);
10125 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
10126 Mask, EVL);
10127 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10128
10129 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10130 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
10131 Mask, EVL);
10132 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10133 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
10134 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
10135 Mask, EVL);
10136 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
10137 Mask, EVL);
10138 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10139
10140 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10141 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
10142 Mask, EVL);
10143 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10144 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
10145 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
10146 Mask, EVL);
10147 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
10148 Mask, EVL);
10149 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10150 return Tmp;
10151 }
10152 return SDValue();
10153}
10154
10155std::pair<SDValue, SDValue>
10156TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
10157 SelectionDAG &DAG) const {
10158 SDLoc SL(LD);
10159 SDValue Chain = LD->getChain();
10160 SDValue BasePTR = LD->getBasePtr();
10161 EVT SrcVT = LD->getMemoryVT();
10162 EVT DstVT = LD->getValueType(0);
10163 ISD::LoadExtType ExtType = LD->getExtensionType();
10164
10165 if (SrcVT.isScalableVector())
10166 report_fatal_error("Cannot scalarize scalable vector loads");
10167
10168 unsigned NumElem = SrcVT.getVectorNumElements();
10169
10170 EVT SrcEltVT = SrcVT.getScalarType();
10171 EVT DstEltVT = DstVT.getScalarType();
10172
10173 // A vector must always be stored in memory as-is, i.e. without any padding
10174 // between the elements, since various pieces of code depend on it, e.g. in the
10175 // handling of a bitcast of a vector type to int, which may be done with a
10176 // vector store followed by an integer load. A vector that does not have
10177 // elements that are byte-sized must therefore be stored as an integer
10178 // built out of the extracted vector elements.
10179 if (!SrcEltVT.isByteSized()) {
10180 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
10181 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
10182
10183 unsigned NumSrcBits = SrcVT.getSizeInBits();
10184 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
10185
10186 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
10187 SDValue SrcEltBitMask = DAG.getConstant(
10188 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
10189
10190 // Load the whole vector and avoid masking off the top bits as it makes
10191 // the codegen worse.
10192 SDValue Load =
10193 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
10194 LD->getPointerInfo(), SrcIntVT, LD->getBaseAlign(),
10195 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10196
10197 SmallVector<SDValue, 8> Vals;
10198 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10199 unsigned ShiftIntoIdx =
10200 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10201 SDValue ShiftAmount = DAG.getShiftAmountConstant(
10202 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
10203 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
10204 SDValue Elt =
10205 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
10206 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
10207
10208 if (ExtType != ISD::NON_EXTLOAD) {
10209 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
10210 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
10211 }
10212
10213 Vals.push_back(Scalar);
10214 }
10215
10216 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10217 return std::make_pair(Value, Load.getValue(1));
10218 }
10219
10220 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
10221 assert(SrcEltVT.isByteSized());
10222
10222
10223 SmallVector<SDValue, 8> Vals;
10224 SmallVector<SDValue, 8> LoadChains;
10225
10226 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10227 SDValue ScalarLoad = DAG.getExtLoad(
10228 ExtType, SL, DstEltVT, Chain, BasePTR,
10229 LD->getPointerInfo().getWithOffset(Idx * Stride), SrcEltVT,
10230 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10231
10232 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
10233
10234 Vals.push_back(ScalarLoad.getValue(0));
10235 LoadChains.push_back(ScalarLoad.getValue(1));
10236 }
10237
10238 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
10239 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10240
10241 return std::make_pair(Value, NewChain);
10242}
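// Illustrative sketch of the non-byte-sized path above (hypothetical helper,
// little-endian lane order): the whole vector is loaded as one integer and
// each lane is recovered by shift, mask, and truncate, here four i4 lanes
// out of an i16.
static void unpack4xi4(uint16_t Packed, uint8_t Elts[4]) {
  for (unsigned Idx = 0; Idx < 4; ++Idx)
    Elts[Idx] = (Packed >> (Idx * 4)) & 0xF;
}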
10243
10244SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
10245 SelectionDAG &DAG) const {
10246 SDLoc SL(ST);
10247
10248 SDValue Chain = ST->getChain();
10249 SDValue BasePtr = ST->getBasePtr();
10250 SDValue Value = ST->getValue();
10251 EVT StVT = ST->getMemoryVT();
10252
10253 if (StVT.isScalableVector())
10254 report_fatal_error("Cannot scalarize scalable vector stores");
10255
10256 // The type of the data we want to save
10257 EVT RegVT = Value.getValueType();
10258 EVT RegSclVT = RegVT.getScalarType();
10259
10260 // The type of data as saved in memory.
10261 EVT MemSclVT = StVT.getScalarType();
10262
10263 unsigned NumElem = StVT.getVectorNumElements();
10264
10265 // A vector must always be stored in memory as-is, i.e. without any padding
10266 // between the elements, since various code depend on it, e.g. in the
10267 // handling of a bitcast of a vector type to int, which may be done with a
10268 // vector store followed by an integer load. A vector that does not have
10269 // elements that are byte-sized must therefore be stored as an integer
10270 // built out of the extracted vector elements.
10271 if (!MemSclVT.isByteSized()) {
10272 unsigned NumBits = StVT.getSizeInBits();
10273 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
10274
10275 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
10276
10277 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10278 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10279 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
10280 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
10281 unsigned ShiftIntoIdx =
10282 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10283 SDValue ShiftAmount =
10284 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
10285 SDValue ShiftedElt =
10286 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
10287 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
10288 }
10289
10290 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
10291 ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10292 ST->getAAInfo());
10293 }
10294
10295 // Store Stride in bytes
10296 unsigned Stride = MemSclVT.getSizeInBits() / 8;
10297 assert(Stride && "Zero stride!");
10298 // Extract each of the elements from the original vector and save them into
10299 // memory individually.
10300 SmallVector<SDValue, 8> Stores;
10301 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10302 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10303
10304 SDValue Ptr =
10305 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
10306
10307 // This scalar TruncStore may be illegal, but we legalize it later.
10308 SDValue Store = DAG.getTruncStore(
10309 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
10310 MemSclVT, ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10311 ST->getAAInfo());
10312
10313 Stores.push_back(Store);
10314 }
10315
10316 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
10317}
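// Illustrative sketch of the non-byte-sized store path above (hypothetical
// helper, little-endian lane order): each lane is truncated to its memory
// width, shifted into position, and OR'd into one integer, here four i4
// lanes into an i16.
static uint16_t pack4xi4(const uint8_t Elts[4]) {
  uint16_t Packed = 0;
  for (unsigned Idx = 0; Idx < 4; ++Idx)
    Packed |= uint16_t(Elts[Idx] & 0xFu) << (Idx * 4);
  return Packed;
}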
10318
10319std::pair<SDValue, SDValue>
10320TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
10321 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
10322 "unaligned indexed loads not implemented!");
10323 SDValue Chain = LD->getChain();
10324 SDValue Ptr = LD->getBasePtr();
10325 EVT VT = LD->getValueType(0);
10326 EVT LoadedVT = LD->getMemoryVT();
10327 SDLoc dl(LD);
10328 auto &MF = DAG.getMachineFunction();
10329
10330 if (VT.isFloatingPoint() || VT.isVector()) {
10331 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
10332 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
10333 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
10334 LoadedVT.isVector()) {
10335 // Scalarize the load and let the individual components be handled.
10336 return scalarizeVectorLoad(LD, DAG);
10337 }
10338
10339 // Expand to a (misaligned) integer load of the same size,
10340 // then bitconvert to floating point or vector.
10341 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
10342 LD->getMemOperand());
10343 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
10344 if (LoadedVT != VT)
10345 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
10346 ISD::ANY_EXTEND, dl, VT, Result);
10347
10348 return std::make_pair(Result, newLoad.getValue(1));
10349 }
10350
10351 // Copy the value to a (aligned) stack slot using (unaligned) integer
10352 // loads and stores, then do a (aligned) load from the stack slot.
10353 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
10354 unsigned LoadedBytes = LoadedVT.getStoreSize();
10355 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10356 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
10357
10358 // Make sure the stack slot is also aligned for the register type.
10359 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
10360 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
10361 SmallVector<SDValue, 8> Stores;
10362 SDValue StackPtr = StackBase;
10363 unsigned Offset = 0;
10364
10365 EVT PtrVT = Ptr.getValueType();
10366 EVT StackPtrVT = StackPtr.getValueType();
10367
10368 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10369 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10370
10371 // Do all but one copies using the full register width.
10372 for (unsigned i = 1; i < NumRegs; i++) {
10373 // Load one integer register's worth from the original location.
10374 SDValue Load = DAG.getLoad(
10375 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
10376 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10377 // Follow the load with a store to the stack slot. Remember the store.
10378 Stores.push_back(DAG.getStore(
10379 Load.getValue(1), dl, Load, StackPtr,
10380 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
10381 // Increment the pointers.
10382 Offset += RegBytes;
10383
10384 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10385 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10386 }
10387
10388 // The last copy may be partial. Do an extending load.
10389 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
10390 8 * (LoadedBytes - Offset));
10391 SDValue Load = DAG.getExtLoad(
10392 ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
10393 LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->getBaseAlign(),
10394 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10395 // Follow the load with a store to the stack slot. Remember the store.
10396 // On big-endian machines this requires a truncating store to ensure
10397 // that the bits end up in the right place.
10398 Stores.push_back(DAG.getTruncStore(
10399 Load.getValue(1), dl, Load, StackPtr,
10400 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
10401
10402 // The order of the stores doesn't matter - say it with a TokenFactor.
10403 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10404
10405 // Finally, perform the original load only redirected to the stack slot.
10406 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
10407 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
10408 LoadedVT);
10409
10410 // Callers expect a MERGE_VALUES node.
10411 return std::make_pair(Load, TF);
10412 }
10413
10414 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
10415 "Unaligned load of unsupported type.");
10416
10417 // Compute the new VT that is half the size of the old one. This is an
10418 // integer MVT.
10419 unsigned NumBits = LoadedVT.getSizeInBits();
10420 EVT NewLoadedVT;
10421 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
10422 NumBits >>= 1;
10423
10424 Align Alignment = LD->getBaseAlign();
10425 unsigned IncrementSize = NumBits / 8;
10426 ISD::LoadExtType HiExtType = LD->getExtensionType();
10427
10428 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
10429 if (HiExtType == ISD::NON_EXTLOAD)
10430 HiExtType = ISD::ZEXTLOAD;
10431
10432 // Load the value in two parts
10433 SDValue Lo, Hi;
10434 if (DAG.getDataLayout().isLittleEndian()) {
10435 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10436 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10437 LD->getAAInfo());
10438
10439 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10440 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
10441 LD->getPointerInfo().getWithOffset(IncrementSize),
10442 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10443 LD->getAAInfo());
10444 } else {
10445 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10446 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10447 LD->getAAInfo());
10448
10449 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10450 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
10451 LD->getPointerInfo().getWithOffset(IncrementSize),
10452 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10453 LD->getAAInfo());
10454 }
10455
10456 // aggregate the two parts
10457 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
10458 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
10459 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
10460
10461 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
10462 Hi.getValue(1));
10463
10464 return std::make_pair(Result, TF);
10465}
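// Illustrative sketch of the final combine above for a little-endian split
// of an i32 (hypothetical helper): the zero-extended low half is OR'd with
// the high half shifted up by NumBits.
static uint32_t combineHalves(uint16_t Lo, uint16_t Hi) {
  return (uint32_t(Hi) << 16) | Lo;
}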
10466
10467SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
10468 SelectionDAG &DAG) const {
10469 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
10470 "unaligned indexed stores not implemented!");
10471 SDValue Chain = ST->getChain();
10472 SDValue Ptr = ST->getBasePtr();
10473 SDValue Val = ST->getValue();
10474 EVT VT = Val.getValueType();
10475 Align Alignment = ST->getBaseAlign();
10476 auto &MF = DAG.getMachineFunction();
10477 EVT StoreMemVT = ST->getMemoryVT();
10478
10479 SDLoc dl(ST);
10480 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
10481 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
10482 if (isTypeLegal(intVT)) {
10483 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
10484 StoreMemVT.isVector()) {
10485 // Scalarize the store and let the individual components be handled.
10486 SDValue Result = scalarizeVectorStore(ST, DAG);
10487 return Result;
10488 }
10489 // Expand to a bitconvert of the value to the integer type of the
10490 // same size, then a (misaligned) int store.
10491 // FIXME: Does not handle truncating floating point stores!
10492 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
10493 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
10494 Alignment, ST->getMemOperand()->getFlags());
10495 return Result;
10496 }
10497 // Do a (aligned) store to a stack slot, then copy from the stack slot
10498 // to the final destination using (unaligned) integer loads and stores.
10499 MVT RegVT = getRegisterType(
10500 *DAG.getContext(),
10501 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
10502 EVT PtrVT = Ptr.getValueType();
10503 unsigned StoredBytes = StoreMemVT.getStoreSize();
10504 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10505 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
10506
10507 // Make sure the stack slot is also aligned for the register type.
10508 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
10509 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
10510
10511 // Perform the original store, only redirected to the stack slot.
10512 SDValue Store = DAG.getTruncStore(
10513 Chain, dl, Val, StackPtr,
10514 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
10515
10516 EVT StackPtrVT = StackPtr.getValueType();
10517
10518 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10519 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10520 SmallVector<SDValue, 8> Stores;
10521 unsigned Offset = 0;
10522
10523 // Do all but one copies using the full register width.
10524 for (unsigned i = 1; i < NumRegs; i++) {
10525 // Load one integer register's worth from the stack slot.
10526 SDValue Load = DAG.getLoad(
10527 RegVT, dl, Store, StackPtr,
10528 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
10529 // Store it to the final location. Remember the store.
10530 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
10531 ST->getPointerInfo().getWithOffset(Offset),
10532 ST->getBaseAlign(),
10533 ST->getMemOperand()->getFlags()));
10534 // Increment the pointers.
10535 Offset += RegBytes;
10536 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10537 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10538 }
10539
10540 // The last store may be partial. Do a truncating store. On big-endian
10541 // machines this requires an extending load from the stack slot to ensure
10542 // that the bits are in the right place.
10543 EVT LoadMemVT =
10544 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
10545
10546 // Load from the stack slot.
10547 SDValue Load = DAG.getExtLoad(
10548 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
10549 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
10550
10551 Stores.push_back(DAG.getTruncStore(
10552 Load.getValue(1), dl, Load, Ptr,
10553 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
10554 ST->getBaseAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()));
10555 // The order of the stores doesn't matter - say it with a TokenFactor.
10556 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10557 return Result;
10558 }
10559
10560 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
10561 "Unaligned store of unknown type.");
10562 // Get the half-size VT
10563 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
10564 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
10565 unsigned IncrementSize = NumBits / 8;
10566
10567 // Divide the stored value in two parts.
10568 SDValue ShiftAmount =
10569 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
10570 SDValue Lo = Val;
10571 // If Val is a constant, replace the upper bits with 0. The SRL will constant
10572 // fold and not use the upper bits. A smaller constant may be easier to
10573 // materialize.
10574 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
10575 Lo = DAG.getNode(
10576 ISD::AND, dl, VT, Lo,
10577 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
10578 VT));
10579 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
10580
10581 // Store the two parts
10582 SDValue Store1, Store2;
10583 Store1 = DAG.getTruncStore(Chain, dl,
10584 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10585 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
10586 ST->getMemOperand()->getFlags());
10587
10588 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10589 Store2 = DAG.getTruncStore(
10590 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10591 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
10592 ST->getMemOperand()->getFlags(), ST->getAAInfo());
10593
10594 SDValue Result =
10595 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
10596 return Result;
10597}
10598
10599SDValue
10600TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
10601 const SDLoc &DL, EVT DataVT,
10602 SelectionDAG &DAG,
10603 bool IsCompressedMemory) const {
10604 SDValue Increment;
10605 EVT AddrVT = Addr.getValueType();
10606 EVT MaskVT = Mask.getValueType();
10607 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10608 "Incompatible types of Data and Mask");
10609 if (IsCompressedMemory) {
10610 if (DataVT.isScalableVector())
10611 report_fatal_error(
10612 "Cannot currently handle compressed memory with scalable vectors");
10613 // Increment the pointer according to the number of '1's in the mask.
10614 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10615 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
10616 if (MaskIntVT.getSizeInBits() < 32) {
10617 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10618 MaskIntVT = MVT::i32;
10619 }
10620
10621 // Count '1's with POPCNT.
10622 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10623 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10624 // Scale is an element size in bytes.
10625 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10626 AddrVT);
10627 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10628 } else if (DataVT.isScalableVector()) {
10629 Increment = DAG.getVScale(DL, AddrVT,
10630 APInt(AddrVT.getFixedSizeInBits(),
10631 DataVT.getStoreSize().getKnownMinValue()));
10632 } else
10633 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
10634
10635 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10636}
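// Illustrative sketch of the compressed-memory pointer bump above
// (hypothetical helper, assumes C++20 <bit>): advance by one element per set
// mask bit, scaled by the element size in bytes.
static uint64_t compressedIncrement(uint32_t MaskBits, unsigned EltBytes) {
  return uint64_t(std::popcount(MaskBits)) * EltBytes;
}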
10637
10638static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
10639 EVT VecVT, const SDLoc &dl,
10640 ElementCount SubEC) {
10641 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10642 "Cannot index a scalable vector within a fixed-width vector");
10643
10644 unsigned NElts = VecVT.getVectorMinNumElements();
10645 unsigned NumSubElts = SubEC.getKnownMinValue();
10646 EVT IdxVT = Idx.getValueType();
10647
10648 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10649 // If this is a constant index and we know the value plus the number of the
10650 // elements in the subvector minus one is less than the minimum number of
10651 // elements then it's safe to return Idx.
10652 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
10653 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10654 return Idx;
10655 SDValue VS =
10656 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
10657 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10658 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
10659 DAG.getConstant(NumSubElts, dl, IdxVT));
10660 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
10661 }
10662 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
10663 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
10664 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
10665 DAG.getConstant(Imm, dl, IdxVT));
10666 }
10667 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10668 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
10669 DAG.getConstant(MaxIndex, dl, IdxVT));
10670}
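// Illustrative sketch of the fixed-width clamp above (hypothetical helper,
// assumes <algorithm>): the largest index that still keeps the whole
// subvector in bounds.
static unsigned clampSubVecIndex(unsigned Idx, unsigned NElts,
                                 unsigned NumSubElts) {
  unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
  return std::min(Idx, MaxIndex);
}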
10671
10672SDValue
10673TargetLowering::getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr,
10674 EVT VecVT, SDValue Index,
10675 const SDNodeFlags PtrArithFlags) const {
10676 return getVectorSubVecPointer(
10677 DAG, VecPtr, VecVT,
10678 EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
10679 Index, PtrArithFlags);
10680}
10681
10682SDValue
10683TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr,
10684 EVT VecVT, EVT SubVecVT, SDValue Index,
10685 const SDNodeFlags PtrArithFlags) const {
10686 SDLoc dl(Index);
10687 // Make sure the index type is big enough to compute in.
10688 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
10689
10690 EVT EltVT = VecVT.getVectorElementType();
10691
10692 // Calculate the element offset and add it to the pointer.
10693 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10694 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10695 "Converting bits to bytes lost precision");
10696 assert(SubVecVT.getVectorElementType() == EltVT &&
10697 "Sub-vector must be a vector with matching element type");
10698 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
10699 SubVecVT.getVectorElementCount());
10700
10701 EVT IdxVT = Index.getValueType();
10702 if (SubVecVT.isScalableVector())
10703 Index =
10704 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10705 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
10706
10707 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10708 DAG.getConstant(EltSize, dl, IdxVT));
10709 return DAG.getMemBasePlusOffset(VecPtr, Index, dl, PtrArithFlags);
10710}
10711
10712//===----------------------------------------------------------------------===//
10713// Implementation of Emulated TLS Model
10714//===----------------------------------------------------------------------===//
10715
10716SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
10717 SelectionDAG &DAG) const {
10718 // Access to the address of TLS variable xyz is lowered to a function call:
10719 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10720 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10721 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
10722 SDLoc dl(GA);
10723
10724 ArgListTy Args;
10725 const GlobalValue *GV =
10726 cast<GlobalValue>(GA->getGlobal()->stripPointerCastsAndAliases());
10727 SmallString<32> NameString("__emutls_v.");
10728 NameString += GV->getName();
10729 StringRef EmuTlsVarName(NameString);
10730 const GlobalVariable *EmuTlsVar =
10731 GV->getParent()->getNamedGlobal(EmuTlsVarName);
10732 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
10733 Args.emplace_back(DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT), VoidPtrType);
10734
10735 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
10736
10737 TargetLowering::CallLoweringInfo CLI(DAG);
10738 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10739 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
10740 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10741
10742 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
10743 // At least for X86 targets; maybe good for other targets too?
10744 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10745 MFI.setAdjustsStack(true); // Is this only for X86 target?
10746 MFI.setHasCalls(true);
10747
10748 assert((GA->getOffset() == 0) &&
10749 "Emulated TLS must have zero offset in GlobalAddressSDNode");
10750 return CallResult.first;
10751}
10752
10753SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10754 SelectionDAG &DAG) const {
10755 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10756 if (!isCtlzFast())
10757 return SDValue();
10758 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10759 SDLoc dl(Op);
10760 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10761 EVT VT = Op.getOperand(0).getValueType();
10762 SDValue Zext = Op.getOperand(0);
10763 if (VT.bitsLT(MVT::i32)) {
10764 VT = MVT::i32;
10765 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10766 }
10767 unsigned Log2b = Log2_32(VT.getSizeInBits());
10768 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10769 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10770 DAG.getConstant(Log2b, dl, MVT::i32));
10771 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10772 }
10773 return SDValue();
10774}
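// Illustrative sketch of why the CTLZ/SRL pair above computes x == 0
// (hypothetical helper, assumes C++20 <bit>): only x == 0 has all 32 bits
// leading-zero, so clz(x) >> log2(32) is 1 for zero and 0 otherwise.
static uint32_t isZero32(uint32_t x) {
  return uint32_t(std::countl_zero(x)) >> 5;
}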
10775
10776SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
10777 SDValue Op0 = Node->getOperand(0);
10778 SDValue Op1 = Node->getOperand(1);
10779 EVT VT = Op0.getValueType();
10780 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10781 unsigned Opcode = Node->getOpcode();
10782 SDLoc DL(Node);
10783
10784 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
10785 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
10786 getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10787 Op0 = DAG.getFreeze(Op0);
10788 SDValue Zero = DAG.getConstant(0, DL, VT);
10789 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10790 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
10791 }
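// Scalar illustration of the fold above with all-bits booleans
// (hypothetical, not part of this function):
//   uint32_t umax1(uint32_t x) {
//     uint32_t IsZeroMask = (x == 0) ? ~0u : 0u; // SETEQ materialized
//     return x - IsZeroMask;                     // 0 -> 1, others unchanged
//   }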
10792
10793 // umin(x,y) -> sub(x,usubsat(x,y))
10794 // TODO: Missing freeze(Op0)?
10795 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
10796 isOperationLegal(ISD::USUBSAT, VT)) {
10797 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10798 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
10799 }
10800
10801 // umax(x,y) -> add(x,usubsat(y,x))
10802 // TODO: Missing freeze(Op0)?
10803 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
10804 isOperationLegal(ISD::USUBSAT, VT)) {
10805 return DAG.getNode(ISD::ADD, DL, VT, Op0,
10806 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
10807 }
10808
10809 // FIXME: Should really try to split the vector in case it's legal on a
10810 // subvector.
10811 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10812 return DAG.UnrollVectorOp(Node);
10813
10814 // Attempt to find an existing SETCC node that we can reuse.
10815 // TODO: Do we need a generic doesSETCCNodeExist?
10816 // TODO: Missing freeze(Op0)/freeze(Op1)?
10817 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10818 ISD::CondCode PrefCommuteCC,
10819 ISD::CondCode AltCommuteCC) {
10820 SDVTList BoolVTList = DAG.getVTList(BoolVT);
10821 for (ISD::CondCode CC : {PrefCC, AltCC}) {
10822 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10823 {Op0, Op1, DAG.getCondCode(CC)})) {
10824 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10825 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10826 }
10827 }
10828 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10829 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10830 {Op0, Op1, DAG.getCondCode(CC)})) {
10831 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10832 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
10833 }
10834 }
10835 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
10836 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10837 };
10838
10839 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10840 // -> Y = (A < B) ? B : A
10841 // -> Y = (A >= B) ? A : B
10842 // -> Y = (A <= B) ? B : A
10843 switch (Opcode) {
10844 case ISD::SMAX:
10845 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10846 case ISD::SMIN:
10847 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10848 case ISD::UMAX:
10849 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10850 case ISD::UMIN:
10851 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10852 }
10853
10854 llvm_unreachable("How did we get here?");
10855}
10856
10857SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
10858 unsigned Opcode = Node->getOpcode();
10859 SDValue LHS = Node->getOperand(0);
10860 SDValue RHS = Node->getOperand(1);
10861 EVT VT = LHS.getValueType();
10862 SDLoc dl(Node);
10863
10864 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10865 assert(VT.isInteger() && "Expected operands to be integers");
10866
10867 // usub.sat(a, b) -> umax(a, b) - b
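// e.g. usubsat(5,7) = umax(5,7) - 7 = 0 (saturated), usubsat(7,5) = 7 - 5 = 2.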
10868 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
10869 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
10870 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
10871 }
10872
10873 // uadd.sat(a, b) -> umin(a, ~b) + b
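// e.g. for i8: uaddsat(200,100) = umin(200,~100) + 100 = 155 + 100 = 255
// (saturated), while uaddsat(10,20) = 10 + 20 = 30.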
10874 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
10875 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
10876 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
10877 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
10878 }
10879
10880 unsigned OverflowOp;
10881 switch (Opcode) {
10882 case ISD::SADDSAT:
10883 OverflowOp = ISD::SADDO;
10884 break;
10885 case ISD::UADDSAT:
10886 OverflowOp = ISD::UADDO;
10887 break;
10888 case ISD::SSUBSAT:
10889 OverflowOp = ISD::SSUBO;
10890 break;
10891 case ISD::USUBSAT:
10892 OverflowOp = ISD::USUBO;
10893 break;
10894 default:
10895 llvm_unreachable("Expected method to receive signed or unsigned saturation "
10896 "addition or subtraction node.");
10897 }
10898
10899 // FIXME: Should really try to split the vector in case it's legal on a
10900 // subvector.
10901 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10902 return DAG.UnrollVectorOp(Node);
10903
10904 unsigned BitWidth = LHS.getScalarValueSizeInBits();
10905 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10906 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10907 SDValue SumDiff = Result.getValue(0);
10908 SDValue Overflow = Result.getValue(1);
10909 SDValue Zero = DAG.getConstant(0, dl, VT);
10910 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
10911
10912 if (Opcode == ISD::UADDSAT) {
10913 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10914 // (LHS + RHS) | OverflowMask
10915 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10916 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
10917 }
10918 // Overflow ? 0xffff.... : (LHS + RHS)
10919 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
10920 }
10921
10922 if (Opcode == ISD::USUBSAT) {
10923 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10924 // (LHS - RHS) & ~OverflowMask
10925 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10926 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
10927 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
10928 }
10929 // Overflow ? 0 : (LHS - RHS)
10930 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
10931 }
10932
10933 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
10934 APInt MinVal = APInt::getSignedMinValue(BitWidth);
10935 APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
10936
10937 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
10938 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
10939
10940 // If either of the operand signs is known, then the operation can only
10941 // saturate in one direction: towards SIGNED_MAX if the known sign is
10942 // non-negative, towards SIGNED_MIN if it is negative.
10943 //
10944 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10945 // sign of 'y' has to be flipped.
10946
10947 bool LHSIsNonNegative = KnownLHS.isNonNegative();
10948 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10949 : KnownRHS.isNegative();
10950 if (LHSIsNonNegative || RHSIsNonNegative) {
10951 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10952 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
10953 }
10954
10955 bool LHSIsNegative = KnownLHS.isNegative();
10956 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10957 : KnownRHS.isNonNegative();
10958 if (LHSIsNegative || RHSIsNegative) {
10959 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10960 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
10961 }
10962 }
10963
10964 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
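// e.g. for i8 saddsat(100,100): SumDiff wraps to -56, (-56 >> 7) = -1, and
// -1 ^ (-128) = 127, the expected saturation value.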
10965 APInt MinVal = APInt::getSignedMinValue(BitWidth);
10966 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10967 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
10968 DAG.getConstant(BitWidth - 1, dl, VT));
10969 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
10970 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
10971}
10972
10973SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
10974 unsigned Opcode = Node->getOpcode();
10975 SDValue LHS = Node->getOperand(0);
10976 SDValue RHS = Node->getOperand(1);
10977 EVT VT = LHS.getValueType();
10978 EVT ResVT = Node->getValueType(0);
10979 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10980 SDLoc dl(Node);
10981
10982 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10983 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10984 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
10985 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
10986
10987 // We can't perform arithmetic on i1 values. Extending them would
10988 // probably result in worse codegen, so let's just use two selects instead.
10989 // Some targets are also just better off using selects rather than subtraction
10990 // because one of the conditions can be merged with one of the selects.
10991 // And finally, if we don't know the contents of the high bits of a boolean
10992 // value we can't perform any arithmetic either.
10993 if (!isOperationLegalOrCustom(ISD::SUB, BoolVT) ||
10994 BoolVT.getScalarSizeInBits() == 1 ||
10995 getBooleanContents(BoolVT) == UndefinedBooleanContent) {
10996 SDValue SelectZeroOrOne =
10997 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10998 DAG.getConstant(0, dl, ResVT));
10999 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
11000 SelectZeroOrOne);
11001 }
11002
11003 if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
11004 std::swap(IsGT, IsLT);
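// With 0/1 booleans IsGT - IsLT is already 1, 0 or -1. With all-ones
// booleans the operands are swapped first, so e.g. LHS < RHS computes
// (-1) - 0 = -1 and LHS > RHS computes 0 - (-1) = 1.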
11005 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
11006 ResVT);
11007}
11008
11009SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
11010 unsigned Opcode = Node->getOpcode();
11011 bool IsSigned = Opcode == ISD::SSHLSAT;
11012 SDValue LHS = Node->getOperand(0);
11013 SDValue RHS = Node->getOperand(1);
11014 EVT VT = LHS.getValueType();
11015 SDLoc dl(Node);
11016
11017 assert((Node->getOpcode() == ISD::SSHLSAT ||
11018 Node->getOpcode() == ISD::USHLSAT) &&
11019 "Expected a SHLSAT opcode");
11020 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
11021 assert(VT.isInteger() && "Expected operands to be integers");
11022
11023 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
11024 return DAG.UnrollVectorOp(Node);
11025
11026 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
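// e.g. for i8 sshlsat(96, 2): 96 << 2 wraps to -128, and -128 >> 2 = -32
// != 96, so the result saturates to 127 (LHS is non-negative).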
11027
11028 unsigned BW = VT.getScalarSizeInBits();
11029 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11030 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
11031 SDValue Orig =
11032 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
11033
11034 SDValue SatVal;
11035 if (IsSigned) {
11036 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
11037 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
11038 SDValue Cond =
11039 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
11040 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
11041 } else {
11042 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
11043 }
11044 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
11045 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
11046}
11047
11048void TargetLowering::forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl,
11049 bool Signed, SDValue &Lo, SDValue &Hi,
11050 SDValue LHS, SDValue RHS,
11051 SDValue HiLHS, SDValue HiRHS) const {
11052 EVT VT = LHS.getValueType();
11053 assert(RHS.getValueType() == VT && "Mismatching operand types");
11054
11055 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
11056 assert((!Signed || !HiLHS) &&
11057 "Signed flag should only be set when HiLHS and RiRHS are null");
11058
11059 // We'll expand the multiplication by brute force because we have no other
11060 // options. This is a trivially-generalized version of the code from
11061 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
11062 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
11063 // sign bits while calculating the Hi half.
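// Writing L = LH*2^h + LL and R = RH*2^h + RL with h = HalfBits, the full
// product is LH*RH*2^(2h) + (LH*RL + LL*RH)*2^h + LL*RL; the partial sums
// T, U and V below accumulate these terms half a word at a time so that no
// carries are lost.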
11064 unsigned Bits = VT.getSizeInBits();
11065 unsigned HalfBits = Bits / 2;
11066 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
11067 SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
11068 SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
11069
11070 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
11071 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
11072
11073 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
11074 // This is always an unsigned shift.
11075 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
11076
11077 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
11078 SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
11079 SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
11080
11081 SDValue U =
11082 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
11083 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
11084 SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
11085
11086 SDValue V =
11087 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
11088 SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
11089
11090 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
11091 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
11092
11093 Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
11094 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
11095
11096 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
11097 // the products to Hi.
11098 if (HiLHS) {
11099 Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
11100 DAG.getNode(ISD::ADD, dl, VT,
11101 DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS),
11102 DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS)));
11103 }
11104}
11105
11106void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
11107 bool Signed, const SDValue LHS,
11108 const SDValue RHS, SDValue &Lo,
11109 SDValue &Hi) const {
11110 EVT VT = LHS.getValueType();
11111 assert(RHS.getValueType() == VT && "Mismatching operand types");
11112 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
11113 // We can fall back to a libcall with an illegal type for the MUL if we
11114 // have a libcall big enough.
11115 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
11116 if (WideVT == MVT::i16)
11117 LC = RTLIB::MUL_I16;
11118 else if (WideVT == MVT::i32)
11119 LC = RTLIB::MUL_I32;
11120 else if (WideVT == MVT::i64)
11121 LC = RTLIB::MUL_I64;
11122 else if (WideVT == MVT::i128)
11123 LC = RTLIB::MUL_I128;
11124
11125 if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
11126 forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
11127 return;
11128 }
11129
11130 SDValue HiLHS, HiRHS;
11131 if (Signed) {
11132 // The high part is obtained by SRA'ing all but one of the bits of low
11133 // part.
11134 unsigned LoSize = VT.getFixedSizeInBits();
11135 SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
11136 HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
11137 HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
11138 } else {
11139 HiLHS = DAG.getConstant(0, dl, VT);
11140 HiRHS = DAG.getConstant(0, dl, VT);
11141 }
11142
11143 // Attempt a libcall.
11144 SDValue Ret;
11145 TargetLowering::MakeLibCallOptions CallOptions;
11146 CallOptions.setIsSigned(Signed);
11147 CallOptions.setIsPostTypeLegalization(true);
11148 if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
11149 // Halves of WideVT are packed into registers in different order
11150 // depending on platform endianness. This is usually handled by
11151 // the C calling convention, but we can't defer to it in
11152 // the legalizer.
11153 SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
11154 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11155 } else {
11156 SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
11157 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11158 }
11159 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
11160 "Ret value is a collection of constituent nodes holding result.");
11161 if (DAG.getDataLayout().isLittleEndian()) {
11162 // Same as above.
11163 Lo = Ret.getOperand(0);
11164 Hi = Ret.getOperand(1);
11165 } else {
11166 Lo = Ret.getOperand(1);
11167 Hi = Ret.getOperand(0);
11168 }
11169}
11170
11171SDValue
11172TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
11173 assert((Node->getOpcode() == ISD::SMULFIX ||
11174 Node->getOpcode() == ISD::UMULFIX ||
11175 Node->getOpcode() == ISD::SMULFIXSAT ||
11176 Node->getOpcode() == ISD::UMULFIXSAT) &&
11177 "Expected a fixed point multiplication opcode");
11178
11179 SDLoc dl(Node);
11180 SDValue LHS = Node->getOperand(0);
11181 SDValue RHS = Node->getOperand(1);
11182 EVT VT = LHS.getValueType();
11183 unsigned Scale = Node->getConstantOperandVal(2);
11184 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
11185 Node->getOpcode() == ISD::UMULFIXSAT);
11186 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
11187 Node->getOpcode() == ISD::SMULFIXSAT);
11188 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11189 unsigned VTSize = VT.getScalarSizeInBits();
11190
11191 if (!Scale) {
11192 // [us]mul.fix(a, b, 0) -> mul(a, b)
11193 if (!Saturating) {
11194 if (isOperationLegalOrCustom(ISD::MUL, VT))
11195 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11196 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
11197 SDValue Result =
11198 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11199 SDValue Product = Result.getValue(0);
11200 SDValue Overflow = Result.getValue(1);
11201 SDValue Zero = DAG.getConstant(0, dl, VT);
11202
11203 APInt MinVal = APInt::getSignedMinValue(VTSize);
11204 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
11205 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11206 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11207 // Xor the inputs, if resulting sign bit is 0 the product will be
11208 // positive, else negative.
11209 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
11210 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
11211 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
11212 return DAG.getSelect(dl, VT, Overflow, Result, Product);
11213 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
11214 SDValue Result =
11215 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11216 SDValue Product = Result.getValue(0);
11217 SDValue Overflow = Result.getValue(1);
11218
11219 APInt MaxVal = APInt::getMaxValue(VTSize);
11220 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11221 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
11222 }
11223 }
11224
11225 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
11226 "Expected scale to be less than the number of bits if signed or at "
11227 "most the number of bits if unsigned.");
11228 assert(LHS.getValueType() == RHS.getValueType() &&
11229 "Expected both operands to be the same type");
11230
11231 // Get the upper and lower bits of the result.
11232 SDValue Lo, Hi;
11233 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11234 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
11235 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
11236 if (VT.isVector())
11237 WideVT =
11238 EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
11239 if (isOperationLegalOrCustom(LoHiOp, VT)) {
11240 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
11241 Lo = Result.getValue(0);
11242 Hi = Result.getValue(1);
11243 } else if (isOperationLegalOrCustom(HiOp, VT)) {
11244 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11245 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
11246 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
11247 // Try for a multiplication using a wider type.
11248 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11249 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
11250 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
11251 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
11252 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
11253 SDValue Shifted =
11254 DAG.getNode(ISD::SRA, dl, WideVT, Res,
11255 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
11256 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
11257 } else if (VT.isVector()) {
11258 return SDValue();
11259 } else {
11260 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
11261 }
11262
11263 if (Scale == VTSize)
11264 // Result is just the top half since we'd be shifting by the width of the
11265 // operand. Overflow is impossible, so this works for both UMULFIX and
11266 // UMULFIXSAT.
11267 return Hi;
11268
11269 // The result will need to be shifted right by the scale since both operands
11270 // are scaled. The result is given to us in 2 halves, so we only want part of
11271 // both in the result.
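// e.g. for an 8-bit Q4.4 multiply (Scale = 4), Hi:Lo is the 16-bit product
// with 8 fractional bits, and the funnel shift by 4 extracts bits [11:4],
// the Q4.4 result.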
11272 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
11273 DAG.getShiftAmountConstant(Scale, VT, dl));
11274 if (!Saturating)
11275 return Result;
11276
11277 if (!Signed) {
11278 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
11279 // widened multiplication) aren't all zeroes.
11280
11281 // Saturate to max if ((Hi >> Scale) != 0),
11282 // which is the same as if (Hi > ((1 << Scale) - 1))
11283 APInt MaxVal = APInt::getMaxValue(VTSize);
11284 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
11285 dl, VT);
11286 Result = DAG.getSelectCC(dl, Hi, LowMask,
11287 DAG.getConstant(MaxVal, dl, VT), Result,
11288 ISD::SETUGT);
11289
11290 return Result;
11291 }
11292
11293 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
11294 // widened multiplication) aren't all ones or all zeroes.
11295
11296 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
11297 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
11298
11299 if (Scale == 0) {
11300 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
11301 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
11302 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
11303 // Saturated to SatMin if wide product is negative, and SatMax if wide
11304 // product is positive ...
11305 SDValue Zero = DAG.getConstant(0, dl, VT);
11306 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
11307 ISD::SETLT);
11308 // ... but only if we overflowed.
11309 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
11310 }
11311
11312 // We handled Scale==0 above, so all the bits to examine are in Hi.
11313
11314 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
11315 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
11316 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
11317 dl, VT);
11318 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
11319 // Saturate to min if ((Hi >> (Scale - 1)) < -1),
11320 // which is the same as if (Hi < (-1 << (Scale - 1))).
11321 SDValue HighMask =
11322 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
11323 dl, VT);
11324 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
11325 return Result;
11326}
11327
11328SDValue
11329TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
11330 SDValue LHS, SDValue RHS,
11331 unsigned Scale, SelectionDAG &DAG) const {
11332 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
11333 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
11334 "Expected a fixed point division opcode");
11335
11336 EVT VT = LHS.getValueType();
11337 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
11338 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
11339 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11340
11341 // If there is enough room in the type to upscale the LHS or downscale the
11342 // RHS before the division, we can perform it in this type without having to
11343 // resize. For signed operations, the LHS headroom is the number of
11344 // redundant sign bits, and for unsigned ones it is the number of zeroes.
11345 // The headroom for the RHS is the number of trailing zeroes.
11346 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
11347 : DAG.computeKnownBits(LHS).countMinLeadingZeros();
11348 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11349
11350 // For signed saturating operations, we need to be able to detect true integer
11351 // division overflow; that is, when you have MIN / -EPS. However, this
11352 // is undefined behavior and if we emit divisions that could take such
11353 // values it may cause undesired behavior (arithmetic exceptions on x86, for
11354 // example).
11355 // Avoid this by requiring an extra bit so that we never get this case.
11356 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11357 // signed saturating division, we need to emit a whopping 32-bit division.
11358 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11359 return SDValue();
11360
11361 unsigned LHSShift = std::min(LHSLead, Scale);
11362 unsigned RHSShift = Scale - LHSShift;
11363
11364 // At this point, we know that if we shift the LHS up by LHSShift and the
11365 // RHS down by RHSShift, we can emit a regular division with a final scaling
11366 // factor of Scale.
11367
11368 if (LHSShift)
11369 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
11370 DAG.getShiftAmountConstant(LHSShift, VT, dl));
11371 if (RHSShift)
11372 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
11373 DAG.getShiftAmountConstant(RHSShift, VT, dl));
11374
11375 SDValue Quot;
11376 if (Signed) {
11377 // For signed operations, if the resulting quotient is negative and the
11378 // remainder is nonzero, subtract 1 from the quotient to round towards
11379 // negative infinity.
11380 SDValue Rem;
11381 // FIXME: Ideally we would always produce an SDIVREM here, but if the
11382 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
11383 // we couldn't just form a libcall, but the type legalizer doesn't do it.
11384 if (isTypeLegal(VT) &&
11385 isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
11386 Quot = DAG.getNode(ISD::SDIVREM, dl,
11387 DAG.getVTList(VT, VT),
11388 LHS, RHS);
11389 Rem = Quot.getValue(1);
11390 Quot = Quot.getValue(0);
11391 } else {
11392 Quot = DAG.getNode(ISD::SDIV, dl, VT,
11393 LHS, RHS);
11394 Rem = DAG.getNode(ISD::SREM, dl, VT,
11395 LHS, RHS);
11396 }
11397 SDValue Zero = DAG.getConstant(0, dl, VT);
11398 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
11399 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
11400 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
11401 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
11402 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
11403 DAG.getConstant(1, dl, VT));
11404 Quot = DAG.getSelect(dl, VT,
11405 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
11406 Sub1, Quot);
11407 } else
11408 Quot = DAG.getNode(ISD::UDIV, dl, VT,
11409 LHS, RHS);
11410
11411 return Quot;
11412}
11413
11414void TargetLowering::expandUADDSUBO(
11415 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11416 SDLoc dl(Node);
11417 SDValue LHS = Node->getOperand(0);
11418 SDValue RHS = Node->getOperand(1);
11419 bool IsAdd = Node->getOpcode() == ISD::UADDO;
11420
11421 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11422 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11423 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
11424 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11425 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11426 { LHS, RHS, CarryIn });
11427 Result = SDValue(NodeCarry.getNode(), 0);
11428 Overflow = SDValue(NodeCarry.getNode(), 1);
11429 return;
11430 }
11431
11432 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11433 LHS.getValueType(), LHS, RHS);
11434
11435 EVT ResultType = Node->getValueType(1);
11436 EVT SetCCType = getSetCCResultType(
11437 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11438 SDValue SetCC;
11439 if (IsAdd && isOneConstant(RHS)) {
11440 // Special case: uaddo X, 1 overflows if X+1 is 0. This potentially reduces
11441 // the live range of X. We assume comparing with 0 is cheap.
11442 // The general case (X + C) < C is not necessarily beneficial. Although we
11443 // reduce the live range of X, we may introduce the materialization of
11444 // constant C.
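// e.g. for i8: uaddo X, 1 overflows only for X == 255, and that is exactly
// the one input for which X + 1 == 0.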
11445 SetCC =
11446 DAG.getSetCC(dl, SetCCType, Result,
11447 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11448 } else if (IsAdd && isAllOnesConstant(RHS)) {
11449 // Special case: uaddo X, -1 overflows if X != 0.
11450 SetCC =
11451 DAG.getSetCC(dl, SetCCType, LHS,
11452 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11453 } else {
11454 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11455 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11456 }
11457 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11458}
11459
11460void TargetLowering::expandSADDSUBO(
11461 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11462 SDLoc dl(Node);
11463 SDValue LHS = Node->getOperand(0);
11464 SDValue RHS = Node->getOperand(1);
11465 bool IsAdd = Node->getOpcode() == ISD::SADDO;
11466
11467 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11468 LHS.getValueType(), LHS, RHS);
11469
11470 EVT ResultType = Node->getValueType(1);
11471 EVT OType = getSetCCResultType(
11472 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11473
11474 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11475 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11476 if (isOperationLegal(OpcSat, LHS.getValueType())) {
11477 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11478 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11479 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11480 return;
11481 }
11482
11483 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11484
11485 // For an addition, the result should be less than one of the operands (LHS)
11486 // if and only if the other operand (RHS) is negative, otherwise there will
11487 // be overflow.
11488 // For a subtraction, the result should be less than one of the operands
11489 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11490 // otherwise there will be overflow.
11491 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11492 SDValue ConditionRHS =
11493 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11494
11495 Overflow = DAG.getBoolExtOrTrunc(
11496 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11497 ResultType, ResultType);
11498}
11499
11500bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
11501 SDValue &Overflow, SelectionDAG &DAG) const {
11502 SDLoc dl(Node);
11503 EVT VT = Node->getValueType(0);
11504 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11505 SDValue LHS = Node->getOperand(0);
11506 SDValue RHS = Node->getOperand(1);
11507 bool isSigned = Node->getOpcode() == ISD::SMULO;
11508
11509 // For power-of-two multiplications we can use a simpler shift expansion.
11510 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
11511 const APInt &C = RHSC->getAPIntValue();
11512 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
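// e.g. smulo(i8 X, 4): Result = X << 2, and overflow is set iff
// (X << 2) >> 2 != X, i.e. iff X lies outside [-32, 31].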
11513 if (C.isPowerOf2()) {
11514 // smulo(x, signed_min) is same as umulo(x, signed_min).
11515 bool UseArithShift = isSigned && !C.isMinSignedValue();
11516 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
11517 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
11518 Overflow = DAG.getSetCC(dl, SetCCVT,
11519 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
11520 dl, VT, Result, ShiftAmt),
11521 LHS, ISD::SETNE);
11522 return true;
11523 }
11524 }
11525
11526 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
11527 if (VT.isVector())
11528 WideVT =
11529 EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
11530
11531 SDValue BottomHalf;
11532 SDValue TopHalf;
11533 static const unsigned Ops[2][3] =
11534 { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
11535 { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
11536 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
11537 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11538 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
11539 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
11540 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
11541 RHS);
11542 TopHalf = BottomHalf.getValue(1);
11543 } else if (isTypeLegal(WideVT)) {
11544 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
11545 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
11546 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
11547 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
11548 SDValue ShiftAmt =
11549 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
11550 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
11551 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
11552 } else {
11553 if (VT.isVector())
11554 return false;
11555
11556 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
11557 }
11558
11559 Result = BottomHalf;
11560 if (isSigned) {
11561 SDValue ShiftAmt = DAG.getShiftAmountConstant(
11562 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
11563 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
11564 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
11565 } else {
11566 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
11567 DAG.getConstant(0, dl, VT), ISD::SETNE);
11568 }
11569
11570 // Truncate the result if SetCC returns a larger type than needed.
11571 EVT RType = Node->getValueType(1);
11572 if (RType.bitsLT(Overflow.getValueType()))
11573 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
11574
11575 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
11576 "Unexpected result type for S/UMULO legalization");
11577 return true;
11578}
11579
11580SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
11581 SDLoc dl(Node);
11582 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11583 SDValue Op = Node->getOperand(0);
11584 EVT VT = Op.getValueType();
11585
11586 // Try to use a shuffle reduction for power of two vectors.
11587 if (VT.isPow2VectorType()) {
11588 while (VT.getVectorNumElements() > 1) {
11589 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11590 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11591 break;
11592
11593 SDValue Lo, Hi;
11594 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11595 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11596 VT = HalfVT;
11597
11598 // Stop if splitting is enough to make the reduction legal.
11599 if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
11600 return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
11601 Node->getFlags());
11602 }
11603 }
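// e.g. a vecreduce_add of v8i32 becomes an ADD of two v4i32 halves, and so
// on, until the reduction is legal on the narrower type or the elementwise
// expansion below takes over.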
11604
11605 if (VT.isScalableVector())
11606 report_fatal_error(
11607 "Expanding reductions for scalable vectors is undefined.");
11608
11609 EVT EltVT = VT.getVectorElementType();
11610 unsigned NumElts = VT.getVectorNumElements();
11611
11612 SmallVector<SDValue, 8> Ops;
11613 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11614
11615 SDValue Res = Ops[0];
11616 for (unsigned i = 1; i < NumElts; i++)
11617 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11618
11619 // Result type may be wider than element type.
11620 if (EltVT != Node->getValueType(0))
11621 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11622 return Res;
11623}
11624
11625SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
11626 SDLoc dl(Node);
11627 SDValue AccOp = Node->getOperand(0);
11628 SDValue VecOp = Node->getOperand(1);
11629 SDNodeFlags Flags = Node->getFlags();
11630
11631 EVT VT = VecOp.getValueType();
11632 EVT EltVT = VT.getVectorElementType();
11633
11634 if (VT.isScalableVector())
11635 report_fatal_error(
11636 "Expanding reductions for scalable vectors is undefined.");
11637
11638 unsigned NumElts = VT.getVectorNumElements();
11639
11640 SmallVector<SDValue, 8> Ops;
11641 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11642
11643 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11644
11645 SDValue Res = AccOp;
11646 for (unsigned i = 0; i < NumElts; i++)
11647 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11648
11649 return Res;
11650}
11651
11652bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
11653 SelectionDAG &DAG) const {
11654 EVT VT = Node->getValueType(0);
11655 SDLoc dl(Node);
11656 bool isSigned = Node->getOpcode() == ISD::SREM;
11657 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11658 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11659 SDValue Dividend = Node->getOperand(0);
11660 SDValue Divisor = Node->getOperand(1);
11661 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11662 SDVTList VTs = DAG.getVTList(VT, VT);
11663 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11664 return true;
11665 }
11666 if (isOperationLegalOrCustom(DivOpc, VT)) {
11667 // X % Y -> X-X/Y*Y
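// e.g. srem(7,3) = 7 - (7/3)*3 = 7 - 6 = 1.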
11668 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11669 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11670 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11671 return true;
11672 }
11673 return false;
11674}
11675
11676SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
11677 SelectionDAG &DAG) const {
11678 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
11679 SDLoc dl(SDValue(Node, 0));
11680 SDValue Src = Node->getOperand(0);
11681
11682 // DstVT is the result type, while SatVT is the size to which we saturate
11683 EVT SrcVT = Src.getValueType();
11684 EVT DstVT = Node->getValueType(0);
11685
11686 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
11687 unsigned SatWidth = SatVT.getScalarSizeInBits();
11688 unsigned DstWidth = DstVT.getScalarSizeInBits();
11689 assert(SatWidth <= DstWidth &&
11690 "Expected saturation width smaller than result width");
11691
11692 // Determine minimum and maximum integer values and their corresponding
11693 // floating-point values.
11694 APInt MinInt, MaxInt;
11695 if (IsSigned) {
11696 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
11697 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
11698 } else {
11699 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
11700 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
11701 }
11702
11703 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
11704 // libcall emission cannot handle this. Large result types will fail.
11705 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
11706 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
11707 SrcVT = Src.getValueType();
11708 }
11709
11710 const fltSemantics &Sem = SrcVT.getFltSemantics();
11711 APFloat MinFloat(Sem);
11712 APFloat MaxFloat(Sem);
11713
11714 APFloat::opStatus MinStatus =
11715 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
11716 APFloat::opStatus MaxStatus =
11717 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
11718 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
11719 !(MaxStatus & APFloat::opStatus::opInexact);
11720
11721 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
11722 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
11723
11724 // If the integer bounds are exactly representable as floats and min/max are
11725 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
11726 // of comparisons and selects.
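// e.g. fptosi_sat i8 from 300.0f: the clamp to [-128.0, 127.0] yields
// 127.0, which then converts exactly to 127.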
11727 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
11728 isOperationLegal(ISD::FMAXNUM, SrcVT);
11729 if (AreExactFloatBounds && MinMaxLegal) {
11730 SDValue Clamped = Src;
11731
11732 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11733 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
11734 // Clamp by MaxFloat from above. NaN cannot occur.
11735 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
11736 // Convert clamped value to integer.
11737 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
11738 dl, DstVT, Clamped);
11739
11740 // In the unsigned case we're done, because we mapped NaN to MinFloat,
11741 // which will cast to zero.
11742 if (!IsSigned)
11743 return FpToInt;
11744
11745 // Otherwise, select 0 if Src is NaN.
11746 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11747 EVT SetCCVT =
11748 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11749 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11750 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
11751 }
11752
11753 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
11754 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
11755
11756 // Result of direct conversion. The assumption here is that the operation is
11757 // non-trapping and it's fine to apply it to an out-of-range value if we
11758 // select it away later.
11759 SDValue FpToInt =
11760 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
11761
11762 SDValue Select = FpToInt;
11763
11764 EVT SetCCVT =
11765 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11766
11767 // If Src ULT MinFloat, select MinInt. In particular, this also selects
11768 // MinInt if Src is NaN.
11769 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
11770 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
11771 // If Src OGT MaxFloat, select MaxInt.
11772 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
11773 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
11774
11775 // In the unsigned case we are done, because we mapped NaN to MinInt, which
11776 // is already zero.
11777 if (!IsSigned)
11778 return Select;
11779
11780 // Otherwise, select 0 if Src is NaN.
11781 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11782 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11783 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
11784}
11785
11786SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
11787 const SDLoc &dl,
11788 SelectionDAG &DAG) const {
11789 EVT OperandVT = Op.getValueType();
11790 if (OperandVT.getScalarType() == ResultVT.getScalarType())
11791 return Op;
11792 EVT ResultIntVT = ResultVT.changeTypeToInteger();
11793 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11794 // can induce double-rounding which may alter the results. We can
11795 // correct for this using a trick explained in: Boldo, Sylvie, and
11796 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11797 // World Congress. 2005.
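// Rounding the intermediate result to odd preserves the sticky information:
// an inexact narrowing leaves the low bit set, so the intermediate value can
// never land exactly on a halfway point of the final format, and the second
// rounding cannot pick the wrong side.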
11798 SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
11799 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);
11800
11801 // We can keep the narrow value as-is if narrowing was exact (no
11802 // rounding error), the wide value was NaN (the narrow value is also
11803 // NaN and should be preserved) or if we rounded to the odd value.
11804 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
11805 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
11806 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
11807 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
11808 EVT ResultIntVTCCVT = getSetCCResultType(
11809 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
11810 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
11811 // The result is already odd so we don't need to do anything.
11812 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
11813
11814 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11815 Op.getValueType());
11816 // We keep results which are exact, odd or NaN.
11817 SDValue KeepNarrow =
11818 DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
11819 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11820 // We morally performed a round-down if AbsNarrow is smaller than
11821 // AbsWide.
11822 SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
11823 SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
11824 SDValue NarrowIsRd =
11825 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
11826 // If the narrow value is odd or exact, pick it.
11827 // Otherwise, narrow is even and corresponds to either the rounded-up
11828 // or rounded-down value. If narrow is the rounded-down value, we want
11829 // the rounded-up value as it will be odd.
11830 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
11831 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
11832 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
11833 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
11834}
11835
11836SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
11837 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11838 SDValue Op = Node->getOperand(0);
11839 EVT VT = Node->getValueType(0);
11840 SDLoc dl(Node);
11841 if (VT.getScalarType() == MVT::bf16) {
11842 if (Node->getConstantOperandVal(1) == 1) {
11843 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
11844 }
11845 EVT OperandVT = Op.getValueType();
11846 SDValue IsNaN = DAG.getSetCC(
11847 dl,
11848 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
11849 Op, Op, ISD::SETUO);
11850
11851 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11852 // can induce double-rounding which may alter the results. We can
11853 // correct for this using a trick explained in: Boldo, Sylvie, and
11854 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11855 // World Congress. 2005.
11856 EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
11857 EVT I32 = F32.changeTypeToInteger();
11858 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
11859 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11860
11861 // Conversions should set NaN's quiet bit. This also prevents NaNs from
11862 // turning into infinities.
11863 SDValue NaN =
11864 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
11865
11866 // Factor in the contribution of the low 16 bits.
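// This implements round-to-nearest-even by biasing: e.g. bits 0x3F808000
// (Lsb = 0) get bias 0x7fff and round down to 0x3F80, while 0x3F818000
// (Lsb = 1) gets bias 0x8000 and rounds up to 0x3F82.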
11867 SDValue One = DAG.getConstant(1, dl, I32);
11868 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
11869 DAG.getShiftAmountConstant(16, I32, dl));
11870 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
11871 SDValue RoundingBias =
11872 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
11873 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
11874
11875 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
11876 // 0x80000000.
11877 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
11878
11879 // Now that we have rounded, shift the bits into position.
11880 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
11881 DAG.getShiftAmountConstant(16, I32, dl));
11882 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11883 EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11884 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
11885 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
11886 }
11887 return SDValue();
11888}
11889
11890SDValue TargetLowering::expandVectorSplice(SDNode *Node,
11891 SelectionDAG &DAG) const {
11892 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11893 assert(Node->getValueType(0).isScalableVector() &&
11894 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11895
11896 EVT VT = Node->getValueType(0);
11897 SDValue V1 = Node->getOperand(0);
11898 SDValue V2 = Node->getOperand(1);
11899 int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
11900 SDLoc DL(Node);
11901
11902 // Expand through memory thusly:
11903 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11904 // Store V1, Ptr
11905 // Store V2, Ptr + sizeof(V1)
11906 // If (Imm < 0)
11907 // TrailingElts = -Imm
11908 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11909 // else
11910 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
11911 // Res = Load Ptr
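// e.g. Imm = -2 starts the load two elements before the end of V1, so the
// result is the last two elements of V1 followed by the leading elements
// of V2.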
11912
11913 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
11914
11915 EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
11916 VT.getVectorElementCount() * 2);
11917 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11918 EVT PtrVT = StackPtr.getValueType();
11919 auto &MF = DAG.getMachineFunction();
11920 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11921 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11922
11923 // Store the lo part of CONCAT_VECTORS(V1, V2)
11924 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
11925 // Store the hi part of CONCAT_VECTORS(V1, V2)
11926 SDValue OffsetToV2 = DAG.getVScale(
11927 DL, PtrVT,
11928 APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
11929 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
11930 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
11931
11932 if (Imm >= 0) {
11933 // Load back the required element. getVectorElementPointer takes care of
11934 // clamping the index if it's out-of-bounds.
11935 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
11936 // Load the spliced result
11937 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
11938 MachinePointerInfo::getUnknownStack(MF));
11939 }
11940
11941 uint64_t TrailingElts = -Imm;
11942
11943 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11944 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11945 SDValue TrailingBytes =
11946 DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
11947
11948 if (TrailingElts > VT.getVectorMinNumElements()) {
11949 SDValue VLBytes =
11950 DAG.getVScale(DL, PtrVT,
11951 APInt(PtrVT.getFixedSizeInBits(),
11952 VT.getStoreSize().getKnownMinValue()));
11953 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
11954 }
11955
11956 // Calculate the start address of the spliced result.
11957 StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
11958
11959 // Load the spliced result
11960 return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
11961 MachinePointerInfo::getUnknownStack(MF));
11962}
11963
11964SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
11965 SelectionDAG &DAG) const {
11966 SDLoc DL(Node);
11967 SDValue Vec = Node->getOperand(0);
11968 SDValue Mask = Node->getOperand(1);
11969 SDValue Passthru = Node->getOperand(2);
11970
11971 EVT VecVT = Vec.getValueType();
11972 EVT ScalarVT = VecVT.getScalarType();
11973 EVT MaskVT = Mask.getValueType();
11974 EVT MaskScalarVT = MaskVT.getScalarType();
11975
11976 // Needs to be handled by targets that have scalable vector types.
11977 if (VecVT.isScalableVector())
11978 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11979
11980 SDValue StackPtr = DAG.CreateStackTemporary(
11981 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
11982 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11983 MachinePointerInfo PtrInfo =
11984 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
11985
11986 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
11987 SDValue Chain = DAG.getEntryNode();
11988 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
11989
11990 bool HasPassthru = !Passthru.isUndef();
11991
11992 // If we have a passthru vector, store it on the stack, overwrite the matching
11993 // positions and then re-write the last element that was potentially
11994 // overwritten even though mask[i] = false.
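// e.g. Vec = {a,b,c,d}, Mask = {1,0,1,0}, Passthru = {p,q,r,s}: after the
// element stores the stack holds {a,c,d,s} (d is a stray write), and the
// fixup below reloads element popcount(Mask) = 2 of the passthru (r),
// giving {a,c,r,s}.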
11995 if (HasPassthru)
11996 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
11997
11998 SDValue LastWriteVal;
11999 APInt PassthruSplatVal;
12000 bool IsSplatPassthru =
12001 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
12002
12003 if (IsSplatPassthru) {
12004 // As we do not know which position we wrote to last, we cannot simply
12005 // access that index from the passthru vector. So we first check if passthru
12006 // is a splat vector, to use any element ...
12007 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
12008 } else if (HasPassthru) {
12009 // ... if it is not a splat vector, we need to get the passthru value at
12010 // position = popcount(mask) and re-load it from the stack before it is
12011 // overwritten in the loop below.
12012 EVT PopcountVT = ScalarVT.changeTypeToInteger();
12013 SDValue Popcount = DAG.getNode(
12014 ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
12015 Popcount =
12016 DAG.getNode(ISD::ZERO_EXTEND, DL,
12017 MaskVT.changeVectorElementType(PopcountVT), Popcount);
12018 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
12019 SDValue LastElmtPtr =
12020 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
12021 LastWriteVal = DAG.getLoad(
12022 ScalarVT, DL, Chain, LastElmtPtr,
12023 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
12024 Chain = LastWriteVal.getValue(1);
12025 }
12026
12027 unsigned NumElms = VecVT.getVectorNumElements();
12028 for (unsigned I = 0; I < NumElms; I++) {
12029 SDValue ValI = DAG.getExtractVectorElt(DL, ScalarVT, Vec, I);
12030 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12031 Chain = DAG.getStore(
12032 Chain, DL, ValI, OutPtr,
12033 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
12034
12035 // Get the mask value and add it to the current output position. This
12036 // either increments by 1 if MaskI is true or adds 0 otherwise.
12037 // Freeze in case we have poison/undef mask entries.
12038 SDValue MaskI = DAG.getExtractVectorElt(DL, MaskScalarVT, Mask, I);
12039 MaskI = DAG.getFreeze(MaskI);
12040 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
12041 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
12042 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
12043
12044 if (HasPassthru && I == NumElms - 1) {
12045 SDValue EndOfVector =
12046 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
12047 SDValue AllLanesSelected =
12048 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
12049 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
12050 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12051
12052 // Re-write the last ValI if all lanes were selected. Otherwise,
12053 // overwrite the last write with the passthru value.
12054 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
12055 LastWriteVal, SDNodeFlags::Unpredictable);
12056 Chain = DAG.getStore(
12057 Chain, DL, LastWriteVal, OutPtr,
12058 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
12059 }
12060 }
12061
12062 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12063}
12064
12065SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
12066 SelectionDAG &DAG) const {
12067 SDLoc DL(N);
12068 SDValue Acc = N->getOperand(0);
12069 SDValue MulLHS = N->getOperand(1);
12070 SDValue MulRHS = N->getOperand(2);
12071 EVT AccVT = Acc.getValueType();
12072 EVT MulOpVT = MulLHS.getValueType();
12073
12074 EVT ExtMulOpVT =
12075 EVT::getVectorVT(*DAG.getContext(), AccVT.getVectorElementType(),
12076 MulOpVT.getVectorElementCount());
12077
12078 unsigned ExtOpcLHS, ExtOpcRHS;
12079 switch (N->getOpcode()) {
12080 default:
12081 llvm_unreachable("Unexpected opcode");
12082 case ISD::PARTIAL_REDUCE_UMLA:
12083 ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
12084 break;
12085 case ISD::PARTIAL_REDUCE_SMLA:
12086 ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
12087 break;
12088 case ISD::PARTIAL_REDUCE_FMLA:
12089 ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
12090 break;
12091 }
12092
12093 if (ExtMulOpVT != MulOpVT) {
12094 MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
12095 MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
12096 }
12097 SDValue Input = MulLHS;
12098 if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
12099 if (!llvm::isOneOrOneSplatFP(MulRHS))
12100 Input = DAG.getNode(ISD::FMUL, DL, ExtMulOpVT, MulLHS, MulRHS);
12101 } else if (!llvm::isOneOrOneSplat(MulRHS)) {
12102 Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
12103 }
12104
12105 unsigned Stride = AccVT.getVectorMinNumElements();
12106 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
12107
12108 // Collect all of the subvectors
12109 std::deque<SDValue> Subvectors = {Acc};
12110 for (unsigned I = 0; I < ScaleFactor; I++)
12111 Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
12112
12113 unsigned FlatNode =
12114 N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;
12115
12116 // Flatten the subvector tree
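// e.g. with three subvectors: {Acc,S0,S1,S2} -> {S1,S2,Acc+S0}
// -> {Acc+S0,S1+S2} -> {(Acc+S0)+(S1+S2)}, a balanced tree of adds.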
12117 while (Subvectors.size() > 1) {
12118 Subvectors.push_back(
12119 DAG.getNode(FlatNode, DL, AccVT, {Subvectors[0], Subvectors[1]}));
12120 Subvectors.pop_front();
12121 Subvectors.pop_front();
12122 }
12123
12124 assert(Subvectors.size() == 1 &&
12125 "There should only be one subvector after tree flattening");
12126
12127 return Subvectors[0];
12128}
12129
12130/// Given a store node \p StoreNode, return true if it is safe to fold that node
12131/// into \p FPNode, which expands to a library call with output pointers.
12132static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode,
12133 SDNode *FPNode) {
12134 SmallVector<const SDNode *, 8> Worklist;
12135 SmallVector<const SDNode *, 8> DeferredNodes;
12136 SmallPtrSet<const SDNode *, 16> Visited;
12137
12138 // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
12139 for (SDValue Op : StoreNode->ops())
12140 if (Op.getNode() != FPNode)
12141 Worklist.push_back(Op.getNode());
12142
12143 unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
12144 while (!Worklist.empty()) {
12145 const SDNode *Node = Worklist.pop_back_val();
12146 auto [_, Inserted] = Visited.insert(Node);
12147 if (!Inserted)
12148 continue;
12149
12150 if (MaxSteps > 0 && Visited.size() >= MaxSteps)
12151 return false;
12152
12153 // Reached the FPNode (would result in a cycle).
12154 // OR Reached CALLSEQ_START (would result in nested call sequences).
12155 if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
12156 return false;
12157
12158 if (Node->getOpcode() == ISD::CALLSEQ_END) {
12159 // Defer looking into call sequences (so we can check we're outside one).
12160 // We still need to look through these for the predecessor check.
12161 DeferredNodes.push_back(Node);
12162 continue;
12163 }
12164
12165 for (SDValue Op : Node->ops())
12166 Worklist.push_back(Op.getNode());
12167 }
12168
12169 // True if we're outside a call sequence and don't have the FPNode as a
12170 // predecessor. No cycles or nested call sequences possible.
12171 return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes,
12172 MaxSteps);
12173}
12174
12175bool TargetLowering::expandMultipleResultFPLibCall(
12176 SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
12177 SmallVectorImpl<SDValue> &Results,
12178 std::optional<unsigned> CallRetResNo) const {
12179 if (LC == RTLIB::UNKNOWN_LIBCALL)
12180 return false;
12181
12182 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
12183 if (LibcallImpl == RTLIB::Unsupported)
12184 return false;
12185
12186 LLVMContext &Ctx = *DAG.getContext();
12187 EVT VT = Node->getValueType(0);
12188 unsigned NumResults = Node->getNumValues();
12189
12190 // Find users of the node that store the results (and share input chains). The
12191 // destination pointers can be used instead of creating stack allocations.
12192 SDValue StoresInChain;
12193 SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
12194 for (SDNode *User : Node->users()) {
12195 if (!ISD::isNormalStore(User))
12196 continue;
12197 auto *ST = cast<StoreSDNode>(User);
12198 SDValue StoreValue = ST->getValue();
12199 unsigned ResNo = StoreValue.getResNo();
12200 // Ensure the store corresponds to an output pointer.
12201 if (CallRetResNo == ResNo)
12202 continue;
12203 // Ensure the store is to the default address space and is not atomic or volatile.
12204 if (!ST->isSimple() || ST->getAddressSpace() != 0)
12205 continue;
12206 // Ensure all store chains are the same (so they don't alias).
12207 if (StoresInChain && ST->getChain() != StoresInChain)
12208 continue;
12209 // Ensure the store is properly aligned.
12210 Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx);
12211 if (ST->getAlign() <
12212 DAG.getDataLayout().getABITypeAlign(StoreType->getScalarType()))
12213 continue;
12214 // Avoid:
12215 // 1. Creating cyclic dependencies.
12216 // 2. Expanding the node to a call within a call sequence.
12217 if (!canFoldStoreIntoLibCallOutputPointers(ST, Node))
12218 continue;
12219 ResultStores[ResNo] = ST;
12220 StoresInChain = ST->getChain();
12221 }
12222
12223 ArgListTy Args;
12224
12225 // Pass the arguments.
12226 for (const SDValue &Op : Node->op_values()) {
12227 EVT ArgVT = Op.getValueType();
12228 Type *ArgTy = ArgVT.getTypeForEVT(Ctx);
12229 Args.emplace_back(Op, ArgTy);
12230 }
12231
12232 // Pass the output pointers.
12233 SmallVector<SDValue, 2> ResultPtrs(NumResults);
12234 Type *PointerTy = PointerType::getUnqual(Ctx);
12235 for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) {
12236 if (ResNo == CallRetResNo)
12237 continue;
12238 EVT ResVT = Node->getValueType(ResNo);
12239 SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(ResVT);
12240 ResultPtrs[ResNo] = ResultPtr;
12241 Args.emplace_back(ResultPtr, PointerTy);
12242 }
12243
12244 SDLoc DL(Node);
12245 if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl)) {
12245
12247 // Pass the vector mask (if required).
12248 EVT MaskVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
12249 SDValue Mask = DAG.getBoolConstant(true, DL, MaskVT, VT);
12250 Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
12251 }
12252
12253 Type *RetType = CallRetResNo.has_value()
12254 ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
12255 : Type::getVoidTy(Ctx);
12256 SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
12257 SDValue Callee = DAG.getExternalSymbol(getLibcallImplName(LibcallImpl).data(),
12258 getPointerTy(DAG.getDataLayout()));
12259 TargetLowering::CallLoweringInfo CLI(DAG);
12260 CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
12261 getLibcallImplCallingConv(LibcallImpl), RetType, Callee, std::move(Args));
12262
12263 auto [Call, CallChain] = LowerCallTo(CLI);
12264
12265 for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
12266 if (ResNo == CallRetResNo) {
12267 Results.push_back(Call);
12268 continue;
12269 }
12270 MachinePointerInfo PtrInfo;
12271 SDValue LoadResult = DAG.getLoad(Node->getValueType(ResNo), DL, CallChain,
12272 ResultPtr, PtrInfo);
12273 SDValue OutChain = LoadResult.getValue(1);
12274
12275 if (StoreSDNode *ST = ResultStores[ResNo]) {
12276 // Replace store with the library call.
12277 DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
12278 PtrInfo = ST->getPointerInfo();
12279 } else {
12280 PtrInfo = MachinePointerInfo::getFixedStack(
12281 DAG.getMachineFunction(),
12282 cast<FrameIndexSDNode>(ResultPtr)->getIndex());
12283 }
12284
12285 Results.push_back(LoadResult);
12286 }
12287
12288 return true;
12289}
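// Editorial sketch (not from the source): at the source level, the expansion
// above is equivalent to replacing a multi-result FP operation with a void
// libcall that takes output pointers, then reloading each result. A
// hypothetical stand-in for the sincos case:

#include <cmath>

// Assumed runtime routine in the style of GNU sincosf.
static void sincosf_libcall(float X, float *SinOut, float *CosOut) {
  *SinOut = std::sin(X);
  *CosOut = std::cos(X);
}

static void expandedSinCos(float X, float &Sin, float &Cos) {
  // Stand-ins for DAG.CreateStackTemporary (or folded store destinations).
  float SinTmp, CosTmp;
  sincosf_libcall(X, &SinTmp, &CosTmp);
  // Stand-ins for the DAG.getLoad results pushed onto Results.
  Sin = SinTmp;
  Cos = CosTmp;
}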
12290
12291bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
12292 SDValue &LHS, SDValue &RHS,
12293 SDValue &CC, SDValue Mask,
12294 SDValue EVL, bool &NeedInvert,
12295 const SDLoc &dl, SDValue &Chain,
12296 bool IsSignaling) const {
12297 MVT OpVT = LHS.getSimpleValueType();
12298 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
12299 NeedInvert = false;
12300 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
12301 bool IsNonVP = !EVL;
12302 switch (getCondCodeAction(CCCode, OpVT)) {
12303 default:
12304 llvm_unreachable("Unknown condition code action!");
12305 case TargetLowering::Legal:
12306 // Nothing to do.
12307 break;
12308 case TargetLowering::Expand: {
12309 ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
12310 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12311 std::swap(LHS, RHS);
12312 CC = DAG.getCondCode(InvCC);
12313 return true;
12314 }
12315 // Swapping operands didn't work. Try inverting the condition.
12316 bool NeedSwap = false;
12317 InvCC = getSetCCInverse(CCCode, OpVT);
12318 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
12319 // If inverting the condition is not enough, try swapping operands
12320 // on top of it.
12321 InvCC = ISD::getSetCCSwappedOperands(InvCC);
12322 NeedSwap = true;
12323 }
12324 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12325 CC = DAG.getCondCode(InvCC);
12326 NeedInvert = true;
12327 if (NeedSwap)
12328 std::swap(LHS, RHS);
12329 return true;
12330 }
12331
12332 // Special case: expand i1 comparisons using logical operations.
12333 if (OpVT == MVT::i1) {
12334 SDValue Ret;
12335 switch (CCCode) {
12336 default:
12337 llvm_unreachable("Unknown integer setcc!");
12338 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
12339 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
12340 MVT::i1);
12341 break;
12342 case ISD::SETNE: // X != Y --> (X ^ Y)
12343 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
12344 break;
12345 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
12346 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
12347 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
12348 DAG.getNOT(dl, LHS, MVT::i1));
12349 break;
12350 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12351 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12352 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
12353 DAG.getNOT(dl, RHS, MVT::i1));
12354 break;
12355 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
12356 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
12357 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
12358 DAG.getNOT(dl, LHS, MVT::i1));
12359 break;
12360 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
12361 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
12362 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
12363 DAG.getNOT(dl, RHS, MVT::i1));
12364 break;
12365 }
12366
12367 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
12368 RHS = SDValue();
12369 CC = SDValue();
12370 return true;
12371 }
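// Editorial aside (not from the source): the i1 identities above can be
// verified exhaustively over the four operand pairs; a minimal standalone
// check of the unsigned forms:
//
//   #include <cassert>
//   void checkI1SetCC() {
//     for (bool A : {false, true})
//       for (bool B : {false, true}) {
//         assert((A == B) == !(A ^ B)); // SETEQ: ~(X ^ Y)
//         assert((A != B) == (A ^ B));  // SETNE:  X ^ Y
//         assert((A < B) == (!A & B));  // SETULT: ~X & Y
//         assert((A > B) == (A & !B));  // SETUGT: ~Y & X
//         assert((A <= B) == (!A | B)); // SETULE: ~X | Y
//         assert((A >= B) == (A | !B)); // SETUGE: ~Y | X
//       }
//   }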
12372
12373 ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
12374 unsigned Opc = 0;
12375 switch (CCCode) {
12376 default:
12377 llvm_unreachable("Don't know how to expand this condition!");
12378 case ISD::SETUO:
12379 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
12380 CC1 = ISD::SETUNE;
12381 CC2 = ISD::SETUNE;
12382 Opc = ISD::OR;
12383 break;
12384 }
12385 assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
12386 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
12387 NeedInvert = true;
12388 [[fallthrough]];
12389 case ISD::SETO:
12390 assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
12391 "If SETO is expanded, SETOEQ must be legal!");
12392 CC1 = ISD::SETOEQ;
12393 CC2 = ISD::SETOEQ;
12394 Opc = ISD::AND;
12395 break;
12396 case ISD::SETONE:
12397 case ISD::SETUEQ:
12398 // If the SETUO or SETO CC isn't legal, we might be able to use
12399 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
12400 // of SETOGT/SETOLT to be legal; the other can be emulated by swapping
12401 // the operands.
12402 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12403 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
12404 isCondCodeLegal(ISD::SETOLT, OpVT))) {
12405 CC1 = ISD::SETOGT;
12406 CC2 = ISD::SETOLT;
12407 Opc = ISD::OR;
12408 NeedInvert = ((unsigned)CCCode & 0x8U);
12409 break;
12410 }
12411 [[fallthrough]];
12412 case ISD::SETOEQ:
12413 case ISD::SETOGT:
12414 case ISD::SETOGE:
12415 case ISD::SETOLT:
12416 case ISD::SETOLE:
12417 case ISD::SETUNE:
12418 case ISD::SETUGT:
12419 case ISD::SETUGE:
12420 case ISD::SETULT:
12421 case ISD::SETULE:
12422 // If we are floating point, assign and break; otherwise fall through.
12423 if (!OpVT.isInteger()) {
12424 // We can use the 4th bit to tell if we are the unordered
12425 // or ordered version of the opcode.
12426 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12427 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
12428 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
12429 break;
12430 }
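// Editorial note (not from the source): this relies on the ISD::CondCode
// enum layout. For the FP predicates SETOEQ..SETONE (values 1..6), the
// unordered counterparts SETUEQ..SETUNE are the same values with bit 3
// (0x8) set, and the ordering-agnostic integer forms live at
// (value & 0x7) | 0x10. For example:
//
//   SETOLT = 0x4,  SETULT = 0x4 | 0x8 = 0xC,  SETLT = 0x4 | 0x10 = 0x14
//
// so (CCCode & 0x8) picks SETUO/SETO and OR/AND, while
// ((CCCode & 0x7) | 0x10) recovers the comparison with ordering stripped.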
12431 // Fall through if this is an unsigned integer comparison.
12432 [[fallthrough]];
12433 case ISD::SETLE:
12434 case ISD::SETGT:
12435 case ISD::SETGE:
12436 case ISD::SETLT:
12437 case ISD::SETNE:
12438 case ISD::SETEQ:
12439 // If all combinations of inverting the condition and swapping operands
12440 // didn't work, then we have no means to expand the condition.
12441 llvm_unreachable("Don't know how to expand this condition!");
12442 }
12443
12444 SDValue SetCC1, SetCC2;
12445 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
12446 // If we aren't the ordered or unordered operation,
12447 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
12448 if (IsNonVP) {
12449 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
12450 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
12451 } else {
12452 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
12453 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
12454 }
12455 } else {
12456 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
12457 if (IsNonVP) {
12458 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
12459 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
12460 } else {
12461 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
12462 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
12463 }
12464 }
12465 if (Chain)
12466 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
12467 SetCC2.getValue(1));
12468 if (IsNonVP)
12469 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
12470 else {
12471 // Transform the binary opcode to the VP equivalent.
12472 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
12473 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
12474 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
12475 }
12476 RHS = SDValue();
12477 CC = SDValue();
12478 return true;
12479 }
12480 }
12481 return false;
12482}
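// Editorial sketch (not from the source): the SETONE/SETUEQ expansion chosen
// above can be sanity-checked with scalar floats, including NaN operands:

#include <cassert>

static void checkOneUeqExpansion(float A, float B) {
  bool Ordered = (A == A) && (B == B); // neither operand is NaN
  bool One = Ordered && A != B;        // SETONE semantics
  bool Ueq = !Ordered || A == B;       // SETUEQ semantics
  assert(One == ((A < B) || (A > B)));  // i.e. SETOGT || SETOLT
  assert(Ueq == !((A < B) || (A > B))); // same pair with NeedInvert = true
}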
12483
12484SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
12485 SelectionDAG &DAG) const {
12486 EVT VT = Node->getValueType(0);
12487 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12488 // split into two equal parts.
12489 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
12490 return SDValue();
12491
12492 // Restrict expansion to cases where both parts can be concatenated.
12493 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12494 if (LoVT != HiVT || !isTypeLegal(LoVT))
12495 return SDValue();
12496
12497 SDLoc DL(Node);
12498 unsigned Opcode = Node->getOpcode();
12499
12500 // Don't expand if the result is likely to be unrolled anyway.
12501 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
12502 return SDValue();
12503
12504 SmallVector<SDValue, 4> LoOps, HiOps;
12505 for (const SDValue &V : Node->op_values()) {
12506 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
12507 LoOps.push_back(Lo);
12508 HiOps.push_back(Hi);
12509 }
12510
12511 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
12512 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
12513 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
12514}
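// Editorial example (not from the source): with v4f32 legal but v8f32 not,
// a node such as
//   t0: v8f32 = fma a, b, c
// is rewritten by the function above into
//   lo: v4f32 = fma a.lo, b.lo, c.lo
//   hi: v4f32 = fma a.hi, b.hi, c.hi
//   t0: v8f32 = concat_vectors lo, hi
// where each operand half is produced with EXTRACT_SUBVECTOR by SplitVector.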
12515
12516SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
12517 const SDLoc &DL,
12518 EVT InVecVT, SDValue EltNo,
12519 LoadSDNode *OriginalLoad,
12520 SelectionDAG &DAG) const {
12521 assert(OriginalLoad->isSimple());
12522
12523 EVT VecEltVT = InVecVT.getVectorElementType();
12524
12525 // If the vector element type size is not a whole number of bytes then we are
12526 // unable to correctly compute an address to load only the extracted element
12527 // as a scalar.
12528 if (!VecEltVT.isByteSized())
12529 return SDValue();
12530
12531 ISD::LoadExtType ExtTy =
12532 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
12533 if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
12534 return SDValue();
12535
12536 std::optional<unsigned> ByteOffset;
12537 Align Alignment = OriginalLoad->getAlign();
12538 MachinePointerInfo MPI;
12539 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
12540 int Elt = ConstEltNo->getZExtValue();
12541 ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
12542 MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
12543 Alignment = commonAlignment(Alignment, *ByteOffset);
12544 } else {
12545 // Discard the pointer info except the address space because the memory
12546 // operand can't represent this new access since the offset is variable.
12547 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
12548 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
12549 }
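// Editorial note (not from the source): commonAlignment(A, Offset) is the
// largest power of two dividing both A and Offset. For a v4i32 load with
// align 16, the element loads at byte offsets 0, 4, 8 and 12 therefore get
// alignments 16, 4, 8 and 4 respectively.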
12550
12551 if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
12552 return SDValue();
12553
12554 unsigned IsFast = 0;
12555 if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
12556 OriginalLoad->getAddressSpace(), Alignment,
12557 OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
12558 !IsFast)
12559 return SDValue();
12560
12561 // The original DAG loaded the entire vector from memory, so arithmetic
12562 // within it must be inbounds.
12563 SDValue NewPtr = getInboundsVectorElementPointer(
12564 DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
12565
12566 // We are replacing a vector load with a scalar load. The new load must have
12567 // identical memory op ordering to the original.
12568 SDValue Load;
12569 if (ResultVT.bitsGT(VecEltVT)) {
12570 // If the result type of vextract is wider than the load, then issue an
12571 // extending load instead.
12572 ISD::LoadExtType ExtType = isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT)
12573 ? ISD::ZEXTLOAD
12574 : ISD::EXTLOAD;
12575 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
12576 NewPtr, MPI, VecEltVT, Alignment,
12577 OriginalLoad->getMemOperand()->getFlags(),
12578 OriginalLoad->getAAInfo());
12579 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
12580 } else {
12581 // The result type is narrower than or the same width as the vector element.
12582 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
12583 Alignment, OriginalLoad->getMemOperand()->getFlags(),
12584 OriginalLoad->getAAInfo());
12585 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
12586 if (ResultVT.bitsLT(VecEltVT))
12587 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
12588 else
12589 Load = DAG.getBitcast(ResultVT, Load);
12590 }
12591
12592 return Load;
12593}
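// Editorial sketch (not from the source): semantically, the rewrite above
// replaces "load the whole vector, then extract lane Idx" with one scalar
// load at BasePtr + Idx * sizeof(element). A hypothetical stand-in:

#include <cstddef>
#include <cstring>

static float loadExtractedElement(const float *VecInMemory, std::size_t Idx) {
  // ByteOffset = VecEltVT.getSizeInBits() * Elt / 8, as computed above.
  const char *Base = reinterpret_cast<const char *>(VecInMemory);
  float Elt;
  std::memcpy(&Elt, Base + Idx * sizeof(float), sizeof(float));
  return Elt;
}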
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool isSigned(unsigned int Opcode)
#define _
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
#define T
#define T1
#define P(N)
Function const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
static Split data
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if if this FPClassTest can be performed with an ordered fcmp to 0,...
static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, SDNode *FPNode)
Given a store node StoreNode, return true if it is safe to fold that node into FPNode,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1329
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.h:1140
APInt bitcastToAPInt() const
Definition APFloat.h:1335
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1120
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1080
void changeSign()
Definition APFloat.h:1279
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1091
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1573
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1758
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1407
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:424
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1392
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1386
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1513
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1331
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1183
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
void setSignBit()
Set the sign bit to 1.
Definition APInt.h:1341
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1250
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1397
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:835
void negate()
Negate this APInt in place.
Definition APInt.h:1469
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1599
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1532
unsigned countLeadingZeros() const
Definition APInt.h:1607
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
Definition APInt.h:1436
unsigned logBase2() const
Definition APInt.h:1762
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:828
void setAllBits()
Set every bit to 1.
Definition APInt.h:1320
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1274
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:406
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1151
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1368
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:874
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
void clearBits(unsigned LoBit, unsigned HiBit)
Clear the bits from LoBit (inclusive) to HiBit (exclusive) to 0.
Definition APInt.h:1418
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1389
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition APInt.h:1443
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:859
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1657
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1222
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition APInt.h:1344
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:715
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
This class represents a range of values.
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:207
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
std::vector< std::string > ConstraintCodeVector
Definition InlineAsm.h:104
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Flags getFlags() const
Return the raw flags of the source value,.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition Module.h:445
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
iterator end() const
Definition ArrayRef.h:343
iterator begin() const
Definition ArrayRef.h:342
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI std::optional< unsigned > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI bool shouldOptForSize() const
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:573
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
iterator end() const
Definition StringRef.h:114
Class to represent struct types.
LLVM_ABI void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
virtual bool preferSelectsOverBooleanArithmetic(EVT VT) const
Should we prefer selects to doing arithmetic on boolean types.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const
Get the CallingConv that should be used for the specified libcall implementation.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we tranform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
ISD::CondCode getSoftFloatCmpLibcallPredicate(RTLIB::LibcallImpl Call) const
Get the comparison predicate that's to be used to test the result of the comparison libcall against z...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
TargetLoweringBase(const TargetMachine &TM, const TargetSubtargetInfo &STI)
NOTE: The TargetMachine owns TLOF.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
static StringRef getLibcallImplName(RTLIB::LibcallImpl Call)
Get the libcall routine name for the specified libcall implementation.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
bool expandMultipleResultFPLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl< SDValue > &Results, std::optional< unsigned > CallRetResNo={}) const
Expands a node with multiple results to an FP or vector libcall.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
SmallVector< ConstraintPair > ConstraintGroup
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at vector Op: at this point we know that only the elements selected by DemandedEltMask in its result are ever used downstream, so try to simplify Op accordingly.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
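A sketch of the common calling pattern, assuming the usual surrounding lowering context (DAG, TLI, dl, Chain, LHS/RHS and VT come from the enclosing function; the choice of RTLIB::SREM_I128 is only an example):

// Lower a 128-bit signed remainder through a libcall.
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setIsSigned(true); // operands are signed for this libcall
SDValue Ops[2] = {LHS, RHS};
std::pair<SDValue, SDValue> CallResult =
    TLI.makeLibCall(DAG, RTLIB::SREM_I128, VT, Ops, CallOptions, dl, Chain);
SDValue Result = CallResult.first;    // the remainder value
SDValue OutChain = CallResult.second; // chain after the call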
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
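The unsigned form reduces to max minus min (equivalently select(a > b, a - b, b - a)), which never overflows the original width. A standalone check of that identity, not the DAG construction itself:

#include <algorithm>
#include <cassert>
#include <cstdint>

// abdu(a, b) == umax(a, b) - umin(a, b).
uint32_t abdu(uint32_t a, uint32_t b) {
  return std::max(a, b) - std::min(a, b);
}

int main() {
  assert(abdu(3, 10) == 7);
  assert(abdu(10, 3) == 7);
  assert(abdu(0, UINT32_MAX) == UINT32_MAX); // stays in range
}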
virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
virtual unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue unwrapAddress(SDValue N) const
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparisons with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product of LHS and RHS at twice their width.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
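The expansion follows the classic bit-parallel population count, rendered here as standalone C++ for a fixed 32-bit width rather than as SDNode construction (the DAG version is parameterized by type):

#include <cassert>
#include <cstdint>

// Sum adjacent 1-bit fields, then 2-bit fields, then 4-bit fields, and
// finally add the four byte counts together with a multiply.
uint32_t popcount32(uint32_t v) {
  v = v - ((v >> 1) & 0x55555555u);                 // 2-bit sums
  v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u); // 4-bit sums
  v = (v + (v >> 4)) & 0x0F0F0F0Fu;                 // 8-bit sums
  return (v * 0x01010101u) >> 24;                   // total in the top byte
}

int main() {
  assert(popcount32(0) == 0);
  assert(popcount32(0xF00Fu) == 8);
  assert(popcount32(~0u) == 32);
}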
virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
~TargetLowering() override
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparisons with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
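One concrete 32-bit instance of the table-lookup idea, as standalone C++: isolate the lowest set bit, multiply by a de Bruijn constant, and use the top bits of the product to index a 32-entry table (constants are the well-known de Bruijn sequence 0x077CB531; the DAG helper generalizes this per element width):

#include <cassert>
#include <cstdint>

unsigned cttz32(uint32_t v) {
  static const unsigned Table[32] = {
      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
  // (v & -v) keeps only the lowest set bit; the multiply then maps each
  // possible power of two to a distinct 5-bit table index.
  return Table[((v & (0u - v)) * 0x077CB531u) >> 27];
}

int main() {
  assert(cttz32(1) == 0);
  assert(cttz32(8) == 3);
  assert(cttz32(0x80000000u) == 31);
}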
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand a floating-point to signed integer conversion (e.g. f32 to i64).
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op: at this point we know that only the DemandedBits bits of its result are ever used downstream, so try to simplify Op accordingly.
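A sketch of the usual calling pattern from a target combine (N, Op, DCI, TLI, and DAG are assumed from an enclosing PerformDAGCombine; demanding only the low 16 bits is an arbitrary example):

// Ask SimplifyDemandedBits to shrink Op when only its low 16 bits are
// consumed, and commit any rewrite it discovered.
unsigned BW = Op.getScalarValueSizeInBits();
APInt Demanded = APInt::getLowBitsSet(BW, 16);
KnownBits Known;
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                                      !DCI.isBeforeLegalizeOps());
if (TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO)) {
  DCI.CommitTargetLoweringOpt(TLO); // splice the simplified nodes in
  return SDValue(N, 0);
}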
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
virtual bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const
For most targets, an LLVM type must be broken down into multiple smaller types.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
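One shape the expansion can take for a power-of-two width, shown as standalone C++ for a 32-bit left funnel shift; the right shift is split in two so an amount of zero never becomes an undefined shift by 32:

#include <cassert>
#include <cstdint>

uint32_t fshl32(uint32_t x, uint32_t y, uint32_t z) {
  z &= 31; // funnel shift amounts are taken modulo the bit width
  return (x << z) | ((y >> 1) >> (31 - z));
}

int main() {
  assert(fshl32(1, 0x80000000u, 1) == 3);           // bits cross the seam
  assert(fshl32(0xABCD0000u, 0, 0) == 0xABCD0000u); // amount 0 is identity
}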
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
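The transform is sound because the low bits of a sum depend only on the low bits of the operands. A standalone check of that identity (the specific values are arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t x = 0x123456789ABCDEF0ull, y = 0x0FEDCBA987654321ull;
  // Narrowing before the add produces the same low 32 bits as narrowing
  // after it, which is what justifies rewriting x+y in the smaller type.
  uint32_t narrowed = (uint32_t)x + (uint32_t)y;
  assert((uint32_t)(x + y) == narrowed);
}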
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis, Register R, KnownFPClass &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false, return true if Op is known to never be any NaN; if SNaN is true, return true if Op is known to never be a signaling NaN.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand an unsigned integer to floating-point conversion (e.g. i64 to f64).
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
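The unsigned variants admit expansions that never overflow the element width; a standalone rendering of those identities (the DAG expansion may pick other forms, e.g. widening, depending on what is legal):

#include <cassert>
#include <cstdint>

// avgfloor(a, b) = (a & b) + ((a ^ b) >> 1): shared bits plus half the
// differing bits. avgceil(a, b) = (a | b) - ((a ^ b) >> 1).
uint32_t avgflooru(uint32_t a, uint32_t b) { return (a & b) + ((a ^ b) >> 1); }
uint32_t avgceilu(uint32_t a, uint32_t b) { return (a | b) - ((a ^ b) >> 1); }

int main() {
  assert(avgflooru(7, 8) == 7 && avgceilu(7, 8) == 8);
  assert(avgflooru(UINT32_MAX, UINT32_MAX) == UINT32_MAX); // no overflow
}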
Primary interface to the complete machine description for the target machine.
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:790
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition Type.h:296
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:705
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth.
Definition APInt.cpp:3009
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:780
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:531
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:387
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:289
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:515
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:393
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:898
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ FMULADD
FMULADD - Performs a * b + c, with or without intermediate rounding.
Definition ISDOpcodes.h:521
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:400
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:712
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:779
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:534
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:541
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:351
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:887
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:406
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:174
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:707
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:299
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition ISDOpcodes.h:654
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:909
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:933
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:527
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:719
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
void stable_sort(R &&Range)
Definition STLExtras.h:2058
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
LLVM_ABI bool isOneOrOneSplatFP(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant floating-point value, or a splatted vector of a constant float...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
void * PointerTy
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:177
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1763
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
Definition ModRef.h:68
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1551
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:430
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:470
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:412
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:301
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:186
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition KnownBits.h:255
static LLVM_ABI KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
bool isZero() const
Returns true if value is all zero.
Definition KnownBits.h:80
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:242
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
void setAllConflict()
Make all bits known to be both zero and one.
Definition KnownBits.h:99
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:161
KnownBits byteSwap() const
Definition KnownBits.h:514
static LLVM_ABI std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:289
KnownBits reverseBits() const
Definition KnownBits.h:518
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition KnownBits.h:233
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
static LLVM_ABI KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:172
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:321
bool isSignUnknown() const
Returns true if we don't know the sign bit.
Definition KnownBits.h:69
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:311
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:180
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:248
static LLVM_ABI KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static LLVM_ABI std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
static LLVM_ABI std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static LLVM_ABI KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition KnownBits.cpp:60
static LLVM_ABI std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
static LLVM_ABI std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
Definition KnownBits.h:167
static LLVM_ABI std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static LLVM_ABI std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:286
static LLVM_ABI std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
static LLVM_ABI KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
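A fragment sketching how these queries compose (assumes the usual LLVM headers; the 8-bit width and values are chosen only for illustration):

// LHS is the constant 0x0F; RHS is only known to have its high nibble
// clear, i.e. some value in [0, 15].
KnownBits LHS = KnownBits::makeConstant(APInt(8, 0x0F));
KnownBits RHS(8);
RHS.Zero.setHighBits(4);
KnownBits Sum = KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
                                            /*NUW=*/false, LHS, RHS);
// The sum lies in [15, 30], so the top three bits are provably zero and
// Sum.countMinLeadingZeros() reflects that.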
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static bool hasVectorMaskArgument(RTLIB::LibcallImpl Impl)
Returns true if the function has a vector mask argument, which is assumed to be the last argument.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
void setNoSignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
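To make the magic concrete, here is a standalone 32-bit rendering of the multiply-high sequence for division by 7, using the constants this analysis produces for that divisor (magic 0x92492493, shift 2, per Hacker's Delight); BuildSDIV emits the DAG equivalent, with fixups that vary by divisor:

#include <cassert>
#include <cstdint>

int32_t divBy7(int32_t n) {
  int64_t Prod = (int64_t)(int32_t)0x92492493 * n;
  int32_t Q = (int32_t)(Prod >> 32); // multiply-high
  Q += n;                 // magic is negative, divisor positive: add back
  Q >>= 2;                // the magic shift amount
  Q += (uint32_t)Q >> 31; // round toward zero for negative quotients
  return Q;
}

int main() {
  for (int32_t n : {-100, -14, -6, 0, 6, 14, 100})
    assert(divBy7(n) == n / 7);
}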
This contains information for each constraint that we are lowering.
std::string ConstraintCode
This contains the actual string for the code, like "m".
LLVM_ABI unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
LLVM_ABI bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
MakeLibCallOptions & setIsSigned(bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...