// Doxygen page header (not source text): LLVM 19.0.0git — TargetLowering.cpp
// "Go to the documentation of this file."
//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

// NOTE(review): the doc-extraction dropped most of this file's #include lines;
// the headers below restore the ones the surviving code visibly requires.
// Verify the full list against the upstream repository.
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include <cctype>
using namespace llvm;
38
39/// NOTE: The TargetMachine owns TLOF.
41 : TargetLoweringBase(tm) {}
42
43const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
44 return nullptr;
45}
46
49}
50
51/// Check whether a given call node is in tail position within its function. If
52/// so, it sets Chain to the input chain of the tail call.
54 SDValue &Chain) const {
56
57 // First, check if tail calls have been disabled in this function.
58 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
59 return false;
60
61 // Conservatively require the attributes of the call to match those of
62 // the return. Ignore following attributes because they don't affect the
63 // call sequence.
64 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
65 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
66 Attribute::DereferenceableOrNull, Attribute::NoAlias,
67 Attribute::NonNull, Attribute::NoUndef})
68 CallerAttrs.removeAttribute(Attr);
69
70 if (CallerAttrs.hasAttributes())
71 return false;
72
73 // It's not safe to eliminate the sign / zero extension of the return value.
74 if (CallerAttrs.contains(Attribute::ZExt) ||
75 CallerAttrs.contains(Attribute::SExt))
76 return false;
77
78 // Check if the only use is a function return node.
79 return isUsedByReturnOnly(Node, Chain);
80}
81
83 const uint32_t *CallerPreservedMask,
84 const SmallVectorImpl<CCValAssign> &ArgLocs,
85 const SmallVectorImpl<SDValue> &OutVals) const {
86 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
87 const CCValAssign &ArgLoc = ArgLocs[I];
88 if (!ArgLoc.isRegLoc())
89 continue;
90 MCRegister Reg = ArgLoc.getLocReg();
91 // Only look at callee saved registers.
92 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
93 continue;
94 // Check that we pass the value used for the caller.
95 // (We look for a CopyFromReg reading a virtual register that is used
96 // for the function live-in value of register Reg)
97 SDValue Value = OutVals[I];
98 if (Value->getOpcode() == ISD::AssertZext)
99 Value = Value.getOperand(0);
100 if (Value->getOpcode() != ISD::CopyFromReg)
101 return false;
102 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
103 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
104 return false;
105 }
106 return true;
107}
108
109/// Set CallLoweringInfo attribute flags based on a call instruction
110/// and called function attributes.
112 unsigned ArgIdx) {
113 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
114 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
115 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
116 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
117 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
118 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
119 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
120 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
121 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
122 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
123 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
124 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
125 Alignment = Call->getParamStackAlign(ArgIdx);
126 IndirectType = nullptr;
128 "multiple ABI attributes?");
129 if (IsByVal) {
130 IndirectType = Call->getParamByValType(ArgIdx);
131 if (!Alignment)
132 Alignment = Call->getParamAlign(ArgIdx);
133 }
134 if (IsPreallocated)
135 IndirectType = Call->getParamPreallocatedType(ArgIdx);
136 if (IsInAlloca)
137 IndirectType = Call->getParamInAllocaType(ArgIdx);
138 if (IsSRet)
139 IndirectType = Call->getParamStructRetType(ArgIdx);
140}
141
142/// Generate a libcall taking the given operands as arguments and returning a
143/// result of type RetVT.
144std::pair<SDValue, SDValue>
147 MakeLibCallOptions CallOptions,
148 const SDLoc &dl,
149 SDValue InChain) const {
150 if (!InChain)
151 InChain = DAG.getEntryNode();
152
154 Args.reserve(Ops.size());
155
157 for (unsigned i = 0; i < Ops.size(); ++i) {
158 SDValue NewOp = Ops[i];
159 Entry.Node = NewOp;
160 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
161 Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
162 CallOptions.IsSExt);
163 Entry.IsZExt = !Entry.IsSExt;
164
165 if (CallOptions.IsSoften &&
167 Entry.IsSExt = Entry.IsZExt = false;
168 }
169 Args.push_back(Entry);
170 }
171
172 if (LC == RTLIB::UNKNOWN_LIBCALL)
173 report_fatal_error("Unsupported library call operation!");
176
177 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
179 bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
180 bool zeroExtend = !signExtend;
181
182 if (CallOptions.IsSoften &&
184 signExtend = zeroExtend = false;
185 }
186
187 CLI.setDebugLoc(dl)
188 .setChain(InChain)
189 .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
190 .setNoReturn(CallOptions.DoesNotReturn)
193 .setSExtResult(signExtend)
194 .setZExtResult(zeroExtend);
195 return LowerCallTo(CLI);
196}
197
199 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
200 unsigned SrcAS, const AttributeList &FuncAttributes) const {
201 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
202 Op.getSrcAlign() < Op.getDstAlign())
203 return false;
204
205 EVT VT = getOptimalMemOpType(Op, FuncAttributes);
206
207 if (VT == MVT::Other) {
208 // Use the largest integer type whose alignment constraints are satisfied.
209 // We only need to check DstAlign here as SrcAlign is always greater or
210 // equal to DstAlign (or zero).
211 VT = MVT::i64;
212 if (Op.isFixedDstAlign())
213 while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
214 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
216 assert(VT.isInteger());
217
218 // Find the largest legal integer type.
219 MVT LVT = MVT::i64;
220 while (!isTypeLegal(LVT))
221 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
222 assert(LVT.isInteger());
223
224 // If the type we've chosen is larger than the largest legal integer type
225 // then use that instead.
226 if (VT.bitsGT(LVT))
227 VT = LVT;
228 }
229
230 unsigned NumMemOps = 0;
231 uint64_t Size = Op.size();
232 while (Size) {
233 unsigned VTSize = VT.getSizeInBits() / 8;
234 while (VTSize > Size) {
235 // For now, only use non-vector load / store's for the left-over pieces.
236 EVT NewVT = VT;
237 unsigned NewVTSize;
238
239 bool Found = false;
240 if (VT.isVector() || VT.isFloatingPoint()) {
241 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
244 Found = true;
245 else if (NewVT == MVT::i64 &&
247 isSafeMemOpType(MVT::f64)) {
248 // i64 is usually not legal on 32-bit targets, but f64 may be.
249 NewVT = MVT::f64;
250 Found = true;
251 }
252 }
253
254 if (!Found) {
255 do {
256 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
257 if (NewVT == MVT::i8)
258 break;
259 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
260 }
261 NewVTSize = NewVT.getSizeInBits() / 8;
262
263 // If the new VT cannot cover all of the remaining bits, then consider
264 // issuing a (or a pair of) unaligned and overlapping load / store.
265 unsigned Fast;
266 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
268 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
270 Fast)
271 VTSize = Size;
272 else {
273 VT = NewVT;
274 VTSize = NewVTSize;
275 }
276 }
277
278 if (++NumMemOps > Limit)
279 return false;
280
281 MemOps.push_back(VT);
282 Size -= VTSize;
283 }
284
285 return true;
286}
287
288/// Soften the operands of a comparison. This code is shared among BR_CC,
289/// SELECT_CC, and SETCC handlers.
291 SDValue &NewLHS, SDValue &NewRHS,
292 ISD::CondCode &CCCode,
293 const SDLoc &dl, const SDValue OldLHS,
294 const SDValue OldRHS) const {
295 SDValue Chain;
296 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
297 OldRHS, Chain);
298}
299
301 SDValue &NewLHS, SDValue &NewRHS,
302 ISD::CondCode &CCCode,
303 const SDLoc &dl, const SDValue OldLHS,
304 const SDValue OldRHS,
305 SDValue &Chain,
306 bool IsSignaling) const {
307 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
308 // not supporting it. We can update this code when libgcc provides such
309 // functions.
310
311 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
312 && "Unsupported setcc type!");
313
314 // Expand into one or more soft-fp libcall(s).
315 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
316 bool ShouldInvertCC = false;
317 switch (CCCode) {
318 case ISD::SETEQ:
319 case ISD::SETOEQ:
320 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
321 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
322 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
323 break;
324 case ISD::SETNE:
325 case ISD::SETUNE:
326 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
327 (VT == MVT::f64) ? RTLIB::UNE_F64 :
328 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
329 break;
330 case ISD::SETGE:
331 case ISD::SETOGE:
332 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
333 (VT == MVT::f64) ? RTLIB::OGE_F64 :
334 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
335 break;
336 case ISD::SETLT:
337 case ISD::SETOLT:
338 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
339 (VT == MVT::f64) ? RTLIB::OLT_F64 :
340 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
341 break;
342 case ISD::SETLE:
343 case ISD::SETOLE:
344 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
345 (VT == MVT::f64) ? RTLIB::OLE_F64 :
346 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
347 break;
348 case ISD::SETGT:
349 case ISD::SETOGT:
350 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
351 (VT == MVT::f64) ? RTLIB::OGT_F64 :
352 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
353 break;
354 case ISD::SETO:
355 ShouldInvertCC = true;
356 [[fallthrough]];
357 case ISD::SETUO:
358 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
359 (VT == MVT::f64) ? RTLIB::UO_F64 :
360 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
361 break;
362 case ISD::SETONE:
363 // SETONE = O && UNE
364 ShouldInvertCC = true;
365 [[fallthrough]];
366 case ISD::SETUEQ:
367 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
368 (VT == MVT::f64) ? RTLIB::UO_F64 :
369 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
370 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
371 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
372 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
373 break;
374 default:
375 // Invert CC for unordered comparisons
376 ShouldInvertCC = true;
377 switch (CCCode) {
378 case ISD::SETULT:
379 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
380 (VT == MVT::f64) ? RTLIB::OGE_F64 :
381 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
382 break;
383 case ISD::SETULE:
384 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
385 (VT == MVT::f64) ? RTLIB::OGT_F64 :
386 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
387 break;
388 case ISD::SETUGT:
389 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
390 (VT == MVT::f64) ? RTLIB::OLE_F64 :
391 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
392 break;
393 case ISD::SETUGE:
394 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
395 (VT == MVT::f64) ? RTLIB::OLT_F64 :
396 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
397 break;
398 default: llvm_unreachable("Do not know how to soften this setcc!");
399 }
400 }
401
402 // Use the target specific return value for comparison lib calls.
404 SDValue Ops[2] = {NewLHS, NewRHS};
406 EVT OpsVT[2] = { OldLHS.getValueType(),
407 OldRHS.getValueType() };
408 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
409 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
410 NewLHS = Call.first;
411 NewRHS = DAG.getConstant(0, dl, RetVT);
412
413 CCCode = getCmpLibcallCC(LC1);
414 if (ShouldInvertCC) {
415 assert(RetVT.isInteger());
416 CCCode = getSetCCInverse(CCCode, RetVT);
417 }
418
419 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
420 // Update Chain.
421 Chain = Call.second;
422 } else {
423 EVT SetCCVT =
424 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
425 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
426 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
427 CCCode = getCmpLibcallCC(LC2);
428 if (ShouldInvertCC)
429 CCCode = getSetCCInverse(CCCode, RetVT);
430 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
431 if (Chain)
432 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
433 Call2.second);
434 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
435 Tmp.getValueType(), Tmp, NewLHS);
436 NewRHS = SDValue();
437 }
438}
439
440/// Return the entry encoding for a jump table in the current function. The
441/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
443 // In non-pic modes, just use the address of a block.
444 if (!isPositionIndependent())
446
447 // In PIC mode, if the target supports a GPRel32 directive, use it.
448 if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
450
451 // Otherwise, use a label difference.
453}
454
456 SelectionDAG &DAG) const {
457 // If our PIC model is GP relative, use the global offset table as the base.
458 unsigned JTEncoding = getJumpTableEncoding();
459
463
464 return Table;
465}
466
467/// This returns the relocation base for the given PIC jumptable, the same as
468/// getPICJumpTableRelocBase, but as an MCExpr.
469const MCExpr *
471 unsigned JTI,MCContext &Ctx) const{
472 // The normal PIC reloc base is the label at the start of the jump table.
473 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
474}
475
477 SDValue Addr, int JTI,
478 SelectionDAG &DAG) const {
479 SDValue Chain = Value;
480 // Jump table debug info is only needed if CodeView is enabled.
482 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
483 }
484 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
485}
486
487bool
489 const TargetMachine &TM = getTargetMachine();
490 const GlobalValue *GV = GA->getGlobal();
491
492 // If the address is not even local to this DSO we will have to load it from
493 // a got and then add the offset.
494 if (!TM.shouldAssumeDSOLocal(GV))
495 return false;
496
497 // If the code is position independent we will have to add a base register.
498 if (isPositionIndependent())
499 return false;
500
501 // Otherwise we can do it.
502 return true;
503}
504
505//===----------------------------------------------------------------------===//
506// Optimization Methods
507//===----------------------------------------------------------------------===//
508
509/// If the specified instruction has a constant integer operand and there are
510/// bits set in that constant that are not demanded, then clear those bits and
511/// return true.
513 const APInt &DemandedBits,
514 const APInt &DemandedElts,
515 TargetLoweringOpt &TLO) const {
516 SDLoc DL(Op);
517 unsigned Opcode = Op.getOpcode();
518
519 // Early-out if we've ended up calling an undemanded node, leave this to
520 // constant folding.
521 if (DemandedBits.isZero() || DemandedElts.isZero())
522 return false;
523
524 // Do target-specific constant optimization.
525 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
526 return TLO.New.getNode();
527
528 // FIXME: ISD::SELECT, ISD::SELECT_CC
529 switch (Opcode) {
530 default:
531 break;
532 case ISD::XOR:
533 case ISD::AND:
534 case ISD::OR: {
535 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
536 if (!Op1C || Op1C->isOpaque())
537 return false;
538
539 // If this is a 'not' op, don't touch it because that's a canonical form.
540 const APInt &C = Op1C->getAPIntValue();
541 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
542 return false;
543
544 if (!C.isSubsetOf(DemandedBits)) {
545 EVT VT = Op.getValueType();
546 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
547 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
548 return TLO.CombineTo(Op, NewOp);
549 }
550
551 break;
552 }
553 }
554
555 return false;
556}
557
559 const APInt &DemandedBits,
560 TargetLoweringOpt &TLO) const {
561 EVT VT = Op.getValueType();
562 APInt DemandedElts = VT.isVector()
564 : APInt(1, 1);
565 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
566}
567
568/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
569/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
570/// but it could be generalized for targets with other types of implicit
571/// widening casts.
573 const APInt &DemandedBits,
574 TargetLoweringOpt &TLO) const {
575 assert(Op.getNumOperands() == 2 &&
576 "ShrinkDemandedOp only supports binary operators!");
577 assert(Op.getNode()->getNumValues() == 1 &&
578 "ShrinkDemandedOp only supports nodes with one result!");
579
580 EVT VT = Op.getValueType();
581 SelectionDAG &DAG = TLO.DAG;
582 SDLoc dl(Op);
583
584 // Early return, as this function cannot handle vector types.
585 if (VT.isVector())
586 return false;
587
588 // Don't do this if the node has another user, which may require the
589 // full value.
590 if (!Op.getNode()->hasOneUse())
591 return false;
592
593 // Search for the smallest integer type with free casts to and from
594 // Op's type. For expedience, just check power-of-2 integer types.
595 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
596 unsigned DemandedSize = DemandedBits.getActiveBits();
597 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
598 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
599 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
600 if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
601 // We found a type with free casts.
602 SDValue X = DAG.getNode(
603 Op.getOpcode(), dl, SmallVT,
604 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
605 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
606 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
607 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
608 return TLO.CombineTo(Op, Z);
609 }
610 }
611 return false;
612}
613
615 DAGCombinerInfo &DCI) const {
616 SelectionDAG &DAG = DCI.DAG;
617 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
618 !DCI.isBeforeLegalizeOps());
619 KnownBits Known;
620
621 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
622 if (Simplified) {
623 DCI.AddToWorklist(Op.getNode());
625 }
626 return Simplified;
627}
628
630 const APInt &DemandedElts,
631 DAGCombinerInfo &DCI) const {
632 SelectionDAG &DAG = DCI.DAG;
633 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
634 !DCI.isBeforeLegalizeOps());
635 KnownBits Known;
636
637 bool Simplified =
638 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
639 if (Simplified) {
640 DCI.AddToWorklist(Op.getNode());
642 }
643 return Simplified;
644}
645
647 KnownBits &Known,
649 unsigned Depth,
650 bool AssumeSingleUse) const {
651 EVT VT = Op.getValueType();
652
653 // Since the number of lanes in a scalable vector is unknown at compile time,
654 // we track one bit which is implicitly broadcast to all lanes. This means
655 // that all lanes in a scalable vector are considered demanded.
656 APInt DemandedElts = VT.isFixedLengthVector()
658 : APInt(1, 1);
659 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
660 AssumeSingleUse);
661}
662
663// TODO: Under what circumstances can we create nodes? Constant folding?
665 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
666 SelectionDAG &DAG, unsigned Depth) const {
667 EVT VT = Op.getValueType();
668
669 // Limit search depth.
671 return SDValue();
672
673 // Ignore UNDEFs.
674 if (Op.isUndef())
675 return SDValue();
676
677 // Not demanding any bits/elts from Op.
678 if (DemandedBits == 0 || DemandedElts == 0)
679 return DAG.getUNDEF(VT);
680
681 bool IsLE = DAG.getDataLayout().isLittleEndian();
682 unsigned NumElts = DemandedElts.getBitWidth();
683 unsigned BitWidth = DemandedBits.getBitWidth();
684 KnownBits LHSKnown, RHSKnown;
685 switch (Op.getOpcode()) {
686 case ISD::BITCAST: {
687 if (VT.isScalableVector())
688 return SDValue();
689
690 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
691 EVT SrcVT = Src.getValueType();
692 EVT DstVT = Op.getValueType();
693 if (SrcVT == DstVT)
694 return Src;
695
696 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
697 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
698 if (NumSrcEltBits == NumDstEltBits)
699 if (SDValue V = SimplifyMultipleUseDemandedBits(
700 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
701 return DAG.getBitcast(DstVT, V);
702
703 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
704 unsigned Scale = NumDstEltBits / NumSrcEltBits;
705 unsigned NumSrcElts = SrcVT.getVectorNumElements();
706 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
707 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
708 for (unsigned i = 0; i != Scale; ++i) {
709 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
710 unsigned BitOffset = EltOffset * NumSrcEltBits;
711 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
712 if (!Sub.isZero()) {
713 DemandedSrcBits |= Sub;
714 for (unsigned j = 0; j != NumElts; ++j)
715 if (DemandedElts[j])
716 DemandedSrcElts.setBit((j * Scale) + i);
717 }
718 }
719
720 if (SDValue V = SimplifyMultipleUseDemandedBits(
721 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
722 return DAG.getBitcast(DstVT, V);
723 }
724
725 // TODO - bigendian once we have test coverage.
726 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
727 unsigned Scale = NumSrcEltBits / NumDstEltBits;
728 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
729 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
730 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
731 for (unsigned i = 0; i != NumElts; ++i)
732 if (DemandedElts[i]) {
733 unsigned Offset = (i % Scale) * NumDstEltBits;
734 DemandedSrcBits.insertBits(DemandedBits, Offset);
735 DemandedSrcElts.setBit(i / Scale);
736 }
737
738 if (SDValue V = SimplifyMultipleUseDemandedBits(
739 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
740 return DAG.getBitcast(DstVT, V);
741 }
742
743 break;
744 }
745 case ISD::AND: {
746 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
747 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
748
749 // If all of the demanded bits are known 1 on one side, return the other.
750 // These bits cannot contribute to the result of the 'and' in this
751 // context.
752 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
753 return Op.getOperand(0);
754 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
755 return Op.getOperand(1);
756 break;
757 }
758 case ISD::OR: {
759 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
760 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
761
762 // If all of the demanded bits are known zero on one side, return the
763 // other. These bits cannot contribute to the result of the 'or' in this
764 // context.
765 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
766 return Op.getOperand(0);
767 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
768 return Op.getOperand(1);
769 break;
770 }
771 case ISD::XOR: {
772 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
773 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
774
775 // If all of the demanded bits are known zero on one side, return the
776 // other.
777 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
778 return Op.getOperand(0);
779 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
780 return Op.getOperand(1);
781 break;
782 }
783 case ISD::SHL: {
784 // If we are only demanding sign bits then we can use the shift source
785 // directly.
786 if (const APInt *MaxSA =
787 DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
788 SDValue Op0 = Op.getOperand(0);
789 unsigned ShAmt = MaxSA->getZExtValue();
790 unsigned NumSignBits =
791 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
792 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
793 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
794 return Op0;
795 }
796 break;
797 }
798 case ISD::SETCC: {
799 SDValue Op0 = Op.getOperand(0);
800 SDValue Op1 = Op.getOperand(1);
801 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
802 // If (1) we only need the sign-bit, (2) the setcc operands are the same
803 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
804 // -1, we may be able to bypass the setcc.
805 if (DemandedBits.isSignMask() &&
809 // If we're testing X < 0, then this compare isn't needed - just use X!
810 // FIXME: We're limiting to integer types here, but this should also work
811 // if we don't care about FP signed-zero. The use of SETLT with FP means
812 // that we don't care about NaNs.
813 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
815 return Op0;
816 }
817 break;
818 }
820 // If none of the extended bits are demanded, eliminate the sextinreg.
821 SDValue Op0 = Op.getOperand(0);
822 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
823 unsigned ExBits = ExVT.getScalarSizeInBits();
824 if (DemandedBits.getActiveBits() <= ExBits &&
826 return Op0;
827 // If the input is already sign extended, just drop the extension.
828 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
829 if (NumSignBits >= (BitWidth - ExBits + 1))
830 return Op0;
831 break;
832 }
836 if (VT.isScalableVector())
837 return SDValue();
838
839 // If we only want the lowest element and none of extended bits, then we can
840 // return the bitcasted source vector.
841 SDValue Src = Op.getOperand(0);
842 EVT SrcVT = Src.getValueType();
843 EVT DstVT = Op.getValueType();
844 if (IsLE && DemandedElts == 1 &&
845 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
846 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
847 return DAG.getBitcast(DstVT, Src);
848 }
849 break;
850 }
852 if (VT.isScalableVector())
853 return SDValue();
854
855 // If we don't demand the inserted element, return the base vector.
856 SDValue Vec = Op.getOperand(0);
857 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
858 EVT VecVT = Vec.getValueType();
859 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
860 !DemandedElts[CIdx->getZExtValue()])
861 return Vec;
862 break;
863 }
865 if (VT.isScalableVector())
866 return SDValue();
867
868 SDValue Vec = Op.getOperand(0);
869 SDValue Sub = Op.getOperand(1);
870 uint64_t Idx = Op.getConstantOperandVal(2);
871 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
872 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
873 // If we don't demand the inserted subvector, return the base vector.
874 if (DemandedSubElts == 0)
875 return Vec;
876 break;
877 }
878 case ISD::VECTOR_SHUFFLE: {
880 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
881
882 // If all the demanded elts are from one operand and are inline,
883 // then we can use the operand directly.
884 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
885 for (unsigned i = 0; i != NumElts; ++i) {
886 int M = ShuffleMask[i];
887 if (M < 0 || !DemandedElts[i])
888 continue;
889 AllUndef = false;
890 IdentityLHS &= (M == (int)i);
891 IdentityRHS &= ((M - NumElts) == i);
892 }
893
894 if (AllUndef)
895 return DAG.getUNDEF(Op.getValueType());
896 if (IdentityLHS)
897 return Op.getOperand(0);
898 if (IdentityRHS)
899 return Op.getOperand(1);
900 break;
901 }
902 default:
903 // TODO: Probably okay to remove after audit; here to reduce change size
904 // in initial enablement patch for scalable vectors
905 if (VT.isScalableVector())
906 return SDValue();
907
908 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
909 if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
910 Op, DemandedBits, DemandedElts, DAG, Depth))
911 return V;
912 break;
913 }
914 return SDValue();
915}
916
919 unsigned Depth) const {
920 EVT VT = Op.getValueType();
921 // Since the number of lanes in a scalable vector is unknown at compile time,
922 // we track one bit which is implicitly broadcast to all lanes. This means
923 // that all lanes in a scalable vector are considered demanded.
924 APInt DemandedElts = VT.isFixedLengthVector()
926 : APInt(1, 1);
927 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
928 Depth);
929}
930
932 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
933 unsigned Depth) const {
934 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
935 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
936 Depth);
937}
938
939// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
940// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
942 const TargetLowering &TLI,
943 const APInt &DemandedBits,
944 const APInt &DemandedElts,
945 unsigned Depth) {
946 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
947 "SRL or SRA node is required here!");
948 // Is the right shift using an immediate value of 1?
949 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
950 if (!N1C || !N1C->isOne())
951 return SDValue();
952
953 // We are looking for an avgfloor
954 // add(ext, ext)
955 // or one of these as a avgceil
956 // add(add(ext, ext), 1)
957 // add(add(ext, 1), ext)
958 // add(ext, add(ext, 1))
959 SDValue Add = Op.getOperand(0);
960 if (Add.getOpcode() != ISD::ADD)
961 return SDValue();
962
963 SDValue ExtOpA = Add.getOperand(0);
964 SDValue ExtOpB = Add.getOperand(1);
965 SDValue Add2;
966 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
967 ConstantSDNode *ConstOp;
968 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
969 ConstOp->isOne()) {
970 ExtOpA = Op1;
971 ExtOpB = Op3;
972 Add2 = A;
973 return true;
974 }
975 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
976 ConstOp->isOne()) {
977 ExtOpA = Op1;
978 ExtOpB = Op2;
979 Add2 = A;
980 return true;
981 }
982 return false;
983 };
984 bool IsCeil =
985 (ExtOpA.getOpcode() == ISD::ADD &&
986 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
987 (ExtOpB.getOpcode() == ISD::ADD &&
988 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
989
990 // If the shift is signed (sra):
991 // - Needs >= 2 sign bit for both operands.
992 // - Needs >= 2 zero bits.
993 // If the shift is unsigned (srl):
994 // - Needs >= 1 zero bit for both operands.
995 // - Needs 1 demanded bit zero and >= 2 sign bits.
996 unsigned ShiftOpc = Op.getOpcode();
997 bool IsSigned = false;
998 unsigned KnownBits;
999 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1000 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
1001 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1002 unsigned NumZeroA =
1003 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1004 unsigned NumZeroB =
1005 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1006 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1007
1008 switch (ShiftOpc) {
1009 default:
1010 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1011 case ISD::SRA: {
1012 if (NumZero >= 2 && NumSigned < NumZero) {
1013 IsSigned = false;
1014 KnownBits = NumZero;
1015 break;
1016 }
1017 if (NumSigned >= 1) {
1018 IsSigned = true;
1019 KnownBits = NumSigned;
1020 break;
1021 }
1022 return SDValue();
1023 }
1024 case ISD::SRL: {
1025 if (NumZero >= 1 && NumSigned < NumZero) {
1026 IsSigned = false;
1027 KnownBits = NumZero;
1028 break;
1029 }
1030 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1031 IsSigned = true;
1032 KnownBits = NumSigned;
1033 break;
1034 }
1035 return SDValue();
1036 }
1037 }
1038
1039 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1040 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1041
1042 // Find the smallest power-2 type that is legal for this vector size and
1043 // operation, given the original type size and the number of known sign/zero
1044 // bits.
1045 EVT VT = Op.getValueType();
1046 unsigned MinWidth =
1047 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1048 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
1049 if (VT.isVector())
1050 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1051 if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
1052 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1053 // larger type size to do the transform.
1054 if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
1055 return SDValue();
1056 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1057 Add.getOperand(1)) &&
1058 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1059 Add2.getOperand(1))))
1060 NVT = VT;
1061 else
1062 return SDValue();
1063 }
1064
1065 SDLoc DL(Op);
1066 SDValue ResultAVG =
1067 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1068 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1069 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1070}
1071
1072/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1073/// result of Op are ever used downstream. If we can use this information to
1074/// simplify Op, create a new simplified DAG node and return true, returning the
1075/// original and new nodes in Old and New. Otherwise, analyze the expression and
1076/// return a mask of Known bits for the expression (used to simplify the
1077/// caller). The Known bits may only be accurate for those bits in the
1078/// OriginalDemandedBits and OriginalDemandedElts.
1080 SDValue Op, const APInt &OriginalDemandedBits,
1081 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1082 unsigned Depth, bool AssumeSingleUse) const {
1083 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1084 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1085 "Mask size mismatches value type size!");
1086
1087 // Don't know anything.
1088 Known = KnownBits(BitWidth);
1089
1090 EVT VT = Op.getValueType();
1091 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1092 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1093 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1094 "Unexpected vector size");
1095
1096 APInt DemandedBits = OriginalDemandedBits;
1097 APInt DemandedElts = OriginalDemandedElts;
1098 SDLoc dl(Op);
1099
1100 // Undef operand.
1101 if (Op.isUndef())
1102 return false;
1103
1104 // We can't simplify target constants.
1105 if (Op.getOpcode() == ISD::TargetConstant)
1106 return false;
1107
1108 if (Op.getOpcode() == ISD::Constant) {
1109 // We know all of the bits for a constant!
1110 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1111 return false;
1112 }
1113
1114 if (Op.getOpcode() == ISD::ConstantFP) {
1115 // We know all of the bits for a floating point constant!
1117 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1118 return false;
1119 }
1120
1121 // Other users may use these bits.
1122 bool HasMultiUse = false;
1123 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1125 // Limit search depth.
1126 return false;
1127 }
1128 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1130 DemandedElts = APInt::getAllOnes(NumElts);
1131 HasMultiUse = true;
1132 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1133 // Not demanding any bits/elts from Op.
1134 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1135 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1136 // Limit search depth.
1137 return false;
1138 }
1139
1140 KnownBits Known2;
1141 switch (Op.getOpcode()) {
1142 case ISD::SCALAR_TO_VECTOR: {
1143 if (VT.isScalableVector())
1144 return false;
1145 if (!DemandedElts[0])
1146 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1147
1148 KnownBits SrcKnown;
1149 SDValue Src = Op.getOperand(0);
1150 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1151 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1152 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1153 return true;
1154
1155 // Upper elements are undef, so only get the knownbits if we just demand
1156 // the bottom element.
1157 if (DemandedElts == 1)
1158 Known = SrcKnown.anyextOrTrunc(BitWidth);
1159 break;
1160 }
1161 case ISD::BUILD_VECTOR:
1162 // Collect the known bits that are shared by every demanded element.
1163 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1164 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1165 return false; // Don't fall through, will infinitely loop.
1166 case ISD::SPLAT_VECTOR: {
1167 SDValue Scl = Op.getOperand(0);
1168 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1169 KnownBits KnownScl;
1170 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1171 return true;
1172
1173 // Implicitly truncate the bits to match the official semantics of
1174 // SPLAT_VECTOR.
1175 Known = KnownScl.trunc(BitWidth);
1176 break;
1177 }
1178 case ISD::LOAD: {
1179 auto *LD = cast<LoadSDNode>(Op);
1180 if (getTargetConstantFromLoad(LD)) {
1181 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1182 return false; // Don't fall through, will infinitely loop.
1183 }
1184 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1185 // If this is a ZEXTLoad and we are looking at the loaded value.
1186 EVT MemVT = LD->getMemoryVT();
1187 unsigned MemBits = MemVT.getScalarSizeInBits();
1188 Known.Zero.setBitsFrom(MemBits);
1189 return false; // Don't fall through, will infinitely loop.
1190 }
1191 break;
1192 }
1194 if (VT.isScalableVector())
1195 return false;
1196 SDValue Vec = Op.getOperand(0);
1197 SDValue Scl = Op.getOperand(1);
1198 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1199 EVT VecVT = Vec.getValueType();
1200
1201 // If index isn't constant, assume we need all vector elements AND the
1202 // inserted element.
1203 APInt DemandedVecElts(DemandedElts);
1204 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1205 unsigned Idx = CIdx->getZExtValue();
1206 DemandedVecElts.clearBit(Idx);
1207
1208 // Inserted element is not required.
1209 if (!DemandedElts[Idx])
1210 return TLO.CombineTo(Op, Vec);
1211 }
1212
1213 KnownBits KnownScl;
1214 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1215 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1216 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1217 return true;
1218
1219 Known = KnownScl.anyextOrTrunc(BitWidth);
1220
1221 KnownBits KnownVec;
1222 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1223 Depth + 1))
1224 return true;
1225
1226 if (!!DemandedVecElts)
1227 Known = Known.intersectWith(KnownVec);
1228
1229 return false;
1230 }
1231 case ISD::INSERT_SUBVECTOR: {
1232 if (VT.isScalableVector())
1233 return false;
1234 // Demand any elements from the subvector and the remainder from the src it's
1235 // inserted into.
1236 SDValue Src = Op.getOperand(0);
1237 SDValue Sub = Op.getOperand(1);
1238 uint64_t Idx = Op.getConstantOperandVal(2);
1239 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1240 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1241 APInt DemandedSrcElts = DemandedElts;
1242 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1243
1244 KnownBits KnownSub, KnownSrc;
1245 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1246 Depth + 1))
1247 return true;
1248 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1249 Depth + 1))
1250 return true;
1251
1252 Known.Zero.setAllBits();
1253 Known.One.setAllBits();
1254 if (!!DemandedSubElts)
1255 Known = Known.intersectWith(KnownSub);
1256 if (!!DemandedSrcElts)
1257 Known = Known.intersectWith(KnownSrc);
1258
1259 // Attempt to avoid multi-use src if we don't need anything from it.
1260 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1261 !DemandedSrcElts.isAllOnes()) {
1262 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1263 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1264 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1265 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1266 if (NewSub || NewSrc) {
1267 NewSub = NewSub ? NewSub : Sub;
1268 NewSrc = NewSrc ? NewSrc : Src;
1269 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1270 Op.getOperand(2));
1271 return TLO.CombineTo(Op, NewOp);
1272 }
1273 }
1274 break;
1275 }
1277 if (VT.isScalableVector())
1278 return false;
1279 // Offset the demanded elts by the subvector index.
1280 SDValue Src = Op.getOperand(0);
1281 if (Src.getValueType().isScalableVector())
1282 break;
1283 uint64_t Idx = Op.getConstantOperandVal(1);
1284 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1285 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1286
1287 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1288 Depth + 1))
1289 return true;
1290
1291 // Attempt to avoid multi-use src if we don't need anything from it.
1292 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1293 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1294 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1295 if (DemandedSrc) {
1296 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1297 Op.getOperand(1));
1298 return TLO.CombineTo(Op, NewOp);
1299 }
1300 }
1301 break;
1302 }
1303 case ISD::CONCAT_VECTORS: {
1304 if (VT.isScalableVector())
1305 return false;
1306 Known.Zero.setAllBits();
1307 Known.One.setAllBits();
1308 EVT SubVT = Op.getOperand(0).getValueType();
1309 unsigned NumSubVecs = Op.getNumOperands();
1310 unsigned NumSubElts = SubVT.getVectorNumElements();
1311 for (unsigned i = 0; i != NumSubVecs; ++i) {
1312 APInt DemandedSubElts =
1313 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1314 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1315 Known2, TLO, Depth + 1))
1316 return true;
1317 // Known bits are shared by every demanded subvector element.
1318 if (!!DemandedSubElts)
1319 Known = Known.intersectWith(Known2);
1320 }
1321 break;
1322 }
1323 case ISD::VECTOR_SHUFFLE: {
1324 assert(!VT.isScalableVector());
1325 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1326
1327 // Collect demanded elements from shuffle operands.
1328 APInt DemandedLHS, DemandedRHS;
1329 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1330 DemandedRHS))
1331 break;
1332
1333 if (!!DemandedLHS || !!DemandedRHS) {
1334 SDValue Op0 = Op.getOperand(0);
1335 SDValue Op1 = Op.getOperand(1);
1336
1337 Known.Zero.setAllBits();
1338 Known.One.setAllBits();
1339 if (!!DemandedLHS) {
1340 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1341 Depth + 1))
1342 return true;
1343 Known = Known.intersectWith(Known2);
1344 }
1345 if (!!DemandedRHS) {
1346 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1347 Depth + 1))
1348 return true;
1349 Known = Known.intersectWith(Known2);
1350 }
1351
1352 // Attempt to avoid multi-use ops if we don't need anything from them.
1353 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1354 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1355 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1356 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1357 if (DemandedOp0 || DemandedOp1) {
1358 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1359 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1360 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1361 return TLO.CombineTo(Op, NewOp);
1362 }
1363 }
1364 break;
1365 }
1366 case ISD::AND: {
1367 SDValue Op0 = Op.getOperand(0);
1368 SDValue Op1 = Op.getOperand(1);
1369
1370 // If the RHS is a constant, check to see if the LHS would be zero without
1371 // using the bits from the RHS. Below, we use knowledge about the RHS to
1372 // simplify the LHS, here we're using information from the LHS to simplify
1373 // the RHS.
1374 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1375 // Do not increment Depth here; that can cause an infinite loop.
1376 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1377 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1378 if ((LHSKnown.Zero & DemandedBits) ==
1379 (~RHSC->getAPIntValue() & DemandedBits))
1380 return TLO.CombineTo(Op, Op0);
1381
1382 // If any of the set bits in the RHS are known zero on the LHS, shrink
1383 // the constant.
1384 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1385 DemandedElts, TLO))
1386 return true;
1387
1388 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1389 // constant, but if this 'and' is only clearing bits that were just set by
1390 // the xor, then this 'and' can be eliminated by shrinking the mask of
1391 // the xor. For example, for a 32-bit X:
1392 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1393 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1394 LHSKnown.One == ~RHSC->getAPIntValue()) {
1395 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1396 return TLO.CombineTo(Op, Xor);
1397 }
1398 }
1399
1400 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1401 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1402 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1403 (Op0.getOperand(0).isUndef() ||
1405 Op0->hasOneUse()) {
1406 unsigned NumSubElts =
1408 unsigned SubIdx = Op0.getConstantOperandVal(2);
1409 APInt DemandedSub =
1410 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1411 KnownBits KnownSubMask =
1412 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1413 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1414 SDValue NewAnd =
1415 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1416 SDValue NewInsert =
1417 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1418 Op0.getOperand(1), Op0.getOperand(2));
1419 return TLO.CombineTo(Op, NewInsert);
1420 }
1421 }
1422
1423 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1424 Depth + 1))
1425 return true;
1426 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1427 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1428 Known2, TLO, Depth + 1))
1429 return true;
1430 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1431
1432 // If all of the demanded bits are known one on one side, return the other.
1433 // These bits cannot contribute to the result of the 'and'.
1434 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1435 return TLO.CombineTo(Op, Op0);
1436 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1437 return TLO.CombineTo(Op, Op1);
1438 // If all of the demanded bits in the inputs are known zeros, return zero.
1439 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1440 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1441 // If the RHS is a constant, see if we can simplify it.
1442 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1443 TLO))
1444 return true;
1445 // If the operation can be done in a smaller type, do so.
1446 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1447 return true;
1448
1449 // Attempt to avoid multi-use ops if we don't need anything from them.
1450 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1451 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1452 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1453 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1454 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1455 if (DemandedOp0 || DemandedOp1) {
1456 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1457 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1458 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1459 return TLO.CombineTo(Op, NewOp);
1460 }
1461 }
1462
1463 Known &= Known2;
1464 break;
1465 }
1466 case ISD::OR: {
1467 SDValue Op0 = Op.getOperand(0);
1468 SDValue Op1 = Op.getOperand(1);
1469 SDNodeFlags Flags = Op.getNode()->getFlags();
1470 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1471 Depth + 1)) {
1472 if (Flags.hasDisjoint()) {
1473 Flags.setDisjoint(false);
1474 Op->setFlags(Flags);
1475 }
1476 return true;
1477 }
1478 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1479 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1480 Known2, TLO, Depth + 1)) {
1481 if (Flags.hasDisjoint()) {
1482 Flags.setDisjoint(false);
1483 Op->setFlags(Flags);
1484 }
1485 return true;
1486 }
1487 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1488
1489 // If all of the demanded bits are known zero on one side, return the other.
1490 // These bits cannot contribute to the result of the 'or'.
1491 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1492 return TLO.CombineTo(Op, Op0);
1493 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1494 return TLO.CombineTo(Op, Op1);
1495 // If the RHS is a constant, see if we can simplify it.
1496 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1497 return true;
1498 // If the operation can be done in a smaller type, do so.
1499 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1500 return true;
1501
1502 // Attempt to avoid multi-use ops if we don't need anything from them.
1503 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1504 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1505 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1506 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1507 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1508 if (DemandedOp0 || DemandedOp1) {
1509 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1510 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1511 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1512 return TLO.CombineTo(Op, NewOp);
1513 }
1514 }
1515
1516 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1517 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1518 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1519 Op0->hasOneUse() && Op1->hasOneUse()) {
1520 // Attempt to match all commutations - m_c_Or would've been useful!
1521 for (int I = 0; I != 2; ++I) {
1522 SDValue X = Op.getOperand(I).getOperand(0);
1523 SDValue C1 = Op.getOperand(I).getOperand(1);
1524 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1525 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1526 if (Alt.getOpcode() == ISD::OR) {
1527 for (int J = 0; J != 2; ++J) {
1528 if (X == Alt.getOperand(J)) {
1529 SDValue Y = Alt.getOperand(1 - J);
1530 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1531 {C1, C2})) {
1532 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1533 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1534 return TLO.CombineTo(
1535 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1536 }
1537 }
1538 }
1539 }
1540 }
1541 }
1542
1543 Known |= Known2;
1544 break;
1545 }
1546 case ISD::XOR: {
1547 SDValue Op0 = Op.getOperand(0);
1548 SDValue Op1 = Op.getOperand(1);
1549
1550 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1551 Depth + 1))
1552 return true;
1553 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1554 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1555 Depth + 1))
1556 return true;
1557 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1558
1559 // If all of the demanded bits are known zero on one side, return the other.
1560 // These bits cannot contribute to the result of the 'xor'.
1561 if (DemandedBits.isSubsetOf(Known.Zero))
1562 return TLO.CombineTo(Op, Op0);
1563 if (DemandedBits.isSubsetOf(Known2.Zero))
1564 return TLO.CombineTo(Op, Op1);
1565 // If the operation can be done in a smaller type, do so.
1566 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1567 return true;
1568
1569 // If all of the unknown bits are known to be zero on one side or the other
1570 // turn this into an *inclusive* or.
1571 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1572 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1573 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1574
1575 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1576 if (C) {
1577 // If one side is a constant, and all of the set bits in the constant are
1578 // also known set on the other side, turn this into an AND, as we know
1579 // the bits will be cleared.
1580 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1581 // NB: it is okay if more bits are known than are requested
1582 if (C->getAPIntValue() == Known2.One) {
1583 SDValue ANDC =
1584 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1585 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1586 }
1587
1588 // If the RHS is a constant, see if we can change it. Don't alter a -1
1589 // constant because that's a 'not' op, and that is better for combining
1590 // and codegen.
1591 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1592 // We're flipping all demanded bits. Flip the undemanded bits too.
1593 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1594 return TLO.CombineTo(Op, New);
1595 }
1596
1597 unsigned Op0Opcode = Op0.getOpcode();
1598 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1599 if (ConstantSDNode *ShiftC =
1600 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1601 // Don't crash on an oversized shift. We can not guarantee that a
1602 // bogus shift has been simplified to undef.
1603 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1604 uint64_t ShiftAmt = ShiftC->getZExtValue();
1606 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1607 : Ones.lshr(ShiftAmt);
1608 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1609 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1610 TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
1611 // If the xor constant is a demanded mask, do a 'not' before the
1612 // shift:
1613 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1614 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1615 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1616 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1617 Op0.getOperand(1)));
1618 }
1619 }
1620 }
1621 }
1622 }
1623
1624 // If we can't turn this into a 'not', try to shrink the constant.
1625 if (!C || !C->isAllOnes())
1626 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1627 return true;
1628
1629 // Attempt to avoid multi-use ops if we don't need anything from them.
1630 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1631 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1632 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1633 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1634 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1635 if (DemandedOp0 || DemandedOp1) {
1636 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1637 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1638 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1639 return TLO.CombineTo(Op, NewOp);
1640 }
1641 }
1642
1643 Known ^= Known2;
1644 break;
1645 }
1646 case ISD::SELECT:
1647 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1648 Known, TLO, Depth + 1))
1649 return true;
1650 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1651 Known2, TLO, Depth + 1))
1652 return true;
1653 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1654 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1655
1656 // If the operands are constants, see if we can simplify them.
1657 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1658 return true;
1659
1660 // Only known if known in both the LHS and RHS.
1661 Known = Known.intersectWith(Known2);
1662 break;
1663 case ISD::VSELECT:
1664 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1665 Known, TLO, Depth + 1))
1666 return true;
1667 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1668 Known2, TLO, Depth + 1))
1669 return true;
1670 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1671 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1672
1673 // Only known if known in both the LHS and RHS.
1674 Known = Known.intersectWith(Known2);
1675 break;
1676 case ISD::SELECT_CC:
1677 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1678 Known, TLO, Depth + 1))
1679 return true;
1680 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1681 Known2, TLO, Depth + 1))
1682 return true;
1683 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1684 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1685
1686 // If the operands are constants, see if we can simplify them.
1687 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1688 return true;
1689
1690 // Only known if known in both the LHS and RHS.
1691 Known = Known.intersectWith(Known2);
1692 break;
1693 case ISD::SETCC: {
1694 SDValue Op0 = Op.getOperand(0);
1695 SDValue Op1 = Op.getOperand(1);
1696 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1697 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1698 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1699 // -1, we may be able to bypass the setcc.
1700 if (DemandedBits.isSignMask() &&
1704 // If we're testing X < 0, then this compare isn't needed - just use X!
1705 // FIXME: We're limiting to integer types here, but this should also work
1706 // if we don't care about FP signed-zero. The use of SETLT with FP means
1707 // that we don't care about NaNs.
1708 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1710 return TLO.CombineTo(Op, Op0);
1711
1712 // TODO: Should we check for other forms of sign-bit comparisons?
1713 // Examples: X <= -1, X >= 0
1714 }
1715 if (getBooleanContents(Op0.getValueType()) ==
1717 BitWidth > 1)
1718 Known.Zero.setBitsFrom(1);
1719 break;
1720 }
1721 case ISD::SHL: {
1722 SDValue Op0 = Op.getOperand(0);
1723 SDValue Op1 = Op.getOperand(1);
1724 EVT ShiftVT = Op1.getValueType();
1725
1726 if (const APInt *SA =
1727 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1728 unsigned ShAmt = SA->getZExtValue();
1729 if (ShAmt == 0)
1730 return TLO.CombineTo(Op, Op0);
1731
1732 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1733 // single shift. We can do this if the bottom bits (which are shifted
1734 // out) are never demanded.
1735 // TODO - support non-uniform vector amounts.
1736 if (Op0.getOpcode() == ISD::SRL) {
1737 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1738 if (const APInt *SA2 =
1739 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1740 unsigned C1 = SA2->getZExtValue();
1741 unsigned Opc = ISD::SHL;
1742 int Diff = ShAmt - C1;
1743 if (Diff < 0) {
1744 Diff = -Diff;
1745 Opc = ISD::SRL;
1746 }
1747 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1748 return TLO.CombineTo(
1749 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1750 }
1751 }
1752 }
1753
1754 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1755 // are not demanded. This will likely allow the anyext to be folded away.
1756 // TODO - support non-uniform vector amounts.
1757 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1758 SDValue InnerOp = Op0.getOperand(0);
1759 EVT InnerVT = InnerOp.getValueType();
1760 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1761 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1762 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1763 SDValue NarrowShl = TLO.DAG.getNode(
1764 ISD::SHL, dl, InnerVT, InnerOp,
1765 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1766 return TLO.CombineTo(
1767 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1768 }
1769
1770 // Repeat the SHL optimization above in cases where an extension
1771 // intervenes: (shl (anyext (shr x, c1)), c2) to
1772 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1773 // aren't demanded (as above) and that the shifted upper c1 bits of
1774 // x aren't demanded.
1775 // TODO - support non-uniform vector amounts.
1776 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1777 InnerOp.hasOneUse()) {
1778 if (const APInt *SA2 =
1779 TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
1780 unsigned InnerShAmt = SA2->getZExtValue();
1781 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1782 DemandedBits.getActiveBits() <=
1783 (InnerBits - InnerShAmt + ShAmt) &&
1784 DemandedBits.countr_zero() >= ShAmt) {
1785 SDValue NewSA =
1786 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1787 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1788 InnerOp.getOperand(0));
1789 return TLO.CombineTo(
1790 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1791 }
1792 }
1793 }
1794 }
1795
1796 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1797 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1798 Depth + 1)) {
1799 SDNodeFlags Flags = Op.getNode()->getFlags();
1800 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1801 // Disable the nsw and nuw flags. We can no longer guarantee that we
1802 // won't wrap after simplification.
1803 Flags.setNoSignedWrap(false);
1804 Flags.setNoUnsignedWrap(false);
1805 Op->setFlags(Flags);
1806 }
1807 return true;
1808 }
1809 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1810 Known.Zero <<= ShAmt;
1811 Known.One <<= ShAmt;
1812 // low bits known zero.
1813 Known.Zero.setLowBits(ShAmt);
1814
1815 // Attempt to avoid multi-use ops if we don't need anything from them.
1816 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1817 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1818 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1819 if (DemandedOp0) {
1820 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1821 return TLO.CombineTo(Op, NewOp);
1822 }
1823 }
1824
1825 // Try shrinking the operation as long as the shift amount will still be
1826 // in range.
1827 if ((ShAmt < DemandedBits.getActiveBits()) &&
1828 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1829 return true;
1830
1831 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1832 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1833 // Only do this if we demand the upper half so the knownbits are correct.
1834 unsigned HalfWidth = BitWidth / 2;
1835 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1836 DemandedBits.countLeadingOnes() >= HalfWidth) {
1837 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1838 if (isNarrowingProfitable(VT, HalfVT) &&
1839 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1840 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1841 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1842 // If we're demanding the upper bits at all, we must ensure
1843 // that the upper bits of the shift result are known to be zero,
1844 // which is equivalent to the narrow shift being NUW.
1845 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1846 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1847 SDNodeFlags Flags;
1848 Flags.setNoSignedWrap(IsNSW);
1849 Flags.setNoUnsignedWrap(IsNUW);
1850 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1851 SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1852 ShAmt, HalfVT, dl, TLO.LegalTypes());
1853 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1854 NewShiftAmt, Flags);
1855 SDValue NewExt =
1856 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1857 return TLO.CombineTo(Op, NewExt);
1858 }
1859 }
1860 }
1861 } else {
1862 // This is a variable shift, so we can't shift the demand mask by a known
1863 // amount. But if we are not demanding high bits, then we are not
1864 // demanding those bits from the pre-shifted operand either.
1865 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1866 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1867 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1868 Depth + 1)) {
1869 SDNodeFlags Flags = Op.getNode()->getFlags();
1870 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1871 // Disable the nsw and nuw flags. We can no longer guarantee that we
1872 // won't wrap after simplification.
1873 Flags.setNoSignedWrap(false);
1874 Flags.setNoUnsignedWrap(false);
1875 Op->setFlags(Flags);
1876 }
1877 return true;
1878 }
1879 Known.resetAll();
1880 }
1881 }
1882
1883 // If we are only demanding sign bits then we can use the shift source
1884 // directly.
1885 if (const APInt *MaxSA =
1886 TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
1887 unsigned ShAmt = MaxSA->getZExtValue();
1888 unsigned NumSignBits =
1889 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1890 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1891 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1892 return TLO.CombineTo(Op, Op0);
1893 }
1894 break;
1895 }
1896 case ISD::SRL: {
1897 SDValue Op0 = Op.getOperand(0);
1898 SDValue Op1 = Op.getOperand(1);
1899 EVT ShiftVT = Op1.getValueType();
1900
1901 // Try to match AVG patterns.
1902 if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1903 DemandedElts, Depth + 1))
1904 return TLO.CombineTo(Op, AVG);
1905
1906 if (const APInt *SA =
1907 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1908 unsigned ShAmt = SA->getZExtValue();
1909 if (ShAmt == 0)
1910 return TLO.CombineTo(Op, Op0);
1911
1912 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1913 // single shift. We can do this if the top bits (which are shifted out)
1914 // are never demanded.
1915 // TODO - support non-uniform vector amounts.
1916 if (Op0.getOpcode() == ISD::SHL) {
1917 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1918 if (const APInt *SA2 =
1919 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1920 unsigned C1 = SA2->getZExtValue();
1921 unsigned Opc = ISD::SRL;
1922 int Diff = ShAmt - C1;
1923 if (Diff < 0) {
1924 Diff = -Diff;
1925 Opc = ISD::SHL;
1926 }
1927 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1928 return TLO.CombineTo(
1929 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1930 }
1931 }
1932 }
1933
1934 APInt InDemandedMask = (DemandedBits << ShAmt);
1935
1936 // If the shift is exact, then it does demand the low bits (and knows that
1937 // they are zero).
1938 if (Op->getFlags().hasExact())
1939 InDemandedMask.setLowBits(ShAmt);
1940
1941 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1942 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
1943 if ((BitWidth % 2) == 0 && !VT.isVector()) {
1945 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
1946 if (isNarrowingProfitable(VT, HalfVT) &&
1947 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
1948 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1949 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
1950 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
1951 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
1952 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1953 SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1954 ShAmt, HalfVT, dl, TLO.LegalTypes());
1955 SDValue NewShift =
1956 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
1957 return TLO.CombineTo(
1958 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
1959 }
1960 }
1961
1962 // Compute the new bits that are at the top now.
1963 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1964 Depth + 1))
1965 return true;
1966 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1967 Known.Zero.lshrInPlace(ShAmt);
1968 Known.One.lshrInPlace(ShAmt);
1969 // High bits known zero.
1970 Known.Zero.setHighBits(ShAmt);
1971
1972 // Attempt to avoid multi-use ops if we don't need anything from them.
1973 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1974 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1975 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1976 if (DemandedOp0) {
1977 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
1978 return TLO.CombineTo(Op, NewOp);
1979 }
1980 }
1981 } else {
1982 // Use generic knownbits computation as it has support for non-uniform
1983 // shift amounts.
1984 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1985 }
1986 break;
1987 }
1988 case ISD::SRA: {
1989 SDValue Op0 = Op.getOperand(0);
1990 SDValue Op1 = Op.getOperand(1);
1991 EVT ShiftVT = Op1.getValueType();
1992
1993 // If we only want bits that already match the signbit then we don't need
1994 // to shift.
1995 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
1996 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
1997 NumHiDemandedBits)
1998 return TLO.CombineTo(Op, Op0);
1999
2000 // If this is an arithmetic shift right and only the low-bit is set, we can
2001 // always convert this into a logical shr, even if the shift amount is
2002 // variable. The low bit of the shift cannot be an input sign bit unless
2003 // the shift amount is >= the size of the datatype, which is undefined.
2004 if (DemandedBits.isOne())
2005 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2006
2007 // Try to match AVG patterns.
2008 if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
2009 DemandedElts, Depth + 1))
2010 return TLO.CombineTo(Op, AVG);
2011
2012 if (const APInt *SA =
2013 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
2014 unsigned ShAmt = SA->getZExtValue();
2015 if (ShAmt == 0)
2016 return TLO.CombineTo(Op, Op0);
2017
2018 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2019 // supports sext_inreg.
2020 if (Op0.getOpcode() == ISD::SHL) {
2021 if (const APInt *InnerSA =
2022 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
2023 unsigned LowBits = BitWidth - ShAmt;
2024 EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2025 if (VT.isVector())
2026 ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2028
2029 if (*InnerSA == ShAmt) {
2030 if (!TLO.LegalOperations() ||
2032 return TLO.CombineTo(
2033 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2034 Op0.getOperand(0),
2035 TLO.DAG.getValueType(ExtVT)));
2036
2037 // Even if we can't convert to sext_inreg, we might be able to
2038 // remove this shift pair if the input is already sign extended.
2039 unsigned NumSignBits =
2040 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2041 if (NumSignBits > ShAmt)
2042 return TLO.CombineTo(Op, Op0.getOperand(0));
2043 }
2044 }
2045 }
2046
2047 APInt InDemandedMask = (DemandedBits << ShAmt);
2048
2049 // If the shift is exact, then it does demand the low bits (and knows that
2050 // they are zero).
2051 if (Op->getFlags().hasExact())
2052 InDemandedMask.setLowBits(ShAmt);
2053
2054 // If any of the demanded bits are produced by the sign extension, we also
2055 // demand the input sign bit.
2056 if (DemandedBits.countl_zero() < ShAmt)
2057 InDemandedMask.setSignBit();
2058
2059 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2060 Depth + 1))
2061 return true;
2062 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2063 Known.Zero.lshrInPlace(ShAmt);
2064 Known.One.lshrInPlace(ShAmt);
2065
2066 // If the input sign bit is known to be zero, or if none of the top bits
2067 // are demanded, turn this into an unsigned shift right.
2068 if (Known.Zero[BitWidth - ShAmt - 1] ||
2069 DemandedBits.countl_zero() >= ShAmt) {
2070 SDNodeFlags Flags;
2071 Flags.setExact(Op->getFlags().hasExact());
2072 return TLO.CombineTo(
2073 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2074 }
2075
2076 int Log2 = DemandedBits.exactLogBase2();
2077 if (Log2 >= 0) {
2078 // The bit must come from the sign.
2079 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2080 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2081 }
2082
2083 if (Known.One[BitWidth - ShAmt - 1])
2084 // New bits are known one.
2085 Known.One.setHighBits(ShAmt);
2086
2087 // Attempt to avoid multi-use ops if we don't need anything from them.
2088 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2089 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2090 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2091 if (DemandedOp0) {
2092 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2093 return TLO.CombineTo(Op, NewOp);
2094 }
2095 }
2096 }
2097 break;
2098 }
2099 case ISD::FSHL:
2100 case ISD::FSHR: {
2101 SDValue Op0 = Op.getOperand(0);
2102 SDValue Op1 = Op.getOperand(1);
2103 SDValue Op2 = Op.getOperand(2);
2104 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2105
2106 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2107 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2108
2109 // For fshl, 0-shift returns the 1st arg.
2110 // For fshr, 0-shift returns the 2nd arg.
2111 if (Amt == 0) {
2112 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2113 Known, TLO, Depth + 1))
2114 return true;
2115 break;
2116 }
2117
2118 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2119 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2120 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2121 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2122 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2123 Depth + 1))
2124 return true;
2125 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2126 Depth + 1))
2127 return true;
2128
2129 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2130 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2131 Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2132 Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2133 Known = Known.unionWith(Known2);
2134
2135 // Attempt to avoid multi-use ops if we don't need anything from them.
2136 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2137 !DemandedElts.isAllOnes()) {
2138 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2139 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2140 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2141 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2142 if (DemandedOp0 || DemandedOp1) {
2143 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2144 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2145 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2146 DemandedOp1, Op2);
2147 return TLO.CombineTo(Op, NewOp);
2148 }
2149 }
2150 }
2151
2152 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2153 if (isPowerOf2_32(BitWidth)) {
2154 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2155 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2156 Known2, TLO, Depth + 1))
2157 return true;
2158 }
2159 break;
2160 }
2161 case ISD::ROTL:
2162 case ISD::ROTR: {
2163 SDValue Op0 = Op.getOperand(0);
2164 SDValue Op1 = Op.getOperand(1);
2165 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2166
2167 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2168 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2169 return TLO.CombineTo(Op, Op0);
2170
2171 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2172 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2173 unsigned RevAmt = BitWidth - Amt;
2174
2175 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2176 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2177 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2178 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2179 Depth + 1))
2180 return true;
2181
2182 // rot*(x, 0) --> x
2183 if (Amt == 0)
2184 return TLO.CombineTo(Op, Op0);
2185
2186 // See if we don't demand either half of the rotated bits.
2187 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2188 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2189 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2190 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2191 }
2192 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2193 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2194 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2195 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2196 }
2197 }
2198
2199 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2200 if (isPowerOf2_32(BitWidth)) {
2201 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2202 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2203 Depth + 1))
2204 return true;
2205 }
2206 break;
2207 }
2208 case ISD::SMIN:
2209 case ISD::SMAX:
2210 case ISD::UMIN:
2211 case ISD::UMAX: {
2212 unsigned Opc = Op.getOpcode();
2213 SDValue Op0 = Op.getOperand(0);
2214 SDValue Op1 = Op.getOperand(1);
2215
2216 // If we're only demanding signbits, then we can simplify to OR/AND node.
2217 unsigned BitOp =
2218 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2219 unsigned NumSignBits =
2220 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2221 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2222 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2223 if (NumSignBits >= NumDemandedUpperBits)
2224 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2225
2226 // Check if one arg is always less/greater than (or equal) to the other arg.
2227 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2228 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2229 switch (Opc) {
2230 case ISD::SMIN:
2231 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2232 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2233 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2234 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2235 Known = KnownBits::smin(Known0, Known1);
2236 break;
2237 case ISD::SMAX:
2238 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2239 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2240 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2241 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2242 Known = KnownBits::smax(Known0, Known1);
2243 break;
2244 case ISD::UMIN:
2245 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2246 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2247 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2248 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2249 Known = KnownBits::umin(Known0, Known1);
2250 break;
2251 case ISD::UMAX:
2252 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2253 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2254 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2255 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2256 Known = KnownBits::umax(Known0, Known1);
2257 break;
2258 }
2259 break;
2260 }
2261 case ISD::BITREVERSE: {
2262 SDValue Src = Op.getOperand(0);
2263 APInt DemandedSrcBits = DemandedBits.reverseBits();
2264 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2265 Depth + 1))
2266 return true;
2267 Known.One = Known2.One.reverseBits();
2268 Known.Zero = Known2.Zero.reverseBits();
2269 break;
2270 }
2271 case ISD::BSWAP: {
2272 SDValue Src = Op.getOperand(0);
2273
2274 // If the only bits demanded come from one byte of the bswap result,
2275 // just shift the input byte into position to eliminate the bswap.
2276 unsigned NLZ = DemandedBits.countl_zero();
2277 unsigned NTZ = DemandedBits.countr_zero();
2278
2279 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2280 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2281 // have 14 leading zeros, round to 8.
2282 NLZ = alignDown(NLZ, 8);
2283 NTZ = alignDown(NTZ, 8);
2284 // If we need exactly one byte, we can do this transformation.
2285 if (BitWidth - NLZ - NTZ == 8) {
2286 // Replace this with either a left or right shift to get the byte into
2287 // the right place.
2288 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2289 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2290 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2291 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2292 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2293 return TLO.CombineTo(Op, NewOp);
2294 }
2295 }
2296
2297 APInt DemandedSrcBits = DemandedBits.byteSwap();
2298 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2299 Depth + 1))
2300 return true;
2301 Known.One = Known2.One.byteSwap();
2302 Known.Zero = Known2.Zero.byteSwap();
2303 break;
2304 }
2305 case ISD::CTPOP: {
2306 // If only 1 bit is demanded, replace with PARITY as long as we're before
2307 // op legalization.
2308 // FIXME: Limit to scalars for now.
2309 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2310 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2311 Op.getOperand(0)));
2312
2313 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2314 break;
2315 }
2317 SDValue Op0 = Op.getOperand(0);
2318 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2319 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2320
2321 // If we only care about the highest bit, don't bother shifting right.
2322 if (DemandedBits.isSignMask()) {
2323 unsigned MinSignedBits =
2324 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2325 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2326 // However if the input is already sign extended we expect the sign
2327 // extension to be dropped altogether later and do not simplify.
2328 if (!AlreadySignExtended) {
2329 // Compute the correct shift amount type, which must be getShiftAmountTy
2330 // for scalar types after legalization.
2331 SDValue ShiftAmt =
2332 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2333 return TLO.CombineTo(Op,
2334 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2335 }
2336 }
2337
2338 // If none of the extended bits are demanded, eliminate the sextinreg.
2339 if (DemandedBits.getActiveBits() <= ExVTBits)
2340 return TLO.CombineTo(Op, Op0);
2341
2342 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2343
2344 // Since the sign extended bits are demanded, we know that the sign
2345 // bit is demanded.
2346 InputDemandedBits.setBit(ExVTBits - 1);
2347
2348 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2349 Depth + 1))
2350 return true;
2351 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2352
2353 // If the sign bit of the input is known set or clear, then we know the
2354 // top bits of the result.
2355
2356 // If the input sign bit is known zero, convert this into a zero extension.
2357 if (Known.Zero[ExVTBits - 1])
2358 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2359
2360 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2361 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2362 Known.One.setBitsFrom(ExVTBits);
2363 Known.Zero &= Mask;
2364 } else { // Input sign bit unknown
2365 Known.Zero &= Mask;
2366 Known.One &= Mask;
2367 }
2368 break;
2369 }
2370 case ISD::BUILD_PAIR: {
2371 EVT HalfVT = Op.getOperand(0).getValueType();
2372 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2373
2374 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2375 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2376
2377 KnownBits KnownLo, KnownHi;
2378
2379 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2380 return true;
2381
2382 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2383 return true;
2384
2385 Known = KnownHi.concat(KnownLo);
2386 break;
2387 }
2389 if (VT.isScalableVector())
2390 return false;
2391 [[fallthrough]];
2392 case ISD::ZERO_EXTEND: {
2393 SDValue Src = Op.getOperand(0);
2394 EVT SrcVT = Src.getValueType();
2395 unsigned InBits = SrcVT.getScalarSizeInBits();
2396 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2397 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2398
2399 // If none of the top bits are demanded, convert this into an any_extend.
2400 if (DemandedBits.getActiveBits() <= InBits) {
2401 // If we only need the non-extended bits of the bottom element
2402 // then we can just bitcast to the result.
2403 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2404 VT.getSizeInBits() == SrcVT.getSizeInBits())
2405 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2406
2407 unsigned Opc =
2409 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2410 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2411 }
2412
2413 SDNodeFlags Flags = Op->getFlags();
2414 APInt InDemandedBits = DemandedBits.trunc(InBits);
2415 APInt InDemandedElts = DemandedElts.zext(InElts);
2416 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2417 Depth + 1)) {
2418 if (Flags.hasNonNeg()) {
2419 Flags.setNonNeg(false);
2420 Op->setFlags(Flags);
2421 }
2422 return true;
2423 }
2424 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2425 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2426 Known = Known.zext(BitWidth);
2427
2428 // Attempt to avoid multi-use ops if we don't need anything from them.
2429 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2430 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2431 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2432 break;
2433 }
2435 if (VT.isScalableVector())
2436 return false;
2437 [[fallthrough]];
2438 case ISD::SIGN_EXTEND: {
2439 SDValue Src = Op.getOperand(0);
2440 EVT SrcVT = Src.getValueType();
2441 unsigned InBits = SrcVT.getScalarSizeInBits();
2442 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2443 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2444
2445 APInt InDemandedElts = DemandedElts.zext(InElts);
2446 APInt InDemandedBits = DemandedBits.trunc(InBits);
2447
2448 // Since some of the sign extended bits are demanded, we know that the sign
2449 // bit is demanded.
2450 InDemandedBits.setBit(InBits - 1);
2451
2452 // If none of the top bits are demanded, convert this into an any_extend.
2453 if (DemandedBits.getActiveBits() <= InBits) {
2454 // If we only need the non-extended bits of the bottom element
2455 // then we can just bitcast to the result.
2456 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2457 VT.getSizeInBits() == SrcVT.getSizeInBits())
2458 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2459
2460 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2462 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2463 InBits) {
2464 unsigned Opc =
2466 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2467 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2468 }
2469 }
2470
2471 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2472 Depth + 1))
2473 return true;
2474 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2475 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2476
2477 // If the sign bit is known one, the top bits match.
2478 Known = Known.sext(BitWidth);
2479
2480 // If the sign bit is known zero, convert this to a zero extend.
2481 if (Known.isNonNegative()) {
2482 unsigned Opc =
2484 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2485 SDNodeFlags Flags;
2486 if (!IsVecInReg)
2487 Flags.setNonNeg(true);
2488 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2489 }
2490 }
2491
2492 // Attempt to avoid multi-use ops if we don't need anything from them.
2493 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2494 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2495 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2496 break;
2497 }
2499 if (VT.isScalableVector())
2500 return false;
2501 [[fallthrough]];
2502 case ISD::ANY_EXTEND: {
2503 SDValue Src = Op.getOperand(0);
2504 EVT SrcVT = Src.getValueType();
2505 unsigned InBits = SrcVT.getScalarSizeInBits();
2506 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2507 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2508
2509 // If we only need the bottom element then we can just bitcast.
2510 // TODO: Handle ANY_EXTEND?
2511 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2512 VT.getSizeInBits() == SrcVT.getSizeInBits())
2513 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2514
2515 APInt InDemandedBits = DemandedBits.trunc(InBits);
2516 APInt InDemandedElts = DemandedElts.zext(InElts);
2517 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2518 Depth + 1))
2519 return true;
2520 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2521 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2522 Known = Known.anyext(BitWidth);
2523
2524 // Attempt to avoid multi-use ops if we don't need anything from them.
2525 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2526 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2527 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2528 break;
2529 }
2530 case ISD::TRUNCATE: {
2531 SDValue Src = Op.getOperand(0);
2532
2533 // Simplify the input, using demanded bit information, and compute the known
2534 // zero/one bits live out.
2535 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2536 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2537 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2538 Depth + 1))
2539 return true;
2540 Known = Known.trunc(BitWidth);
2541
2542 // Attempt to avoid multi-use ops if we don't need anything from them.
2543 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2544 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2545 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2546
2547 // If the input is only used by this truncate, see if we can shrink it based
2548 // on the known demanded bits.
2549 switch (Src.getOpcode()) {
2550 default:
2551 break;
2552 case ISD::SRL:
2553 // Shrink SRL by a constant if none of the high bits shifted in are
2554 // demanded.
2555 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2556 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2557 // undesirable.
2558 break;
2559
2560 if (Src.getNode()->hasOneUse()) {
2561 const APInt *ShAmtC =
2562 TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
2563 if (!ShAmtC || ShAmtC->uge(BitWidth))
2564 break;
2565 uint64_t ShVal = ShAmtC->getZExtValue();
2566
2567 APInt HighBits =
2568 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2569 HighBits.lshrInPlace(ShVal);
2570 HighBits = HighBits.trunc(BitWidth);
2571
2572 if (!(HighBits & DemandedBits)) {
2573 // None of the shifted in bits are needed. Add a truncate of the
2574 // shift input, then shift it.
2575 SDValue NewShAmt =
2576 TLO.DAG.getShiftAmountConstant(ShVal, VT, dl, TLO.LegalTypes());
2577 SDValue NewTrunc =
2578 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2579 return TLO.CombineTo(
2580 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2581 }
2582 }
2583 break;
2584 }
2585
2586 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2587 break;
2588 }
2589 case ISD::AssertZext: {
2590 // AssertZext demands all of the high bits, plus any of the low bits
2591 // demanded by its users.
2592 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2594 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2595 TLO, Depth + 1))
2596 return true;
2597 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2598
2599 Known.Zero |= ~InMask;
2600 Known.One &= (~Known.Zero);
2601 break;
2602 }
2604 SDValue Src = Op.getOperand(0);
2605 SDValue Idx = Op.getOperand(1);
2606 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2607 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2608
2609 if (SrcEltCnt.isScalable())
2610 return false;
2611
2612 // Demand the bits from every vector element without a constant index.
2613 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2614 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2615 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2616 if (CIdx->getAPIntValue().ult(NumSrcElts))
2617 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2618
2619 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2620 // anything about the extended bits.
2621 APInt DemandedSrcBits = DemandedBits;
2622 if (BitWidth > EltBitWidth)
2623 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2624
2625 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2626 Depth + 1))
2627 return true;
2628
2629 // Attempt to avoid multi-use ops if we don't need anything from them.
2630 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2631 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2632 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2633 SDValue NewOp =
2634 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2635 return TLO.CombineTo(Op, NewOp);
2636 }
2637 }
2638
2639 Known = Known2;
2640 if (BitWidth > EltBitWidth)
2641 Known = Known.anyext(BitWidth);
2642 break;
2643 }
2644 case ISD::BITCAST: {
2645 if (VT.isScalableVector())
2646 return false;
2647 SDValue Src = Op.getOperand(0);
2648 EVT SrcVT = Src.getValueType();
2649 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2650
2651 // If this is an FP->Int bitcast and if the sign bit is the only
2652 // thing demanded, turn this into a FGETSIGN.
2653 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2654 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2655 SrcVT.isFloatingPoint()) {
2656 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2657 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2658 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2659 SrcVT != MVT::f128) {
2660 // Cannot eliminate/lower SHL for f128 yet.
2661 EVT Ty = OpVTLegal ? VT : MVT::i32;
2662 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2663 // place. We expect the SHL to be eliminated by other optimizations.
2664 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2665 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2666 if (!OpVTLegal && OpVTSizeInBits > 32)
2667 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2668 unsigned ShVal = Op.getValueSizeInBits() - 1;
2669 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2670 return TLO.CombineTo(Op,
2671 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2672 }
2673 }
2674
2675 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2676 // Demand the elt/bit if any of the original elts/bits are demanded.
2677 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2678 unsigned Scale = BitWidth / NumSrcEltBits;
2679 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2680 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2681 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2682 for (unsigned i = 0; i != Scale; ++i) {
2683 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2684 unsigned BitOffset = EltOffset * NumSrcEltBits;
2685 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2686 if (!Sub.isZero()) {
2687 DemandedSrcBits |= Sub;
2688 for (unsigned j = 0; j != NumElts; ++j)
2689 if (DemandedElts[j])
2690 DemandedSrcElts.setBit((j * Scale) + i);
2691 }
2692 }
2693
2694 APInt KnownSrcUndef, KnownSrcZero;
2695 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2696 KnownSrcZero, TLO, Depth + 1))
2697 return true;
2698
2699 KnownBits KnownSrcBits;
2700 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2701 KnownSrcBits, TLO, Depth + 1))
2702 return true;
2703 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2704 // TODO - bigendian once we have test coverage.
2705 unsigned Scale = NumSrcEltBits / BitWidth;
2706 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2707 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2708 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2709 for (unsigned i = 0; i != NumElts; ++i)
2710 if (DemandedElts[i]) {
2711 unsigned Offset = (i % Scale) * BitWidth;
2712 DemandedSrcBits.insertBits(DemandedBits, Offset);
2713 DemandedSrcElts.setBit(i / Scale);
2714 }
2715
2716 if (SrcVT.isVector()) {
2717 APInt KnownSrcUndef, KnownSrcZero;
2718 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2719 KnownSrcZero, TLO, Depth + 1))
2720 return true;
2721 }
2722
2723 KnownBits KnownSrcBits;
2724 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2725 KnownSrcBits, TLO, Depth + 1))
2726 return true;
2727
2728 // Attempt to avoid multi-use ops if we don't need anything from them.
2729 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2730 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2731 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2732 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2733 return TLO.CombineTo(Op, NewOp);
2734 }
2735 }
2736 }
2737
2738 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2739 // recursive call where Known may be useful to the caller.
2740 if (Depth > 0) {
2741 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2742 return false;
2743 }
2744 break;
2745 }
2746 case ISD::MUL:
2747 if (DemandedBits.isPowerOf2()) {
2748 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2749 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2750 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2751 unsigned CTZ = DemandedBits.countr_zero();
2752 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2753 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2754 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2755 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2756 return TLO.CombineTo(Op, Shl);
2757 }
2758 }
2759 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2760 // X * X is odd iff X is odd.
2761 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2762 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2763 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2764 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2765 return TLO.CombineTo(Op, And1);
2766 }
2767 [[fallthrough]];
2768 case ISD::ADD:
2769 case ISD::SUB: {
2770 // Add, Sub, and Mul don't demand any bits in positions beyond that
2771 // of the highest bit demanded of them.
2772 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2773 SDNodeFlags Flags = Op.getNode()->getFlags();
2774 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2775 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2776 KnownBits KnownOp0, KnownOp1;
2777 if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, KnownOp0, TLO,
2778 Depth + 1) ||
2779 SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2780 Depth + 1) ||
2781 // See if the operation should be performed at a smaller bit width.
2782 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2783 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2784 // Disable the nsw and nuw flags. We can no longer guarantee that we
2785 // won't wrap after simplification.
2786 Flags.setNoSignedWrap(false);
2787 Flags.setNoUnsignedWrap(false);
2788 Op->setFlags(Flags);
2789 }
2790 return true;
2791 }
2792
2793 // neg x with only low bit demanded is simply x.
2794 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2795 isNullConstant(Op0))
2796 return TLO.CombineTo(Op, Op1);
2797
2798 // Attempt to avoid multi-use ops if we don't need anything from them.
2799 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2800 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2801 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2802 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2803 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2804 if (DemandedOp0 || DemandedOp1) {
2805 Flags.setNoSignedWrap(false);
2806 Flags.setNoUnsignedWrap(false);
2807 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2808 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2809 SDValue NewOp =
2810 TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2811 return TLO.CombineTo(Op, NewOp);
2812 }
2813 }
2814
2815 // If we have a constant operand, we may be able to turn it into -1 if we
2816 // do not demand the high bits. This can make the constant smaller to
2817 // encode, allow more general folding, or match specialized instruction
2818 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2819 // is probably not useful (and could be detrimental).
2821 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2822 if (C && !C->isAllOnes() && !C->isOne() &&
2823 (C->getAPIntValue() | HighMask).isAllOnes()) {
2824 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2825 // Disable the nsw and nuw flags. We can no longer guarantee that we
2826 // won't wrap after simplification.
2827 Flags.setNoSignedWrap(false);
2828 Flags.setNoUnsignedWrap(false);
2829 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2830 return TLO.CombineTo(Op, NewOp);
2831 }
2832
2833 // Match a multiply with a disguised negated-power-of-2 and convert to a
2834 // an equivalent shift-left amount.
2835 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2836 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2837 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2838 return 0;
2839
2840 // Don't touch opaque constants. Also, ignore zero and power-of-2
2841 // multiplies. Those will get folded later.
2842 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2843 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2844 !MulC->getAPIntValue().isPowerOf2()) {
2845 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2846 if (UnmaskedC.isNegatedPowerOf2())
2847 return (-UnmaskedC).logBase2();
2848 }
2849 return 0;
2850 };
2851
2852 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2853 unsigned ShlAmt) {
2854 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2855 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2856 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2857 return TLO.CombineTo(Op, Res);
2858 };
2859
2861 if (Op.getOpcode() == ISD::ADD) {
2862 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2863 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2864 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2865 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2866 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2867 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2868 }
2869 if (Op.getOpcode() == ISD::SUB) {
2870 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2871 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2872 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2873 }
2874 }
2875
2876 if (Op.getOpcode() == ISD::MUL) {
2877 Known = KnownBits::mul(KnownOp0, KnownOp1);
2878 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2880 Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2881 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2882 }
2883 break;
2884 }
2885 default:
2886 // We also ask the target about intrinsics (which could be specific to it).
2887 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2888 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2889 // TODO: Probably okay to remove after audit; here to reduce change size
2890 // in initial enablement patch for scalable vectors
2891 if (Op.getValueType().isScalableVector())
2892 break;
2893 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2894 Known, TLO, Depth))
2895 return true;
2896 break;
2897 }
2898
2899 // Just use computeKnownBits to compute output bits.
2900 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2901 break;
2902 }
2903
2904 // If we know the value of all of the demanded bits, return this as a
2905 // constant.
2906 if (!isTargetCanonicalConstantNode(Op) &&
2907 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2908 // Avoid folding to a constant if any OpaqueConstant is involved.
2909 const SDNode *N = Op.getNode();
2910 for (SDNode *Op :
2912 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2913 if (C->isOpaque())
2914 return false;
2915 }
2916 if (VT.isInteger())
2917 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2918 if (VT.isFloatingPoint())
2919 return TLO.CombineTo(
2920 Op,
2921 TLO.DAG.getConstantFP(
2922 APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2923 }
2924
2925 // A multi use 'all demanded elts' simplify failed to find any knownbits.
2926 // Try again just for the original demanded elts.
2927 // Ensure we do this AFTER constant folding above.
2928 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2929 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2930
2931 return false;
2932}
2933
2935 const APInt &DemandedElts,
2936 DAGCombinerInfo &DCI) const {
// DAGCombiner-facing entry point: runs the worklist-style
// SimplifyDemandedVectorElts overload and, on success, commits the queued
// replacements back to the DAG via the combiner. Returns true iff Op (or one
// of its operands) was simplified.
2937 SelectionDAG &DAG = DCI.DAG;
// TargetLoweringOpt captures the current legalization phase so any
// replacement nodes respect what is still legal to create at this point.
2938 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2939 !DCI.isBeforeLegalizeOps());
2940 
// KnownUndef/KnownZero are computed by the callee but not needed here.
2941 APInt KnownUndef, KnownZero;
2942 bool Simplified =
2943 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2944 if (Simplified) {
// Revisit Op after the replacement is applied so follow-on combines fire.
2945 DCI.AddToWorklist(Op.getNode());
2946 DCI.CommitTargetLoweringOpt(TLO);
2947 }
2948 
2949 return Simplified;
2950}
2951
2952/// Given a vector binary operation and known undefined elements for each input
2953/// operand, compute whether each element of the output is undefined.
2955 const APInt &UndefOp0,
2956 const APInt &UndefOp1) {
2957 EVT VT = BO.getValueType();
2959 "Vector binop only");
2960 
2961 EVT EltVT = VT.getVectorElementType();
// Non-fixed (scalable) vectors are modeled with a single lane here; the
// undef masks passed in must use the same width (checked below).
2962 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
2963 assert(UndefOp0.getBitWidth() == NumElts &&
2964 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2965 
// Classify element Index of V: returns UNDEF if the mask says the lane is
// undef, the scalar constant (int or FP) pulled out of a BUILD_VECTOR
// operand, or a null SDValue if the lane cannot be classified.
2966 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2967 const APInt &UndefVals) {
2968 if (UndefVals[Index])
2969 return DAG.getUNDEF(EltVT);
2970 
2971 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2972 // Try hard to make sure that the getNode() call is not creating temporary
2973 // nodes. Ignore opaque integers because they do not constant fold.
2974 SDValue Elt = BV->getOperand(Index);
2975 auto *C = dyn_cast<ConstantSDNode>(Elt);
2976 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2977 return Elt;
2978 }
2979 
2980 return SDValue();
2981 };
2982 
2983 APInt KnownUndef = APInt::getZero(NumElts);
2984 for (unsigned i = 0; i != NumElts; ++i) {
2985 // If both inputs for this element are either constant or undef and match
2986 // the element type, compute the constant/undef result for this element of
2987 // the vector.
2988 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2989 // not handle FP constants. The code within getNode() should be refactored
2990 // to avoid the danger of creating a bogus temporary node here.
2991 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2992 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
// If getNode() folds the scalar op on these lane values to an UNDEF, the
// corresponding output lane is known-undef.
2993 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2994 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2995 KnownUndef.setBit(i);
2996 }
2997 return KnownUndef;
2998}
2999
3001 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3002 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3003 bool AssumeSingleUse) const {
3004 EVT VT = Op.getValueType();
3005 unsigned Opcode = Op.getOpcode();
3006 APInt DemandedElts = OriginalDemandedElts;
3007 unsigned NumElts = DemandedElts.getBitWidth();
3008 assert(VT.isVector() && "Expected vector op");
3009
3010 KnownUndef = KnownZero = APInt::getZero(NumElts);
3011
3012 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
3013 if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
3014 return false;
3015
3016 // TODO: For now we assume we know nothing about scalable vectors.
3017 if (VT.isScalableVector())
3018 return false;
3019
3020 assert(VT.getVectorNumElements() == NumElts &&
3021 "Mask size mismatches value type element count!");
3022
3023 // Undef operand.
3024 if (Op.isUndef()) {
3025 KnownUndef.setAllBits();
3026 return false;
3027 }
3028
3029 // If Op has other users, assume that all elements are needed.
3030 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3031 DemandedElts.setAllBits();
3032
3033 // Not demanding any elements from Op.
3034 if (DemandedElts == 0) {
3035 KnownUndef.setAllBits();
3036 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3037 }
3038
3039 // Limit search depth.
3041 return false;
3042
3043 SDLoc DL(Op);
3044 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3045 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3046
3047 // Helper for demanding the specified elements and all the bits of both binary
3048 // operands.
3049 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3050 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3051 TLO.DAG, Depth + 1);
3052 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3053 TLO.DAG, Depth + 1);
3054 if (NewOp0 || NewOp1) {
3055 SDValue NewOp =
3056 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3057 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3058 return TLO.CombineTo(Op, NewOp);
3059 }
3060 return false;
3061 };
3062
3063 switch (Opcode) {
3064 case ISD::SCALAR_TO_VECTOR: {
3065 if (!DemandedElts[0]) {
3066 KnownUndef.setAllBits();
3067 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3068 }
3069 SDValue ScalarSrc = Op.getOperand(0);
3070 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3071 SDValue Src = ScalarSrc.getOperand(0);
3072 SDValue Idx = ScalarSrc.getOperand(1);
3073 EVT SrcVT = Src.getValueType();
3074
3075 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3076
3077 if (SrcEltCnt.isScalable())
3078 return false;
3079
3080 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3081 if (isNullConstant(Idx)) {
3082 APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3083 APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3084 APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3085 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3086 TLO, Depth + 1))
3087 return true;
3088 }
3089 }
3090 KnownUndef.setHighBits(NumElts - 1);
3091 break;
3092 }
3093 case ISD::BITCAST: {
3094 SDValue Src = Op.getOperand(0);
3095 EVT SrcVT = Src.getValueType();
3096
3097 // We only handle vectors here.
3098 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3099 if (!SrcVT.isVector())
3100 break;
3101
3102 // Fast handling of 'identity' bitcasts.
3103 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3104 if (NumSrcElts == NumElts)
3105 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3106 KnownZero, TLO, Depth + 1);
3107
3108 APInt SrcDemandedElts, SrcZero, SrcUndef;
3109
3110 // Bitcast from 'large element' src vector to 'small element' vector, we
3111 // must demand a source element if any DemandedElt maps to it.
3112 if ((NumElts % NumSrcElts) == 0) {
3113 unsigned Scale = NumElts / NumSrcElts;
3114 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3115 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3116 TLO, Depth + 1))
3117 return true;
3118
3119 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3120 // of the large element.
3121 // TODO - bigendian once we have test coverage.
3122 if (IsLE) {
3123 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3124 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3125 for (unsigned i = 0; i != NumElts; ++i)
3126 if (DemandedElts[i]) {
3127 unsigned Ofs = (i % Scale) * EltSizeInBits;
3128 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3129 }
3130
3131 KnownBits Known;
3132 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3133 TLO, Depth + 1))
3134 return true;
3135
3136 // The bitcast has split each wide element into a number of
3137 // narrow subelements. We have just computed the Known bits
3138 // for wide elements. See if element splitting results in
3139 // some subelements being zero. Only for demanded elements!
3140 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3141 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3142 .isAllOnes())
3143 continue;
3144 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3145 unsigned Elt = Scale * SrcElt + SubElt;
3146 if (DemandedElts[Elt])
3147 KnownZero.setBit(Elt);
3148 }
3149 }
3150 }
3151
3152 // If the src element is zero/undef then all the output elements will be -
3153 // only demanded elements are guaranteed to be correct.
3154 for (unsigned i = 0; i != NumSrcElts; ++i) {
3155 if (SrcDemandedElts[i]) {
3156 if (SrcZero[i])
3157 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3158 if (SrcUndef[i])
3159 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3160 }
3161 }
3162 }
3163
3164 // Bitcast from 'small element' src vector to 'large element' vector, we
3165 // demand all smaller source elements covered by the larger demanded element
3166 // of this vector.
3167 if ((NumSrcElts % NumElts) == 0) {
3168 unsigned Scale = NumSrcElts / NumElts;
3169 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3170 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3171 TLO, Depth + 1))
3172 return true;
3173
3174 // If all the src elements covering an output element are zero/undef, then
3175 // the output element will be as well, assuming it was demanded.
3176 for (unsigned i = 0; i != NumElts; ++i) {
3177 if (DemandedElts[i]) {
3178 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3179 KnownZero.setBit(i);
3180 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3181 KnownUndef.setBit(i);
3182 }
3183 }
3184 }
3185 break;
3186 }
3187 case ISD::BUILD_VECTOR: {
3188 // Check all elements and simplify any unused elements with UNDEF.
3189 if (!DemandedElts.isAllOnes()) {
3190 // Don't simplify BROADCASTS.
3191 if (llvm::any_of(Op->op_values(),
3192 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3193 SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
3194 bool Updated = false;
3195 for (unsigned i = 0; i != NumElts; ++i) {
3196 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3197 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3198 KnownUndef.setBit(i);
3199 Updated = true;
3200 }
3201 }
3202 if (Updated)
3203 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3204 }
3205 }
3206 for (unsigned i = 0; i != NumElts; ++i) {
3207 SDValue SrcOp = Op.getOperand(i);
3208 if (SrcOp.isUndef()) {
3209 KnownUndef.setBit(i);
3210 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3212 KnownZero.setBit(i);
3213 }
3214 }
3215 break;
3216 }
3217 case ISD::CONCAT_VECTORS: {
3218 EVT SubVT = Op.getOperand(0).getValueType();
3219 unsigned NumSubVecs = Op.getNumOperands();
3220 unsigned NumSubElts = SubVT.getVectorNumElements();
3221 for (unsigned i = 0; i != NumSubVecs; ++i) {
3222 SDValue SubOp = Op.getOperand(i);
3223 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3224 APInt SubUndef, SubZero;
3225 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3226 Depth + 1))
3227 return true;
3228 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3229 KnownZero.insertBits(SubZero, i * NumSubElts);
3230 }
3231
3232 // Attempt to avoid multi-use ops if we don't need anything from them.
3233 if (!DemandedElts.isAllOnes()) {
3234 bool FoundNewSub = false;
3235 SmallVector<SDValue, 2> DemandedSubOps;
3236 for (unsigned i = 0; i != NumSubVecs; ++i) {
3237 SDValue SubOp = Op.getOperand(i);
3238 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3239 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3240 SubOp, SubElts, TLO.DAG, Depth + 1);
3241 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3242 FoundNewSub = NewSubOp ? true : FoundNewSub;
3243 }
3244 if (FoundNewSub) {
3245 SDValue NewOp =
3246 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3247 return TLO.CombineTo(Op, NewOp);
3248 }
3249 }
3250 break;
3251 }
3252 case ISD::INSERT_SUBVECTOR: {
3253 // Demand any elements from the subvector and the remainder from the src its
3254 // inserted into.
3255 SDValue Src = Op.getOperand(0);
3256 SDValue Sub = Op.getOperand(1);
3257 uint64_t Idx = Op.getConstantOperandVal(2);
3258 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3259 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3260 APInt DemandedSrcElts = DemandedElts;
3261 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3262
3263 APInt SubUndef, SubZero;
3264 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3265 Depth + 1))
3266 return true;
3267
3268 // If none of the src operand elements are demanded, replace it with undef.
3269 if (!DemandedSrcElts && !Src.isUndef())
3270 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3271 TLO.DAG.getUNDEF(VT), Sub,
3272 Op.getOperand(2)));
3273
3274 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3275 TLO, Depth + 1))
3276 return true;
3277 KnownUndef.insertBits(SubUndef, Idx);
3278 KnownZero.insertBits(SubZero, Idx);
3279
3280 // Attempt to avoid multi-use ops if we don't need anything from them.
3281 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3282 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3283 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3284 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3285 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3286 if (NewSrc || NewSub) {
3287 NewSrc = NewSrc ? NewSrc : Src;
3288 NewSub = NewSub ? NewSub : Sub;
3289 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3290 NewSub, Op.getOperand(2));
3291 return TLO.CombineTo(Op, NewOp);
3292 }
3293 }
3294 break;
3295 }
3297 // Offset the demanded elts by the subvector index.
3298 SDValue Src = Op.getOperand(0);
3299 if (Src.getValueType().isScalableVector())
3300 break;
3301 uint64_t Idx = Op.getConstantOperandVal(1);
3302 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3303 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3304
3305 APInt SrcUndef, SrcZero;
3306 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3307 Depth + 1))
3308 return true;
3309 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3310 KnownZero = SrcZero.extractBits(NumElts, Idx);
3311
3312 // Attempt to avoid multi-use ops if we don't need anything from them.
3313 if (!DemandedElts.isAllOnes()) {
3314 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3315 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3316 if (NewSrc) {
3317 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3318 Op.getOperand(1));
3319 return TLO.CombineTo(Op, NewOp);
3320 }
3321 }
3322 break;
3323 }
3325 SDValue Vec = Op.getOperand(0);
3326 SDValue Scl = Op.getOperand(1);
3327 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3328
3329 // For a legal, constant insertion index, if we don't need this insertion
3330 // then strip it, else remove it from the demanded elts.
3331 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3332 unsigned Idx = CIdx->getZExtValue();
3333 if (!DemandedElts[Idx])
3334 return TLO.CombineTo(Op, Vec);
3335
3336 APInt DemandedVecElts(DemandedElts);
3337 DemandedVecElts.clearBit(Idx);
3338 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3339 KnownZero, TLO, Depth + 1))
3340 return true;
3341
3342 KnownUndef.setBitVal(Idx, Scl.isUndef());
3343
3344 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3345 break;
3346 }
3347
3348 APInt VecUndef, VecZero;
3349 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3350 Depth + 1))
3351 return true;
3352 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3353 break;
3354 }
3355 case ISD::VSELECT: {
3356 SDValue Sel = Op.getOperand(0);
3357 SDValue LHS = Op.getOperand(1);
3358 SDValue RHS = Op.getOperand(2);
3359
3360 // Try to transform the select condition based on the current demanded
3361 // elements.
3362 APInt UndefSel, ZeroSel;
3363 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3364 Depth + 1))
3365 return true;
3366
3367 // See if we can simplify either vselect operand.
3368 APInt DemandedLHS(DemandedElts);
3369 APInt DemandedRHS(DemandedElts);
3370 APInt UndefLHS, ZeroLHS;
3371 APInt UndefRHS, ZeroRHS;
3372 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3373 Depth + 1))
3374 return true;
3375 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3376 Depth + 1))
3377 return true;
3378
3379 KnownUndef = UndefLHS & UndefRHS;
3380 KnownZero = ZeroLHS & ZeroRHS;
3381
3382 // If we know that the selected element is always zero, we don't need the
3383 // select value element.
3384 APInt DemandedSel = DemandedElts & ~KnownZero;
3385 if (DemandedSel != DemandedElts)
3386 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3387 Depth + 1))
3388 return true;
3389
3390 break;
3391 }
3392 case ISD::VECTOR_SHUFFLE: {
3393 SDValue LHS = Op.getOperand(0);
3394 SDValue RHS = Op.getOperand(1);
3395 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3396
3397 // Collect demanded elements from shuffle operands..
3398 APInt DemandedLHS(NumElts, 0);
3399 APInt DemandedRHS(NumElts, 0);
3400 for (unsigned i = 0; i != NumElts; ++i) {
3401 int M = ShuffleMask[i];
3402 if (M < 0 || !DemandedElts[i])
3403 continue;
3404 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3405 if (M < (int)NumElts)
3406 DemandedLHS.setBit(M);
3407 else
3408 DemandedRHS.setBit(M - NumElts);
3409 }
3410
3411 // See if we can simplify either shuffle operand.
3412 APInt UndefLHS, ZeroLHS;
3413 APInt UndefRHS, ZeroRHS;
3414 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3415 Depth + 1))
3416 return true;
3417 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3418 Depth + 1))
3419 return true;
3420
3421 // Simplify mask using undef elements from LHS/RHS.
3422 bool Updated = false;
3423 bool IdentityLHS = true, IdentityRHS = true;
3424 SmallVector<int, 32> NewMask(ShuffleMask);
3425 for (unsigned i = 0; i != NumElts; ++i) {
3426 int &M = NewMask[i];
3427 if (M < 0)
3428 continue;
3429 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3430 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3431 Updated = true;
3432 M = -1;
3433 }
3434 IdentityLHS &= (M < 0) || (M == (int)i);
3435 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3436 }
3437
3438 // Update legal shuffle masks based on demanded elements if it won't reduce
3439 // to Identity which can cause premature removal of the shuffle mask.
3440 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3441 SDValue LegalShuffle =
3442 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3443 if (LegalShuffle)
3444 return TLO.CombineTo(Op, LegalShuffle);
3445 }
3446
3447 // Propagate undef/zero elements from LHS/RHS.
3448 for (unsigned i = 0; i != NumElts; ++i) {
3449 int M = ShuffleMask[i];
3450 if (M < 0) {
3451 KnownUndef.setBit(i);
3452 } else if (M < (int)NumElts) {
3453 if (UndefLHS[M])
3454 KnownUndef.setBit(i);
3455 if (ZeroLHS[M])
3456 KnownZero.setBit(i);
3457 } else {
3458 if (UndefRHS[M - NumElts])
3459 KnownUndef.setBit(i);
3460 if (ZeroRHS[M - NumElts])
3461 KnownZero.setBit(i);
3462 }
3463 }
3464 break;
3465 }
3469 APInt SrcUndef, SrcZero;
3470 SDValue Src = Op.getOperand(0);
3471 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3472 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3473 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3474 Depth + 1))
3475 return true;
3476 KnownZero = SrcZero.zextOrTrunc(NumElts);
3477 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3478
3479 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3480 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3481 DemandedSrcElts == 1) {
3482 // aext - if we just need the bottom element then we can bitcast.
3483 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3484 }
3485
3486 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3487 // zext(undef) upper bits are guaranteed to be zero.
3488 if (DemandedElts.isSubsetOf(KnownUndef))
3489 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3490 KnownUndef.clearAllBits();
3491
3492 // zext - if we just need the bottom element then we can mask:
3493 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3494 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3495 Op->isOnlyUserOf(Src.getNode()) &&
3496 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3497 SDLoc DL(Op);
3498 EVT SrcVT = Src.getValueType();
3499 EVT SrcSVT = SrcVT.getScalarType();
3500 SmallVector<SDValue> MaskElts;
3501 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3502 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3503 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3504 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3505 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3506 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3507 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3508 }
3509 }
3510 }
3511 break;
3512 }
3513
3514 // TODO: There are more binop opcodes that could be handled here - MIN,
3515 // MAX, saturated math, etc.
3516 case ISD::ADD: {
3517 SDValue Op0 = Op.getOperand(0);
3518 SDValue Op1 = Op.getOperand(1);
3519 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3520 APInt UndefLHS, ZeroLHS;
3521 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3522 Depth + 1, /*AssumeSingleUse*/ true))
3523 return true;
3524 }
3525 [[fallthrough]];
3526 }
3527 case ISD::OR:
3528 case ISD::XOR:
3529 case ISD::SUB:
3530 case ISD::FADD:
3531 case ISD::FSUB:
3532 case ISD::FMUL:
3533 case ISD::FDIV:
3534 case ISD::FREM: {
3535 SDValue Op0 = Op.getOperand(0);
3536 SDValue Op1 = Op.getOperand(1);
3537
3538 APInt UndefRHS, ZeroRHS;
3539 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3540 Depth + 1))
3541 return true;
3542 APInt UndefLHS, ZeroLHS;
3543 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3544 Depth + 1))
3545 return true;
3546
3547 KnownZero = ZeroLHS & ZeroRHS;
3548 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3549
3550 // Attempt to avoid multi-use ops if we don't need anything from them.
3551 // TODO - use KnownUndef to relax the demandedelts?
3552 if (!DemandedElts.isAllOnes())
3553 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3554 return true;
3555 break;
3556 }
3557 case ISD::SHL:
3558 case ISD::SRL:
3559 case ISD::SRA:
3560 case ISD::ROTL:
3561 case ISD::ROTR: {
3562 SDValue Op0 = Op.getOperand(0);
3563 SDValue Op1 = Op.getOperand(1);
3564
3565 APInt UndefRHS, ZeroRHS;
3566 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3567 Depth + 1))
3568 return true;
3569 APInt UndefLHS, ZeroLHS;
3570 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3571 Depth + 1))
3572 return true;
3573
3574 KnownZero = ZeroLHS;
3575 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3576
3577 // Attempt to avoid multi-use ops if we don't need anything from them.
3578 // TODO - use KnownUndef to relax the demandedelts?
3579 if (!DemandedElts.isAllOnes())
3580 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3581 return true;
3582 break;
3583 }
3584 case ISD::MUL:
3585 case ISD::MULHU:
3586 case ISD::MULHS:
3587 case ISD::AND: {
3588 SDValue Op0 = Op.getOperand(0);
3589 SDValue Op1 = Op.getOperand(1);
3590
3591 APInt SrcUndef, SrcZero;
3592 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3593 Depth + 1))
3594 return true;
3595 // If we know that a demanded element was zero in Op1 we don't need to
3596 // demand it in Op0 - its guaranteed to be zero.
3597 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3598 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3599 TLO, Depth + 1))
3600 return true;
3601
3602 KnownUndef &= DemandedElts0;
3603 KnownZero &= DemandedElts0;
3604
3605 // If every element pair has a zero/undef then just fold to zero.
3606 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3607 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3608 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3609 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3610
3611 // If either side has a zero element, then the result element is zero, even
3612 // if the other is an UNDEF.
3613 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3614 // and then handle 'and' nodes with the rest of the binop opcodes.
3615 KnownZero |= SrcZero;
3616 KnownUndef &= SrcUndef;
3617 KnownUndef &= ~KnownZero;
3618
3619 // Attempt to avoid multi-use ops if we don't need anything from them.
3620 if (!DemandedElts.isAllOnes())
3621 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3622 return true;
3623 break;
3624 }
3625 case ISD::TRUNCATE:
3626 case ISD::SIGN_EXTEND:
3627 case ISD::ZERO_EXTEND:
3628 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3629 KnownZero, TLO, Depth + 1))
3630 return true;
3631
3632 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3633 // zext(undef) upper bits are guaranteed to be zero.
3634 if (DemandedElts.isSubsetOf(KnownUndef))
3635 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3636 KnownUndef.clearAllBits();
3637 }
3638 break;
3639 default: {
3640 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3641 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3642 KnownZero, TLO, Depth))
3643 return true;
3644 } else {
3645 KnownBits Known;
3646 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3647 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3648 TLO, Depth, AssumeSingleUse))
3649 return true;
3650 }
3651 break;
3652 }
3653 }
3654 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3655
3656 // Constant fold all undef cases.
3657 // TODO: Handle zero cases as well.
3658 if (DemandedElts.isSubsetOf(KnownUndef))
3659 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3660
3661 return false;
3662}
3663
3664/// Determine which of the bits specified in Mask are known to be either zero or
3665/// one and return them in the Known.
3667 KnownBits &Known,
3668 const APInt &DemandedElts,
3669 const SelectionDAG &DAG,
3670 unsigned Depth) const {
3671 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3672 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3673 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3674 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3675 "Should use MaskedValueIsZero if you don't know whether Op"
3676 " is a target node!");
3677 Known.resetAll();
3678}
3679
3682 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3683 unsigned Depth) const {
3684 Known.resetAll();
3685}
3686
3688 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3689 // The low bits are known zero if the pointer is aligned.
3690 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3691}
3692
3695 unsigned Depth) const {
3696 return Align(1);
3697}
3698
3699/// This method can be implemented by targets that want to expose additional
3700/// information about sign bits to the DAG Combiner.
3702 const APInt &,
3703 const SelectionDAG &,
3704 unsigned Depth) const {
3705 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3706 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3707 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3708 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3709 "Should use ComputeNumSignBits if you don't know whether Op"
3710 " is a target node!");
3711 return 1;
3712}
3713
3715 GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3716 const MachineRegisterInfo &MRI, unsigned Depth) const {
3717 return 1;
3718}
3719
3721 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3722 TargetLoweringOpt &TLO, unsigned Depth) const {
3723 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3724 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3725 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3726 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3727 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3728 " is a target node!");
3729 return false;
3730}
3731
3733 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3734 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3735 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3736 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3737 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3738 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3739 "Should use SimplifyDemandedBits if you don't know whether Op"
3740 " is a target node!");
3741 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3742 return false;
3743}
3744
3746 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3747 SelectionDAG &DAG, unsigned Depth) const {
3748 assert(
3749 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3750 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3751 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3752 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3753 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3754 " is a target node!");
3755 return SDValue();
3756}
3757
3758SDValue
3761 SelectionDAG &DAG) const {
3762 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3763 if (!LegalMask) {
3764 std::swap(N0, N1);
3766 LegalMask = isShuffleMaskLegal(Mask, VT);
3767 }
3768
3769 if (!LegalMask)
3770 return SDValue();
3771
3772 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3773}
3774
3776 return nullptr;
3777}
3778
3780 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3781 bool PoisonOnly, unsigned Depth) const {
3782 assert(
3783 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3784 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3785 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3786 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3787 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3788 " is a target node!");
3789
3790 // If Op can't create undef/poison and none of its operands are undef/poison
3791 // then Op is never undef/poison.
3792 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3793 /*ConsiderFlags*/ true, Depth) &&
3794 all_of(Op->ops(), [&](SDValue V) {
3795 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
3796 Depth + 1);
3797 });
3798}
3799
3801 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3802 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3803 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3804 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3805 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3806 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3807 "Should use canCreateUndefOrPoison if you don't know whether Op"
3808 " is a target node!");
3809 // Be conservative and return true.
3810 return true;
3811}
3812
3814 const SelectionDAG &DAG,
3815 bool SNaN,
3816 unsigned Depth) const {
3817 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3818 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3819 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3820 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3821 "Should use isKnownNeverNaN if you don't know whether Op"
3822 " is a target node!");
3823 return false;
3824}
3825
3827 const APInt &DemandedElts,
3828 APInt &UndefElts,
3829 const SelectionDAG &DAG,
3830 unsigned Depth) const {
3831 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3832 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3833 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3834 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3835 "Should use isSplatValue if you don't know whether Op"
3836 " is a target node!");
3837 return false;
3838}
3839
3840// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3841// work with truncating build vectors and vectors with elements of less than
3842// 8 bits.
3844 if (!N)
3845 return false;
3846
3847 unsigned EltWidth;
3848 APInt CVal;
3849 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3850 /*AllowTruncation=*/true)) {
3851 CVal = CN->getAPIntValue();
3852 EltWidth = N.getValueType().getScalarSizeInBits();
3853 } else
3854 return false;
3855
3856 // If this is a truncating splat, truncate the splat value.
3857 // Otherwise, we may fail to match the expected values below.
3858 if (EltWidth < CVal.getBitWidth())
3859 CVal = CVal.trunc(EltWidth);
3860
3861 switch (getBooleanContents(N.getValueType())) {
3863 return CVal[0];
3865 return CVal.isOne();
3867 return CVal.isAllOnes();
3868 }
3869
3870 llvm_unreachable("Invalid boolean contents");
3871}
3872
3874 if (!N)
3875 return false;
3876
3877 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3878 if (!CN) {
3879 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3880 if (!BV)
3881 return false;
3882
3883 // Only interested in constant splats, we don't care about undef
3884 // elements in identifying boolean constants and getConstantSplatNode
3885 // returns NULL if all ops are undef;
3886 CN = BV->getConstantSplatNode();
3887 if (!CN)
3888 return false;
3889 }
3890
3891 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3892 return !CN->getAPIntValue()[0];
3893
3894 return CN->isZero();
3895}
3896
3898 bool SExt) const {
3899 if (VT == MVT::i1)
3900 return N->isOne();
3901
3903 switch (Cnt) {
3905 // An extended value of 1 is always true, unless its original type is i1,
3906 // in which case it will be sign extended to -1.
3907 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3910 return N->isAllOnes() && SExt;
3911 }
3912 llvm_unreachable("Unexpected enumeration.");
3913}
3914
3915/// This helper function of SimplifySetCC tries to optimize the comparison when
3916/// either operand of the SetCC node is a bitwise-and instruction.
3917SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3918 ISD::CondCode Cond, const SDLoc &DL,
3919 DAGCombinerInfo &DCI) const {
3920 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3921 std::swap(N0, N1);
3922
3923 SelectionDAG &DAG = DCI.DAG;
3924 EVT OpVT = N0.getValueType();
3925 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3926 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3927 return SDValue();
3928
3929 // (X & Y) != 0 --> zextOrTrunc(X & Y)
3930 // iff everything but LSB is known zero:
3931 if (Cond == ISD::SETNE && isNullConstant(N1) &&
3934 unsigned NumEltBits = OpVT.getScalarSizeInBits();
3935 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
3936 if (DAG.MaskedValueIsZero(N0, UpperBits))
3937 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
3938 }
3939
3940 // Try to eliminate a power-of-2 mask constant by converting to a signbit
3941 // test in a narrow type that we can truncate to with no cost. Examples:
3942 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
3943 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
3944 // TODO: This conservatively checks for type legality on the source and
3945 // destination types. That may inhibit optimizations, but it also
3946 // allows setcc->shift transforms that may be more beneficial.
3947 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3948 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
3949 isTypeLegal(OpVT) && N0.hasOneUse()) {
3950 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
3951 AndC->getAPIntValue().getActiveBits());
3952 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
3953 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
3954 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
3955 return DAG.getSetCC(DL, VT, Trunc, Zero,
3957 }
3958 }
3959
3960 // Match these patterns in any of their permutations:
3961 // (X & Y) == Y
3962 // (X & Y) != Y
3963 SDValue X, Y;
3964 if (N0.getOperand(0) == N1) {
3965 X = N0.getOperand(1);
3966 Y = N0.getOperand(0);
3967 } else if (N0.getOperand(1) == N1) {
3968 X = N0.getOperand(0);
3969 Y = N0.getOperand(1);
3970 } else {
3971 return SDValue();
3972 }
3973
3974 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
3975 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
3976 // its liable to create and infinite loop.
3977 SDValue Zero = DAG.getConstant(0, DL, OpVT);
3978 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
3980 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
3981 // Note that where Y is variable and is known to have at most one bit set
3982 // (for example, if it is Z & 1) we cannot do this; the expressions are not
3983 // equivalent when Y == 0.
3984 assert(OpVT.isInteger());
3986 if (DCI.isBeforeLegalizeOps() ||
3988 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
3989 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
3990 // If the target supports an 'and-not' or 'and-complement' logic operation,
3991 // try to use that to make a comparison operation more efficient.
3992 // But don't do this transform if the mask is a single bit because there are
3993 // more efficient ways to deal with that case (for example, 'bt' on x86 or
3994 // 'rlwinm' on PPC).
3995
3996 // Bail out if the compare operand that we want to turn into a zero is
3997 // already a zero (otherwise, infinite loop).
3998 if (isNullConstant(Y))
3999 return SDValue();
4000
4001 // Transform this into: ~X & Y == 0.
4002 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4003 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4004 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4005 }
4006
4007 return SDValue();
4008}
4009
4010/// There are multiple IR patterns that could be checking whether certain
4011/// truncation of a signed number would be lossy or not. The pattern which is
4012/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4013/// We are looking for the following pattern: (KeptBits is a constant)
4014/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4015/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4016/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4017/// We will unfold it into the natural trunc+sext pattern:
4018/// ((%x << C) a>> C) dstcond %x
4019/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4020SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4021 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4022 const SDLoc &DL) const {
4023 // We must be comparing with a constant.
4024 ConstantSDNode *C1;
4025 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4026 return SDValue();
4027
4028 // N0 should be: add %x, (1 << (KeptBits-1))
4029 if (N0->getOpcode() != ISD::ADD)
4030 return SDValue();
4031
4032 // And we must be 'add'ing a constant.
4033 ConstantSDNode *C01;
4034 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4035 return SDValue();
4036
4037 SDValue X = N0->getOperand(0);
4038 EVT XVT = X.getValueType();
4039
4040 // Validate constants ...
4041
4042 APInt I1 = C1->getAPIntValue();
4043
4044 ISD::CondCode NewCond;
4045 if (Cond == ISD::CondCode::SETULT) {
4046 NewCond = ISD::CondCode::SETEQ;
4047 } else if (Cond == ISD::CondCode::SETULE) {
4048 NewCond = ISD::CondCode::SETEQ;
4049 // But need to 'canonicalize' the constant.
4050 I1 += 1;
4051 } else if (Cond == ISD::CondCode::SETUGT) {
4052 NewCond = ISD::CondCode::SETNE;
4053 // But need to 'canonicalize' the constant.
4054 I1 += 1;
4055 } else if (Cond == ISD::CondCode::SETUGE) {
4056 NewCond = ISD::CondCode::SETNE;
4057 } else
4058 return SDValue();
4059
4060 APInt I01 = C01->getAPIntValue();
4061
4062 auto checkConstants = [&I1, &I01]() -> bool {
4063 // Both of them must be power-of-two, and the constant from setcc is bigger.
4064 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4065 };
4066
4067 if (checkConstants()) {
4068 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4069 } else {
4070 // What if we invert constants? (and the target predicate)
4071 I1.negate();
4072 I01.negate();
4073 assert(XVT.isInteger());
4074 NewCond = getSetCCInverse(NewCond, XVT);
4075 if (!checkConstants())
4076 return SDValue();
4077 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4078 }
4079
4080 // They are power-of-two, so which bit is set?
4081 const unsigned KeptBits = I1.logBase2();
4082 const unsigned KeptBitsMinusOne = I01.logBase2();
4083
4084 // Magic!
4085 if (KeptBits != (KeptBitsMinusOne + 1))
4086 return SDValue();
4087 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4088
4089 // We don't want to do this in every single case.
4090 SelectionDAG &DAG = DCI.DAG;
4092 XVT, KeptBits))
4093 return SDValue();
4094
4095 // Unfold into: sext_inreg(%x) cond %x
4096 // Where 'cond' will be either 'eq' or 'ne'.
4097 SDValue SExtInReg = DAG.getNode(
4099 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4100 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4101}
4102
4103// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4104SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4105 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4106 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4108 "Should be a comparison with 0.");
4109 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4110 "Valid only for [in]equality comparisons.");
4111
4112 unsigned NewShiftOpcode;
4113 SDValue X, C, Y;
4114
4115 SelectionDAG &DAG = DCI.DAG;
4116 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4117
4118 // Look for '(C l>>/<< Y)'.
4119 auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
4120 // The shift should be one-use.
4121 if (!V.hasOneUse())
4122 return false;
4123 unsigned OldShiftOpcode = V.getOpcode();
4124 switch (OldShiftOpcode) {
4125 case ISD::SHL:
4126 NewShiftOpcode = ISD::SRL;
4127 break;
4128 case ISD::SRL:
4129 NewShiftOpcode = ISD::SHL;
4130 break;
4131 default:
4132 return false; // must be a logical shift.
4133 }
4134 // We should be shifting a constant.
4135 // FIXME: best to use isConstantOrConstantVector().
4136 C = V.getOperand(0);
4138 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4139 if (!CC)
4140 return false;
4141 Y = V.getOperand(1);
4142
4144 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4145 return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4146 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4147 };
4148
4149 // LHS of comparison should be an one-use 'and'.
4150 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4151 return SDValue();
4152
4153 X = N0.getOperand(0);
4154 SDValue Mask = N0.getOperand(1);
4155
4156 // 'and' is commutative!
4157 if (!Match(Mask)) {
4158 std::swap(X, Mask);
4159 if (!Match(Mask))
4160 return SDValue();
4161 }
4162
4163 EVT VT = X.getValueType();
4164
4165 // Produce:
4166 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4167 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4168 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4169 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4170 return T2;
4171}
4172
4173/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4174/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4175/// handle the commuted versions of these patterns.
4176SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4177 ISD::CondCode Cond, const SDLoc &DL,
4178 DAGCombinerInfo &DCI) const {
4179 unsigned BOpcode = N0.getOpcode();
4180 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4181 "Unexpected binop");
4182 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4183
4184 // (X + Y) == X --> Y == 0
4185 // (X - Y) == X --> Y == 0
4186 // (X ^ Y) == X --> Y == 0
4187 SelectionDAG &DAG = DCI.DAG;
4188 EVT OpVT = N0.getValueType();
4189 SDValue X = N0.getOperand(0);
4190 SDValue Y = N0.getOperand(1);
4191 if (X == N1)
4192 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4193
4194 if (Y != N1)
4195 return SDValue();
4196
4197 // (X + Y) == Y --> X == 0
4198 // (X ^ Y) == Y --> X == 0
4199 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4200 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4201
4202 // The shift would not be valid if the operands are boolean (i1).
4203 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4204 return SDValue();
4205
4206 // (X - Y) == Y --> X == Y << 1
4207 SDValue One =
4208 DAG.getShiftAmountConstant(1, OpVT, DL, !DCI.isBeforeLegalize());
4209 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4210 if (!DCI.isCalledByLegalizer())
4211 DCI.AddToWorklist(YShl1.getNode());
4212 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4213}
4214
4216 SDValue N0, const APInt &C1,
4217 ISD::CondCode Cond, const SDLoc &dl,
4218 SelectionDAG &DAG) {
4219 // Look through truncs that don't change the value of a ctpop.
4220 // FIXME: Add vector support? Need to be careful with setcc result type below.
4221 SDValue CTPOP = N0;
4222 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4224 CTPOP = N0.getOperand(0);
4225
4226 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4227 return SDValue();
4228
4229 EVT CTVT = CTPOP.getValueType();
4230 SDValue CTOp = CTPOP.getOperand(0);
4231
4232 // Expand a power-of-2-or-zero comparison based on ctpop:
4233 // (ctpop x) u< 2 -> (x & x-1) == 0
4234 // (ctpop x) u> 1 -> (x & x-1) != 0
4235 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4236 // Keep the CTPOP if it is a cheap vector op.
4237 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4238 return SDValue();
4239
4240 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4241 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4242 return SDValue();
4243 if (C1 == 0 && (Cond == ISD::SETULT))
4244 return SDValue(); // This is handled elsewhere.
4245
4246 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4247
4248 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4249 SDValue Result = CTOp;
4250 for (unsigned i = 0; i < Passes; i++) {
4251 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4252 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4253 }
4255 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4256 }
4257
4258 // Expand a power-of-2 comparison based on ctpop
4259 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4260 // Keep the CTPOP if it is cheap.
4261 if (TLI.isCtpopFast(CTVT))
4262 return SDValue();
4263
4264 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4265 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4266 assert(CTVT.isInteger());
4267 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4268
4269 // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4270 // check before emitting a potentially unnecessary op.
4271 if (DAG.isKnownNeverZero(CTOp)) {
4272 // (ctpop x) == 1 --> (x & x-1) == 0
4273 // (ctpop x) != 1 --> (x & x-1) != 0
4274 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4275 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4276 return RHS;
4277 }
4278
4279 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4280 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4281 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4283 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4284 }
4285
4286 return SDValue();
4287}
4288
4290 ISD::CondCode Cond, const SDLoc &dl,
4291 SelectionDAG &DAG) {
4292 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4293 return SDValue();
4294
4295 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4296 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4297 return SDValue();
4298
4299 auto getRotateSource = [](SDValue X) {
4300 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4301 return X.getOperand(0);
4302 return SDValue();
4303 };
4304
4305 // Peek through a rotated value compared against 0 or -1:
4306 // (rot X, Y) == 0/-1 --> X == 0/-1
4307 // (rot X, Y) != 0/-1 --> X != 0/-1
4308 if (SDValue R = getRotateSource(N0))
4309 return DAG.getSetCC(dl, VT, R, N1, Cond);
4310
4311 // Peek through an 'or' of a rotated value compared against 0:
4312 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4313 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4314 //
4315 // TODO: Add the 'and' with -1 sibling.
4316 // TODO: Recurse through a series of 'or' ops to find the rotate.
4317 EVT OpVT = N0.getValueType();
4318 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4319 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4320 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4321 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4322 }
4323 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4324 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4325 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4326 }
4327 }
4328
4329 return SDValue();
4330}
4331
4333 ISD::CondCode Cond, const SDLoc &dl,
4334 SelectionDAG &DAG) {
4335 // If we are testing for all-bits-clear, we might be able to do that with
4336 // less shifting since bit-order does not matter.
4337 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4338 return SDValue();
4339
4340 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4341 if (!C1 || !C1->isZero())
4342 return SDValue();
4343
4344 if (!N0.hasOneUse() ||
4345 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4346 return SDValue();
4347
4348 unsigned BitWidth = N0.getScalarValueSizeInBits();
4349 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4350 if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4351 return SDValue();
4352
4353 // Canonicalize fshr as fshl to reduce pattern-matching.
4354 unsigned ShAmt = ShAmtC->getZExtValue();
4355 if (N0.getOpcode() == ISD::FSHR)
4356 ShAmt = BitWidth - ShAmt;
4357
4358 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4359 SDValue X, Y;
4360 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4361 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4362 return false;
4363 if (Or.getOperand(0) == Other) {
4364 X = Or.getOperand(0);
4365 Y = Or.getOperand(1);
4366 return true;
4367 }
4368 if (Or.getOperand(1) == Other) {
4369 X = Or.getOperand(1);
4370 Y = Or.getOperand(0);
4371 return true;
4372 }
4373 return false;
4374 };
4375
4376 EVT OpVT = N0.getValueType();
4377 EVT ShAmtVT = N0.getOperand(2).getValueType();
4378 SDValue F0 = N0.getOperand(0);
4379 SDValue F1 = N0.getOperand(1);
4380 if (matchOr(F0, F1)) {
4381 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4382 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4383 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4384 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4385 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4386 }
4387 if (matchOr(F1, F0)) {
4388 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4389 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4390 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4391 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4392 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4393 }
4394
4395 return SDValue();
4396}
4397
4398/// Try to simplify a setcc built with the specified operands and cc. If it is
4399/// unable to simplify it, return a null SDValue.
4401 ISD::CondCode Cond, bool foldBooleans,
4402 DAGCombinerInfo &DCI,
4403 const SDLoc &dl) const {
4404 SelectionDAG &DAG = DCI.DAG;
4405 const DataLayout &Layout = DAG.getDataLayout();
4406 EVT OpVT = N0.getValueType();
4408
4409 // Constant fold or commute setcc.
4410 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4411 return Fold;
4412
4413 bool N0ConstOrSplat =
4414 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4415 bool N1ConstOrSplat =
4416 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4417
4418 // Canonicalize toward having the constant on the RHS.
4419 // TODO: Handle non-splat vector constants. All undef causes trouble.
4420 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4421 // infinite loop here when we encounter one.
4423 if (N0ConstOrSplat && !N1ConstOrSplat &&
4424 (DCI.isBeforeLegalizeOps() ||
4425 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4426 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4427
4428 // If we have a subtract with the same 2 non-constant operands as this setcc
4429 // -- but in reverse order -- then try to commute the operands of this setcc
4430 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4431 // instruction on some targets.
4432 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4433 (DCI.isBeforeLegalizeOps() ||
4434 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4435 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4436 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4437 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4438
4439 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4440 return V;
4441
4442 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4443 return V;
4444
4445 if (auto *N1C = isConstOrConstSplat(N1)) {
4446 const APInt &C1 = N1C->getAPIntValue();
4447
4448 // Optimize some CTPOP cases.
4449 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4450 return V;
4451
4452 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4453 // X * Y == 0 --> (X == 0) || (Y == 0)
4454 // X * Y != 0 --> (X != 0) && (Y != 0)
4455 // TODO: This bails out if minsize is set, but if the target doesn't have a
4456 // single instruction multiply for this type, it would likely be
4457 // smaller to decompose.
4458 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4459 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4460 (N0->getFlags().hasNoUnsignedWrap() ||
4461 N0->getFlags().hasNoSignedWrap()) &&
4462 !Attr.hasFnAttr(Attribute::MinSize)) {
4463 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4464 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4465 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4466 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4467 }
4468
4469 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4470 // equality comparison, then we're just comparing whether X itself is
4471 // zero.
4472 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4473 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4474 llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4475 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4476 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4477 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4478 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4479 // (srl (ctlz x), 5) == 0 -> X != 0
4480 // (srl (ctlz x), 5) != 1 -> X != 0
4481 Cond = ISD::SETNE;
4482 } else {
4483 // (srl (ctlz x), 5) != 0 -> X == 0
4484 // (srl (ctlz x), 5) == 1 -> X == 0
4485 Cond = ISD::SETEQ;
4486 }
4487 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4488 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4489 Cond);
4490 }
4491 }
4492 }
4493 }
4494
4495 // FIXME: Support vectors.
4496 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4497 const APInt &C1 = N1C->getAPIntValue();
4498
4499 // (zext x) == C --> x == (trunc C)
4500 // (sext x) == C --> x == (trunc C)
4501 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4502 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4503 unsigned MinBits = N0.getValueSizeInBits();
4504 SDValue PreExt;
4505 bool Signed = false;
4506 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4507 // ZExt
4508 MinBits = N0->getOperand(0).getValueSizeInBits();
4509 PreExt = N0->getOperand(0);
4510 } else if (N0->getOpcode() == ISD::AND) {
4511 // DAGCombine turns costly ZExts into ANDs
4512 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4513 if ((C->getAPIntValue()+1).isPowerOf2()) {
4514 MinBits = C->getAPIntValue().countr_one();
4515 PreExt = N0->getOperand(0);
4516 }
4517 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4518 // SExt
4519 MinBits = N0->getOperand(0).getValueSizeInBits();
4520 PreExt = N0->getOperand(0);
4521 Signed = true;
4522 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4523 // ZEXTLOAD / SEXTLOAD
4524 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4525 MinBits = LN0->getMemoryVT().getSizeInBits();
4526 PreExt = N0;
4527 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4528 Signed = true;
4529 MinBits = LN0->getMemoryVT().getSizeInBits();
4530 PreExt = N0;
4531 }
4532 }
4533
4534 // Figure out how many bits we need to preserve this constant.
4535 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4536
4537 // Make sure we're not losing bits from the constant.
4538 if (MinBits > 0 &&
4539 MinBits < C1.getBitWidth() &&
4540 MinBits >= ReqdBits) {
4541 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4542 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4543 // Will get folded away.
4544 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4545 if (MinBits == 1 && C1 == 1)
4546 // Invert the condition.
4547 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4549 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4550 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4551 }
4552
4553 // If truncating the setcc operands is not desirable, we can still
4554 // simplify the expression in some cases:
4555 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4556 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4557 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4558 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4559 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4560 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4561 SDValue TopSetCC = N0->getOperand(0);
4562 unsigned N0Opc = N0->getOpcode();
4563 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4564 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4565 TopSetCC.getOpcode() == ISD::SETCC &&
4566 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4567 (isConstFalseVal(N1) ||
4568 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4569
4570 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4571 (!N1C->isZero() && Cond == ISD::SETNE);
4572
4573 if (!Inverse)
4574 return TopSetCC;
4575
4577 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4578 TopSetCC.getOperand(0).getValueType());
4579 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4580 TopSetCC.getOperand(1),
4581 InvCond);
4582 }
4583 }
4584 }
4585
4586 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4587 // equality or unsigned, and all 1 bits of the const are in the same
4588 // partial word, see if we can shorten the load.
4589 if (DCI.isBeforeLegalize() &&
4591 N0.getOpcode() == ISD::AND && C1 == 0 &&
4592 N0.getNode()->hasOneUse() &&
4593 isa<LoadSDNode>(N0.getOperand(0)) &&
4594 N0.getOperand(0).getNode()->hasOneUse() &&
4595 isa<ConstantSDNode>(N0.getOperand(1))) {
4596 LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
4597 APInt bestMask;
4598 unsigned bestWidth = 0, bestOffset = 0;
4599 if (Lod->isSimple() && Lod->isUnindexed()) {
4600 unsigned origWidth = N0.getValueSizeInBits();
4601 unsigned maskWidth = origWidth;
4602 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4603 // 8 bits, but have to be careful...
4604 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4605 origWidth = Lod->getMemoryVT().getSizeInBits();
4606 const APInt &Mask = N0.getConstantOperandAPInt(1);
4607 for (unsigned width = origWidth / 2; width>=8; width /= 2) {
4608 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4609 for (unsigned offset=0; offset<origWidth/width; offset++) {
4610 if (Mask.isSubsetOf(newMask)) {
4611 if (Layout.isLittleEndian())
4612 bestOffset = (uint64_t)offset * (width/8);
4613 else
4614 bestOffset = (origWidth/width - offset - 1) * (width/8);
4615 bestMask = Mask.lshr(offset * (width/8) * 8);
4616 bestWidth = width;
4617 break;
4618 }
4619 newMask <<= width;
4620 }
4621 }
4622 }
4623 if (bestWidth) {
4624 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4625 if (newVT.isRound() &&
4627 SDValue Ptr = Lod->getBasePtr();
4628 if (bestOffset != 0)
4630 dl);
4631 SDValue NewLoad =
4632 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4633 Lod->getPointerInfo().getWithOffset(bestOffset),
4634 Lod->getOriginalAlign());
4635 return DAG.getSetCC(dl, VT,
4636 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4637 DAG.getConstant(bestMask.trunc(bestWidth),
4638 dl, newVT)),
4639 DAG.getConstant(0LL, dl, newVT), Cond);
4640 }
4641 }
4642 }
4643
4644 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4645 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4646 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4647
4648 // If the comparison constant has bits in the upper part, the
4649 // zero-extended value could never match.
4651 C1.getBitWidth() - InSize))) {
4652 switch (Cond) {
4653 case ISD::SETUGT:
4654 case ISD::SETUGE:
4655 case ISD::SETEQ:
4656 return DAG.getConstant(0, dl, VT);
4657 case ISD::SETULT:
4658 case ISD::SETULE:
4659 case ISD::SETNE:
4660 return DAG.getConstant(1, dl, VT);
4661 case ISD::SETGT:
4662 case ISD::SETGE:
4663 // True if the sign bit of C1 is set.
4664 return DAG.getConstant(C1.isNegative(), dl, VT);
4665 case ISD::SETLT:
4666 case ISD::SETLE:
4667 // True if the sign bit of C1 isn't set.
4668 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4669 default:
4670 break;
4671 }
4672 }
4673
4674 // Otherwise, we can perform the comparison with the low bits.
4675 switch (Cond) {
4676 case ISD::SETEQ:
4677 case ISD::SETNE:
4678 case ISD::SETUGT:
4679 case ISD::SETUGE:
4680 case ISD::SETULT:
4681 case ISD::SETULE: {
4682 EVT newVT = N0.getOperand(0).getValueType();
4683 if (DCI.isBeforeLegalizeOps() ||
4684 (isOperationLegal(ISD::SETCC, newVT) &&
4685 isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
4686 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4687 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4688
4689 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4690 NewConst, Cond);
4691 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4692 }
4693 break;
4694 }
4695 default:
4696 break; // todo, be more careful with signed comparisons
4697 }
4698 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4699 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4700 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4701 OpVT)) {
4702 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4703 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4704 EVT ExtDstTy = N0.getValueType();
4705 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4706
4707 // If the constant doesn't fit into the number of bits for the source of
4708 // the sign extension, it is impossible for both sides to be equal.
4709 if (C1.getSignificantBits() > ExtSrcTyBits)
4710 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4711
4712 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4713 ExtDstTy != ExtSrcTy && "Unexpected types!");
4714 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4715 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4716 DAG.getConstant(Imm, dl, ExtDstTy));
4717 if (!DCI.isCalledByLegalizer())
4718 DCI.AddToWorklist(ZextOp.getNode());
4719 // Otherwise, make this a use of a zext.
4720 return DAG.getSetCC(dl, VT, ZextOp,
4721 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4722 } else if ((N1C->isZero() || N1C->isOne()) &&
4723 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4724 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
4725 // excluded as they are handled below whilst checking for foldBooleans.
4726 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4727 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4728 (N0.getValueType() == MVT::i1 ||
4732 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4733 if (TrueWhenTrue)
4734 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4735 // Invert the condition.
4736 if (N0.getOpcode() == ISD::SETCC) {
4737 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4739 if (DCI.isBeforeLegalizeOps() ||
4741 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4742 }
4743 }
4744
4745 if ((N0.getOpcode() == ISD::XOR ||
4746 (N0.getOpcode() == ISD::AND &&
4747 N0.getOperand(0).getOpcode() == ISD::XOR &&
4748 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4749 isOneConstant(N0.getOperand(1))) {
4750 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
4751 // can only do this if the top bits are known zero.
4752 unsigned BitWidth = N0.getValueSizeInBits();
4753 if (DAG.MaskedValueIsZero(N0,
4755 BitWidth-1))) {
4756 // Okay, get the un-inverted input value.
4757 SDValue Val;
4758 if (N0.getOpcode() == ISD::XOR) {
4759 Val = N0.getOperand(0);
4760 } else {
4761 assert(N0.getOpcode() == ISD::AND &&
4762 N0.getOperand(0).getOpcode() == ISD::XOR);
4763 // ((X^1)&1)^1 -> X & 1
4764 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4765 N0.getOperand(0).getOperand(0),
4766 N0.getOperand(1));
4767 }
4768
4769 return DAG.getSetCC(dl, VT, Val, N1,
4771 }
4772 } else if (N1C->isOne()) {
4773 SDValue Op0 = N0;
4774 if (Op0.getOpcode() == ISD::TRUNCATE)
4775 Op0 = Op0.getOperand(0);
4776
4777 if ((Op0.getOpcode() == ISD::XOR) &&
4778 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4779 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4780 SDValue XorLHS = Op0.getOperand(0);
4781 SDValue XorRHS = Op0.getOperand(1);
4782 // Ensure that the input setccs return an i1 type or 0/1 value.
4783 if (Op0.getValueType() == MVT::i1 ||
4788 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4790 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4791 }
4792 }
4793 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4794 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4795 if (Op0.getValueType().bitsGT(VT))
4796 Op0 = DAG.getNode(ISD::AND, dl, VT,
4797 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4798 DAG.getConstant(1, dl, VT));
4799 else if (Op0.getValueType().bitsLT(VT))
4800 Op0 = DAG.getNode(ISD::AND, dl, VT,
4801 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4802 DAG.getConstant(1, dl, VT));
4803
4804 return DAG.getSetCC(dl, VT, Op0,
4805 DAG.getConstant(0, dl, Op0.getValueType()),
4807 }
4808 if (Op0.getOpcode() == ISD::AssertZext &&
4809 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4810 return DAG.getSetCC(dl, VT, Op0,
4811 DAG.getConstant(0, dl, Op0.getValueType()),
4813 }
4814 }
4815
4816 // Given:
4817 // icmp eq/ne (urem %x, %y), 0
4818 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4819 // icmp eq/ne %x, 0
4820 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4821 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4822 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4823 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4824 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4825 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4826 }
4827
4828 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4829 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4830 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4831 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4832 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4833 N1C && N1C->isAllOnes()) {
4834 return DAG.getSetCC(dl, VT, N0.getOperand(0),
4835 DAG.getConstant(0, dl, OpVT),
4837 }
4838
4839 if (SDValue V =
4840 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4841 return V;
4842 }
4843
4844 // These simplifications apply to splat vectors as well.
4845 // TODO: Handle more splat vector cases.
4846 if (auto *N1C = isConstOrConstSplat(N1)) {
4847 const APInt &C1 = N1C->getAPIntValue();
4848
4849 APInt MinVal, MaxVal;
4850 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4852 MinVal = APInt::getSignedMinValue(OperandBitSize);
4853 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4854 } else {
4855 MinVal = APInt::getMinValue(OperandBitSize);
4856 MaxVal = APInt::getMaxValue(OperandBitSize);
4857 }
4858
4859 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4860 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4861 // X >= MIN --> true
4862 if (C1 == MinVal)
4863 return DAG.getBoolConstant(true, dl, VT, OpVT);
4864
4865 if (!VT.isVector()) { // TODO: Support this for vectors.
4866 // X >= C0 --> X > (C0 - 1)
4867 APInt C = C1 - 1;
4869 if ((DCI.isBeforeLegalizeOps() ||
4870 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4871 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4872 isLegalICmpImmediate(C.getSExtValue())))) {
4873 return DAG.getSetCC(dl, VT, N0,
4874 DAG.getConstant(C, dl, N1.getValueType()),
4875 NewCC);
4876 }
4877 }
4878 }
4879
4880 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
4881 // X <= MAX --> true
4882 if (C1 == MaxVal)
4883 return DAG.getBoolConstant(true, dl, VT, OpVT);
4884
4885 // X <= C0 --> X < (C0 + 1)
4886 if (!VT.isVector()) { // TODO: Support this for vectors.
4887 APInt C = C1 + 1;
4889 if ((DCI.isBeforeLegalizeOps() ||
4890 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4891 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4892 isLegalICmpImmediate(C.getSExtValue())))) {
4893 return DAG.getSetCC(dl, VT, N0,
4894 DAG.getConstant(C, dl, N1.getValueType()),
4895 NewCC);
4896 }
4897 }
4898 }
4899
4900 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
4901 if (C1 == MinVal)
4902 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
4903
4904 // TODO: Support this for vectors after legalize ops.
4905 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4906 // Canonicalize setlt X, Max --> setne X, Max
4907 if (C1 == MaxVal)
4908 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4909
4910 // If we have setult X, 1, turn it into seteq X, 0
4911 if (C1 == MinVal+1)
4912 return DAG.getSetCC(dl, VT, N0,
4913 DAG.getConstant(MinVal, dl, N0.getValueType()),
4914 ISD::SETEQ);
4915 }
4916 }
4917
4918 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
4919 if (C1 == MaxVal)
4920 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
4921
4922 // TODO: Support this for vectors after legalize ops.
4923 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4924 // Canonicalize setgt X, Min --> setne X, Min
4925 if (C1 == MinVal)
4926 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4927
4928 // If we have setugt X, Max-1, turn it into seteq X, Max
4929 if (C1 == MaxVal-1)
4930 return DAG.getSetCC(dl, VT, N0,
4931 DAG.getConstant(MaxVal, dl, N0.getValueType()),
4932 ISD::SETEQ);
4933 }
4934 }
4935
4936 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
4937 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4938 if (C1.isZero())
4939 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
4940 VT, N0, N1, Cond, DCI, dl))
4941 return CC;
4942
4943 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
4944 // For example, when high 32-bits of i64 X are known clear:
4945 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
4946 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
4947 bool CmpZero = N1C->isZero();
4948 bool CmpNegOne = N1C->isAllOnes();
4949 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
4950 // Match or(lo,shl(hi,bw/2)) pattern.
4951 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
4952 unsigned EltBits = V.getScalarValueSizeInBits();
4953 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
4954 return false;
4955 SDValue LHS = V.getOperand(0);
4956 SDValue RHS = V.getOperand(1);
4957 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
4958 // Unshifted element must have zero upperbits.
4959 if (RHS.getOpcode() == ISD::SHL &&
4960 isa<ConstantSDNode>(RHS.getOperand(1)) &&
4961 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
4962 DAG.MaskedValueIsZero(LHS, HiBits)) {
4963 Lo = LHS;
4964 Hi = RHS.getOperand(0);
4965 return true;
4966 }
4967 if (LHS.getOpcode() == ISD::SHL &&
4968 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4969 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
4970 DAG.MaskedValueIsZero(RHS, HiBits)) {
4971 Lo = RHS;
4972 Hi = LHS.getOperand(0);
4973 return true;
4974 }
4975 return false;
4976 };
4977
4978 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
4979 unsigned EltBits = N0.getScalarValueSizeInBits();
4980 unsigned HalfBits = EltBits / 2;
4981 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
4982 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
4983 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
4984 SDValue NewN0 =
4985 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
4986 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
4987 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
4988 };
4989
4990 SDValue Lo, Hi;
4991 if (IsConcat(N0, Lo, Hi))
4992 return MergeConcat(Lo, Hi);
4993
4994 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
4995 SDValue Lo0, Lo1, Hi0, Hi1;
4996 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
4997 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
4998 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
4999 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5000 }
5001 }
5002 }
5003 }
5004
5005 // If we have "setcc X, C0", check to see if we can shrink the immediate
5006 // by changing cc.
5007 // TODO: Support this for vectors after legalize ops.
5008 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5009 // SETUGT X, SINTMAX -> SETLT X, 0
5010 // SETUGE X, SINTMIN -> SETLT X, 0
5011 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5012 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5013 return DAG.getSetCC(dl, VT, N0,
5014 DAG.getConstant(0, dl, N1.getValueType()),
5015 ISD::SETLT);
5016
5017 // SETULT X, SINTMIN -> SETGT X, -1
5018 // SETULE X, SINTMAX -> SETGT X, -1
5019 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5020 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5021 return DAG.getSetCC(dl, VT, N0,
5022 DAG.getAllOnesConstant(dl, N1.getValueType()),
5023 ISD::SETGT);
5024 }
5025 }
5026
5027 // Back to non-vector simplifications.
5028 // TODO: Can we do these for vector splats?
5029 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5030 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5031 const APInt &C1 = N1C->getAPIntValue();
5032 EVT ShValTy = N0.getValueType();
5033
5034 // Fold bit comparisons when we can. This will result in an
5035 // incorrect value when boolean false is negative one, unless
5036 // the bitsize is 1 in which case the false value is the same
5037 // in practice regardless of the representation.
5038 if ((VT.getSizeInBits() == 1 ||
5040 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5041 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5042 N0.getOpcode() == ISD::AND) {
5043 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5044 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5045 // Perform the xform if the AND RHS is a single bit.
5046 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5047 if (AndRHS->getAPIntValue().isPowerOf2() &&
5048 !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
5049 return DAG.getNode(
5050 ISD::TRUNCATE, dl, VT,
5051 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5053 ShCt, ShValTy, dl, !DCI.isBeforeLegalize())));
5054 }
5055 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5056 // (X & 8) == 8 --> (X & 8) >> 3
5057 // Perform the xform if C1 is a single bit.
5058 unsigned ShCt = C1.logBase2();
5059 if (C1.isPowerOf2() &&
5060 !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
5061 return DAG.getNode(
5062 ISD::TRUNCATE, dl, VT,
5063 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5065 ShCt, ShValTy, dl, !DCI.isBeforeLegalize())));
5066 }
5067 }
5068 }
5069 }
5070
5071 if (C1.getSignificantBits() <= 64 &&
5073 // (X & -256) == 256 -> (X >> 8) == 1
5074 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5075 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5076 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5077 const APInt &AndRHSC = AndRHS->getAPIntValue();
5078 if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
5079 unsigned ShiftBits = AndRHSC.countr_zero();
5080 if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5081 SDValue Shift = DAG.getNode(
5082 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5083 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl,
5084 !DCI.isBeforeLegalize()));
5085 SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5086 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5087 }
5088 }
5089 }
5090 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5091 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5092 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5093 // X < 0x100000000 -> (X >> 32) < 1
5094 // X >= 0x100000000 -> (X >> 32) >= 1
5095 // X <= 0x0ffffffff -> (X >> 32) < 1
5096 // X > 0x0ffffffff -> (X >> 32) >= 1
5097 unsigned ShiftBits;
5098 APInt NewC = C1;
5099 ISD::CondCode NewCond = Cond;
5100 if (AdjOne) {
5101 ShiftBits = C1.countr_one();
5102 NewC = NewC + 1;
5103 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5104 } else {
5105 ShiftBits = C1.countr_zero();
5106 }
5107 NewC.lshrInPlace(ShiftBits);
5108 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5110 !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5111 SDValue Shift =
5112 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5113 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl,
5114 !DCI.isBeforeLegalize()));
5115 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5116 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5117 }
5118 }
5119 }
5120 }
5121
5122 if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5123 auto *CFP = cast<ConstantFPSDNode>(N1);
5124 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5125
5126 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5127 // constant if knowing that the operand is non-nan is enough. We prefer to
5128 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5129 // materialize 0.0.
5130 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5131 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5132
5133 // setcc (fneg x), C -> setcc swap(pred) x, -C
5134 if (N0.getOpcode() == ISD::FNEG) {
5136 if (DCI.isBeforeLegalizeOps() ||
5137 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5138 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5139 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5140 }
5141 }
5142
5143 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5145 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5146 bool IsFabs = N0.getOpcode() == ISD::FABS;
5147 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5148 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5149 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5150 : (IsFabs ? fcInf : fcPosInf);
5151 if (Cond == ISD::SETUEQ)
5152 Flag |= fcNan;
5153 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5154 DAG.getTargetConstant(Flag, dl, MVT::i32));
5155 }
5156 }
5157
5158 // If the condition is not legal, see if we can find an equivalent one
5159 // which is legal.
5161 // If the comparison was an awkward floating-point == or != and one of
5162 // the comparison operands is infinity or negative infinity, convert the
5163 // condition to a less-awkward <= or >=.
5164 if (CFP->getValueAPF().isInfinity()) {
5165 bool IsNegInf = CFP->getValueAPF().isNegative();
5167 switch (Cond) {
5168 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5169 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5170 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5171 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5172 default: break;
5173 }
5174 if (NewCond != ISD::SETCC_INVALID &&
5175 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5176 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5177 }
5178 }
5179 }
5180
5181 if (N0 == N1) {
5182 // The sext(setcc()) => setcc() optimization relies on the appropriate
5183 // constant being emitted.
5184 assert(!N0.getValueType().isInteger() &&
5185 "Integer types should be handled by FoldSetCC");
5186
5187 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5188 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5189 if (UOF == 2) // FP operators that are undefined on NaNs.
5190 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5191 if (UOF == unsigned(EqTrue))
5192 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5193 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5194 // if it is not already.
5195 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5196 if (NewCond != Cond &&
5197 (DCI.isBeforeLegalizeOps() ||
5198 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5199 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5200 }
5201
5202 // ~X > ~Y --> Y > X
5203 // ~X < ~Y --> Y < X
5204 // ~X < C --> X > ~C
5205 // ~X > C --> X < ~C
5206 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5207 N0.getValueType().isInteger()) {
5208 if (isBitwiseNot(N0)) {
5209 if (isBitwiseNot(N1))
5210 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5211
5214 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5215 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5216 }
5217 }
5218 }
5219
5220 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5221 N0.getValueType().isInteger()) {
5222 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5223 N0.getOpcode() == ISD::XOR) {
5224 // Simplify (X+Y) == (X+Z) --> Y == Z
5225 if (N0.getOpcode() == N1.getOpcode()) {
5226 if (N0.getOperand(0) == N1.getOperand(0))
5227 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5228 if (N0.getOperand(1) == N1.getOperand(1))
5229 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5230 if (isCommutativeBinOp(N0.getOpcode())) {
5231 // If X op Y == Y op X, try other combinations.
5232 if (N0.getOperand(0) == N1.getOperand(1))
5233 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5234 Cond);
5235 if (N0.getOperand(1) == N1.getOperand(0))
5236 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5237 Cond);
5238 }
5239 }
5240
5241 // If RHS is a legal immediate value for a compare instruction, we need
5242 // to be careful about increasing register pressure needlessly.
5243 bool LegalRHSImm = false;
5244
5245 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5246 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5247 // Turn (X+C1) == C2 --> X == C2-C1
5248 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5249 return DAG.getSetCC(
5250 dl, VT, N0.getOperand(0),
5251 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5252 dl, N0.getValueType()),
5253 Cond);
5254
5255 // Turn (X^C1) == C2 --> X == C1^C2
5256 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5257 return DAG.getSetCC(
5258 dl, VT, N0.getOperand(0),
5259 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5260 dl, N0.getValueType()),
5261 Cond);
5262 }
5263
5264 // Turn (C1-X) == C2 --> X == C1-C2
5265 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5266 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5267 return DAG.getSetCC(
5268 dl, VT, N0.getOperand(1),
5269 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5270 dl, N0.getValueType()),
5271 Cond);
5272
5273 // Could RHSC fold directly into a compare?
5274 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5275 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5276 }
5277
5278 // (X+Y) == X --> Y == 0 and similar folds.
5279 // Don't do this if X is an immediate that can fold into a cmp
5280 // instruction and X+Y has other uses. It could be an induction variable
5281 // chain, and the transform would increase register pressure.
5282 if (!LegalRHSImm || N0.hasOneUse())
5283 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5284 return V;
5285 }
5286
5287 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5288 N1.getOpcode() == ISD::XOR)
5289 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5290 return V;
5291
5292 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5293 return V;
5294 }
5295
5296 // Fold remainder of division by a constant.
5297 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5298 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5299 // When division is cheap or optimizing for minimum size,
5300 // fall through to DIVREM creation by skipping this fold.
5301 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5302 if (N0.getOpcode() == ISD::UREM) {
5303 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5304 return Folded;
5305 } else if (N0.getOpcode() == ISD::SREM) {
5306 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5307 return Folded;
5308 }
5309 }
5310 }
5311
5312 // Fold away ALL boolean setcc's.
5313 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5314 SDValue Temp;
5315 switch (Cond) {
5316 default: llvm_unreachable("Unknown integer setcc!");
5317 case ISD::SETEQ: // X == Y -> ~(X^Y)
5318 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5319 N0 = DAG.getNOT(dl, Temp, OpVT);
5320 if (!DCI.isCalledByLegalizer())
5321 DCI.AddToWorklist(Temp.getNode());
5322 break;
5323 case ISD::SETNE: // X != Y --> (X^Y)
5324 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5325 break;
5326 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5327 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5328 Temp = DAG.getNOT(dl, N0, OpVT);
5329 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5330 if (!DCI.isCalledByLegalizer())
5331 DCI.AddToWorklist(Temp.getNode());
5332 break;
5333 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5334 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5335 Temp = DAG.getNOT(dl, N1, OpVT);
5336 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5337 if (!DCI.isCalledByLegalizer())
5338 DCI.AddToWorklist(Temp.getNode());
5339 break;
5340 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5341 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5342 Temp = DAG.getNOT(dl, N0, OpVT);
5343 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5344 if (!DCI.isCalledByLegalizer())
5345 DCI.AddToWorklist(Temp.getNode());
5346 break;
5347 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5348 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5349 Temp = DAG.getNOT(dl, N1, OpVT);
5350 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5351 break;
5352 }
5353 if (VT.getScalarType() != MVT::i1) {
5354 if (!DCI.isCalledByLegalizer())
5355 DCI.AddToWorklist(N0.getNode());
5356 // FIXME: If running after legalize, we probably can't do this.
5358 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5359 }
5360 return N0;
5361 }
5362
5363 // Could not fold it.
5364 return SDValue();
5365}
5366
/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
                                    int64_t &Offset) const {

  // Strip any target-specific address wrapper nodes before inspecting.
  SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();

  // Base case: the node itself is a global address; fold its built-in
  // offset into the accumulator.
  if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
    GA = GASD->getGlobal();
    Offset += GASD->getOffset();
    return true;
  }

  // Recursive case: (GA' + C) or (C + GA'), where GA' may itself be a
  // GA-plus-offset expression. Recurse into one operand and fold the
  // other (constant) operand into Offset.
  if (N->getOpcode() == ISD::ADD) {
    SDValue N1 = N->getOperand(0);
    SDValue N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
        Offset += V->getSExtValue();
        return true;
      }
    } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
        Offset += V->getSExtValue();
        return true;
      }
    }
  }

  return false;
}
5398
/// Default target hook invoked by the DAG combiner for target-specific
/// nodes; targets override this to fold their own node kinds.
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}
5404
5405//===----------------------------------------------------------------------===//
5406// Inline Assembler Implementation Methods
5407//===----------------------------------------------------------------------===//
5408
5411 unsigned S = Constraint.size();
5412
5413 if (S == 1) {
5414 switch (Constraint[0]) {
5415 default: break;
5416 case 'r':
5417 return C_RegisterClass;
5418 case 'm': // memory
5419 case 'o': // offsetable
5420 case 'V': // not offsetable
5421 return C_Memory;
5422 case 'p': // Address.
5423 return C_Address;
5424 case 'n': // Simple Integer
5425 case 'E': // Floating Point Constant
5426 case 'F': // Floating Point Constant
5427 return C_Immediate;
5428 case 'i': // Simple Integer or Relocatable Constant
5429 case 's': // Relocatable Constant
5430 case 'X': // Allow ANY value.
5431 case 'I': // Target registers.
5432 case 'J':
5433 case 'K':
5434 case 'L':
5435 case 'M':
5436 case 'N':
5437 case 'O':
5438 case 'P':
5439 case '<':
5440 case '>':
5441 return C_Other;
5442 }
5443 }
5444
5445 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5446 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5447 return C_Memory;
5448 return C_Register;
5449 }
5450 return C_Unknown;
5451}
5452
5453/// Try to replace an X constraint, which matches anything, with another that
5454/// has more specific requirements based on the type of the corresponding
5455/// operand.
5456const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5457 if (ConstraintVT.isInteger())
5458 return "r";
5459 if (ConstraintVT.isFloatingPoint())
5460 return "f"; // works for many targets
5461 return nullptr;
5462}
5463
/// Default hook for lowering a flag-style asm output operand; targets that
/// support flag outputs override this. An empty SDValue means "not handled".
SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  return SDValue();
}
5469
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  StringRef Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-letter constraints are handled here; multi-letter codes are
  // target-specific.
  if (Constraint.size() > 1)
    return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X': // Allows any operand
  case 'i': // Simple Integer or Relocatable Constant
  case 'n': // Simple Integer
  case 's': { // Relocatable Constant

    ConstantSDNode *C;
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (true) {
      // Constant leaf: 'i', 'n', and 'X' accept plain integers ('s' does not).
      if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended. Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
        return;
      }
      // Symbolic leaves: global addresses, block addresses and basic blocks
      // are acceptable for every letter except pure-integer 'n'.
      if (ConstraintLetter != 'n') {
        if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                   GA->getValueType(0),
                                                   Offset + GA->getOffset()));
          return;
        }
        if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetBlockAddress(
              BA->getBlockAddress(), BA->getValueType(0),
              Offset + BA->getOffset(), BA->getTargetFlags()));
          return;
        }
        if (isa<BasicBlockSDNode>(Op)) {
          Ops.push_back(Op);
          return;
        }
      }
      // Interior node: peel a constant off an ADD/SUB, accumulate it into
      // Offset, and keep walking toward the symbol/constant leaf.
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
          Op = Op.getOperand(1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
          Op = Op.getOperand(0);
        else
          return;
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      // Anything else cannot be folded to a constant/symbol operand.
      return;
    }
    break;
  }
  }
}
5548
/// Default hook: targets override this to append extra SDValue operands when
/// lowering a target intrinsic call. The base implementation adds nothing.
void TargetLowering::CollectTargetIntrinsicOperands(
    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
}
5552
5553std::pair<unsigned, const TargetRegisterClass *>
5555 StringRef Constraint,
5556 MVT VT) const {
5557 if (!Constraint.starts_with("{"))
5558 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5559 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5560
5561 // Remove the braces from around the name.
5562 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5563
5564 std::pair<unsigned, const TargetRegisterClass *> R =
5565 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5566
5567 // Figure out which register class contains this reg.
5568 for (const TargetRegisterClass *RC : RI->regclasses()) {
5569 // If none of the value types for this register class are valid, we
5570 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5571 if (!isLegalRC(*RI, *RC))
5572 continue;
5573
5574 for (const MCPhysReg &PR : *RC) {
5575 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5576 std::pair<unsigned, const TargetRegisterClass *> S =
5577 std::make_pair(PR, RC);
5578
5579 // If this register class has the requested value type, return it,
5580 // otherwise keep searching and return the first class found
5581 // if no other is found which explicitly has the requested type.
5582 if (RI->isTypeLegalForClass(*RC, VT))
5583 return S;
5584 if (!R.second)
5585 R = S;
5586 }
5587 }
5588 }
5589
5590 return R;
5591}
5592
5593//===----------------------------------------------------------------------===//
5594// Constraint Selection.
5595
/// Return true of this is an input operand that is a matching constraint like
/// "4".
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  // A matching constraint is written as the decimal index of the output
  // operand it is tied to, so it always begins with a digit.
  return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
}
5602
/// If this is an input matching constraint, this method returns the output
/// operand it matches.
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  // The constraint code is the decimal index of the matched output operand.
  return atoi(ConstraintCode.c_str());
}
5609
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.
  unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
        break;
      }

      // The return value of the call is this value. As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
        // Multiple outputs are returned as a struct; pick this output's slot.
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      break;
    case InlineAsm::isLabel:
      OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
      ++LabelNo;
      // Labels don't consume call arguments; skip the operand-type logic.
      continue;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // Indirect operands carry their pointee type in the elementtype attr.
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpTy = IntegerType::get(OpTy->getContext(), BitSize);
          break;
        }
      }

      EVT VT = getAsmOperandValueType(DL, OpTy, true);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        // Mismatched VTs are tolerated only when both sides land in the same
        // register class and agree on integer-vs-FP.
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
5796
/// Return a number indicating our preference for choosing a type of constraint
/// over another, for the purpose of sorting them. Immediates are almost always
/// preferable (when they can be emitted). A higher return value means a
/// stronger preference for one constraint type relative to another.
/// FIXME: We should prefer registers over memory but doing so may lead to
/// unrecoverable register exhaustion later.
/// https://github.com/llvm/llvm-project/issues/20571
/// NOTE: the function name's "Piority" spelling is kept because callers in
/// this file use it.
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
  switch (CT) {
  case TargetLowering::C_Immediate:
  case TargetLowering::C_Other:
    return 4;
  case TargetLowering::C_Memory:
  case TargetLowering::C_Address:
    return 3;
  case TargetLowering::C_RegisterClass:
    return 2;
  case TargetLowering::C_Register:
    return 1;
  case TargetLowering::C_Unknown:
    return 0;
  }
  llvm_unreachable("Invalid constraint type");
}
5821
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
TargetLowering::getMultipleConstraintMatchWeight(
    AsmOperandInfo &info, int maIndex) const {
  InlineAsm::ConstraintCodeVector *rCodes;
  // An out-of-range alternative index means "use the operand's own codes".
  if (maIndex >= (int)info.multipleAlternatives.size())
    rCodes = &info.Codes;
  else
    rCodes = &info.multipleAlternatives[maIndex].Codes;
  ConstraintWeight BestWeight = CW_Invalid;

  // Loop over the options, keeping track of the most general one.
  for (const std::string &rCode : *rCodes) {
    ConstraintWeight weight =
        getSingleConstraintMatchWeight(info, rCode.c_str());
    if (weight > BestWeight)
      BestWeight = weight;
  }

  return BestWeight;
}
5845
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
TargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  // Look at the constraint type.
  switch (*constraint) {
  case 'i': // immediate integer.
  case 'n': // immediate integer with a known value.
    if (isa<ConstantInt>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 's': // non-explicit intregal immediate.
    if (isa<GlobalValue>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 'E': // immediate float if host format.
  case 'F': // immediate float.
    if (isa<ConstantFP>(CallOperandVal))
      weight = CW_Constant;
    break;
  case '<': // memory operand with autodecrement.
  case '>': // memory operand with autoincrement.
  case 'm': // memory operand.
  case 'o': // offsettable memory operand
  case 'V': // non-offsettable memory operand
    weight = CW_Memory;
    break;
  case 'r': // general register.
  case 'g': // general register, memory operand or immediate integer.
            // note: Clang converts "g" to "imr".
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;
  case 'X': // any operand.
  default:
    weight = CW_Default;
    break;
  }
  return weight;
}
5894
/// If there are multiple different constraints that we could pick for this
/// operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
/// into seven classes:
///    Register      -> one specific register
///    RegisterClass -> a group of regs
///    Memory        -> memory
///    Address       -> a symbolic memory reference
///    Immediate     -> immediate values
///    Other         -> magic values (such as "Flag Output Operands")
///    Unknown       -> something we don't recognize yet and can't handle
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it.  The problem here
/// is that if we have something that could either be in a register or in
/// memory that use of the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory.  Because of
/// this the heuristic we use is:
///
///  1) If there is an 'other' constraint, and if the operand is valid for
///     that constraint, use it.  This makes us take advantage of 'i'
///     constraints when available.
///  2) Otherwise, pick the most general constraint present.  This prefers
///     'm' over 'r', for example.
///
TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
    TargetLowering::AsmOperandInfo &OpInfo) const {
  ConstraintGroup Ret;

  Ret.reserve(OpInfo.Codes.size());
  for (StringRef Code : OpInfo.Codes) {
    TargetLowering::ConstraintType CType = getConstraintType(Code);

    // Indirect 'other' or 'immediate' constraints are not allowed.
    if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
                               CType == TargetLowering::C_Register ||
                               CType == TargetLowering::C_RegisterClass))
      continue;

    // Things with matching constraints can only be registers, per gcc
    // documentation.  This mainly affects "g" constraints.
    if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
      continue;

    Ret.emplace_back(Code, CType);
  }

  // Sort so the highest-priority constraint types come first; the sort is
  // stable so original order is kept among equal priorities.
  std::stable_sort(
      Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
        return getConstraintPiority(a.second) > getConstraintPiority(b.second);
      });

  return Ret;
}
5948
/// If we have an immediate, see if we can lower it. Return true if we can,
/// false otherwise.
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
                                     SDValue Op, SelectionDAG *DAG,
                                     const TargetLowering &TLI) {

  assert((P.second == TargetLowering::C_Other ||
          P.second == TargetLowering::C_Immediate) &&
         "need immediate or other");

  if (!Op.getNode())
    return false;

  // The constraint is lowerable iff the target produces at least one operand
  // for it.
  std::vector<SDValue> ResultOps;
  TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
  return !ResultOps.empty();
}
5966
/// Determines the constraint code and constraint type to use for the specific
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    ConstraintGroup G = getConstraintPreferences(OpInfo);
    if (G.empty())
      return;

    // G is sorted best-first; walk past immediate/other entries that the
    // target cannot actually lower for this operand.
    unsigned BestIdx = 0;
    for (const unsigned E = G.size();
         BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
                         G[BestIdx].second == TargetLowering::C_Immediate);
         ++BestIdx) {
      if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
        break;
      // If we're out of constraints, just pick the first one.
      if (BestIdx + 1 == E) {
        BestIdx = 0;
        break;
      }
    }

    OpInfo.ConstraintCode = G[BestIdx].first;
    OpInfo.ConstraintType = G[BestIdx].second;
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Constants are handled elsewhere.  For Functions, the type here is the
    // type of the result, which is not what we want to look at; leave them
    // alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<ConstantInt>(v) || isa<Function>(v)) {
      return;
    }

    if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
      OpInfo.ConstraintCode = "i";
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}
6024
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // For each divisor element: strip trailing zeros (handled by an exact
  // arithmetic shift) and compute the multiplicative inverse of the
  // remaining odd factor.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // Calculate the multiplicative inverse, using Newton's method.
    // (Divisor is odd here, so the inverse modulo 2^BitWidth exists and the
    // iteration doubles the number of correct bits each step.)
    APInt t;
    APInt Factor = Divisor;
    while ((t = Divisor * Factor) != 1)
      Factor *= APInt(Divisor.getBitWidth(), 2) - t;
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  // Re-assemble per-element shifts/factors into operands of the right shape
  // (build_vector, splat, or scalar).
  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;

  // Shift the value upfront if it is even, so the LSB is one.
  if (UseSRA) {
    // TODO: For UDIV use SRL instead of SRA.
    SDNodeFlags Flags;
    Flags.setExact(true);
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
6092
/// Default hook for building an SDIV-by-power-of-2 expansion. If integer
/// division is cheap on this target, keep the SDIV node as-is; otherwise
/// return an empty SDValue so generic expansion proceeds.
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                      SelectionDAG &DAG,
                                      SmallVectorImpl<SDNode *> &Created) const {
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SDIV as SDIV
  return SDValue();
}
6102
/// Default hook for building an SREM-by-power-of-2 expansion. If integer
/// division is cheap on this target, keep the SREM node as-is; otherwise
/// return an empty SDValue so generic expansion proceeds.
SDValue
TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
                              SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) const {
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SREM as SREM
  return SDValue();
}
6113
/// Build sdiv by power-of-2 with conditional move instructions
/// Ref: "Hacker's Delight" by Henry Warren 10-1
/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
///   bgez x, label
///   add x, x, 2**k-1
/// label:
///   sra res, x, k
///   neg res, res (when the divisor is negative)
SDValue TargetLowering::buildSDIVPow2WithCMov(
    SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
    SmallVectorImpl<SDNode *> &Created) const {
  unsigned Lg2 = Divisor.countr_zero();
  EVT VT = N->getValueType(0);

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
  SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);

  // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);

  Created.push_back(Cmp.getNode());
  Created.push_back(Add.getNode());
  Created.push_back(CMov.getNode());

  // Divide by pow2.
  SDValue SRA =
      DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));

  // If we're dividing by a positive value, we're done.  Otherwise, we must
  // negate the result.
  if (Divisor.isNonNegative())
    return SRA;

  Created.push_back(SRA.getNode());
  // Negate via (0 - SRA).
  return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
}
6156
6157/// Given an ISD::SDIV node expressing a divide by constant,
6158/// return a DAG expression to select that will generate the same value by
6159/// multiplying by a magic number.
6160/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6162 bool IsAfterLegalization,
6163 SmallVectorImpl<SDNode *> &Created) const {
6164 SDLoc dl(N);
6165 EVT VT = N->getValueType(0);
6166 EVT SVT = VT.getScalarType();
6167 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6168 EVT ShSVT = ShVT.getScalarType();
6169 unsigned EltBits = VT.getScalarSizeInBits();
6170 EVT MulVT;
6171
6172 // Check to see if we can do this.
6173 // FIXME: We should be more aggressive here.
6174 if (!isTypeLegal(VT)) {
6175 // Limit this to simple scalars for now.
6176 if (VT.isVector() || !VT.isSimple())
6177 return SDValue();
6178
6179 // If this type will be promoted to a large enough type with a legal
6180 // multiply operation, we can go ahead and do this transform.
6182 return SDValue();
6183
6184 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6185 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6186 !isOperationLegal(ISD::MUL, MulVT))
6187 return SDValue();
6188 }
6189
6190 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6191 if (N->getFlags().hasExact())
6192 return BuildExactSDIV(*this, N, dl, DAG, Created);
6193
6194 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6195
6196 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6197 if (C->isZero())
6198 return false;
6199
6200 const APInt &Divisor = C->getAPIntValue();
6202 int NumeratorFactor = 0;
6203 int ShiftMask = -1;
6204
6205 if (Divisor.isOne() || Divisor.isAllOnes()) {
6206 // If d is +1/-1, we just multiply the numerator by +1/-1.
6207 NumeratorFactor = Divisor.getSExtValue();
6208 magics.Magic = 0;
6209 magics.ShiftAmount = 0;
6210 ShiftMask = 0;
6211 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6212 // If d > 0 and m < 0, add the numerator.
6213 NumeratorFactor = 1;
6214 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6215 // If d < 0 and m > 0, subtract the numerator.
6216 NumeratorFactor = -1;
6217 }
6218
6219 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6220 Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
6221 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6222 ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
6223 return true;
6224 };
6225
6226 SDValue N0 = N->getOperand(0);
6227 SDValue N1 = N->getOperand(1);
6228
6229 // Collect the shifts / magic values from each element.
6230 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
6231 return SDValue();
6232
6233 SDValue MagicFactor, Factor, Shift, ShiftMask;
6234 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6235 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6236 Factor = DAG.getBuildVector(VT, dl, Factors);
6237 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6238 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6239 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6240 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6241 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6242 "Expected matchUnaryPredicate to return one element for scalable "
6243 "vectors");
6244 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6245 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6246 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6247 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6248 } else {
6249 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6250 MagicFactor = MagicFactors[0];
6251 Factor = Factors[0];
6252 Shift = Shifts[0];
6253 ShiftMask = ShiftMasks[0];
6254 }
6255
6256 // Multiply the numerator (operand 0) by the magic value.
6257 // FIXME: We should support doing a MUL in a wider type.
6258 auto GetMULHS = [&](SDValue X, SDValue Y) {
6259 // If the type isn't legal, use a wider mul of the type calculated
6260 // earlier.
6261 if (!isTypeLegal(VT)) {
6262 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6263 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6264 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6265 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6266 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6267 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6268 }
6269
6270 if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6271 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6272 if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6273 SDValue LoHi =
6274 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6275 return SDValue(LoHi.getNode(), 1);
6276 }
6277 // If type twice as wide legal, widen and use a mul plus a shift.
6278 unsigned Size = VT.getScalarSizeInBits();
6279 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6280 if (VT.isVector())
6281 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6283 if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6284 X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
6285 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6286 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6287 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6288 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6289 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6290 }
6291 return SDValue();
6292 };
6293
6294 SDValue Q = GetMULHS(N0, MagicFactor);
6295 if (!Q)
6296 return SDValue();
6297
6298 Created.push_back(Q.getNode());
6299
6300 // (Optionally) Add/subtract the numerator using Factor.
6301 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6302 Created.push_back(Factor.getNode());
6303 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6304 Created.push_back(Q.getNode());
6305
6306 // Shift right algebraic by shift value.
6307 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6308 Created.push_back(Q.getNode());
6309
6310 // Extract the sign bit, mask it and add it to the quotient.
6311 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6312 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6313 Created.push_back(T.getNode());
6314 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6315 Created.push_back(T.getNode());
6316 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6317}
6318
// NOTE(review): doc-site scrape — upstream line numbers fused into the text
// and several upstream lines dropped (flagged inline). Code byte-identical;
// comments only. Verify flagged gaps against upstream TargetLowering.cpp.
6319/// Given an ISD::UDIV node expressing a divide by constant,
6320/// return a DAG expression to select that will generate the same value by
6321/// multiplying by a magic number.
6322/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
// NOTE(review): upstream line 6323 is missing here — presumably the signature
// `SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,`.
6324 bool IsAfterLegalization,
6325 SmallVectorImpl<SDNode *> &Created) const {
6326 SDLoc dl(N);
6327 EVT VT = N->getValueType(0);
6328 EVT SVT = VT.getScalarType();
6329 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6330 EVT ShSVT = ShVT.getScalarType();
6331 unsigned EltBits = VT.getScalarSizeInBits();
6332 EVT MulVT;
6333
6334 // Check to see if we can do this.
6335 // FIXME: We should be more aggressive here.
6336 if (!isTypeLegal(VT)) {
6337 // Limit this to simple scalars for now.
6338 if (VT.isVector() || !VT.isSimple())
6339 return SDValue();
6340
6341 // If this type will be promoted to a large enough type with a legal
6342 // multiply operation, we can go ahead and do this transform.
// NOTE(review): upstream line 6343 is missing — the `if` condition guarding
// this early return. Verify against upstream.
6344 return SDValue();
6345
6346 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6347 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6348 !isOperationLegal(ISD::MUL, MulVT))
6349 return SDValue();
6350 }
6351
6352 SDValue N0 = N->getOperand(0);
6353 SDValue N1 = N->getOperand(1);
6354
6355 // Try to use leading zeros of the dividend to reduce the multiplier and
6356 // avoid expensive fixups.
6357 // TODO: Support vectors.
6358 unsigned LeadingZeros = 0;
6359 if (!VT.isVector() && isa<ConstantSDNode>(N1)) {
6360 assert(!isOneConstant(N1) && "Unexpected divisor");
6361 LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6362 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in
6363 // the dividend exceeds the leading zeros for the divisor.
6364 LeadingZeros = std::min(LeadingZeros, N1->getAsAPIntVal().countl_zero());
6365 }
6366
6367 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6368 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6369
// Per-element worker: computes pre/post shifts, magic factor and NPQ factor
// for one divisor lane. Returns false to abort the fold.
6370 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6371 if (C->isZero())
6372 return false;
6373 const APInt& Divisor = C->getAPIntValue();
6374
6375 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6376
6377 // Magic algorithm doesn't work for division by 1. We need to emit a select
6378 // at the end.
6379 if (Divisor.isOne()) {
6380 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6381 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6382 } else {
// NOTE(review): upstream line 6383 is missing — the start of the declaration
// continued on the next line (presumably
// `UnsignedDivisionByConstantInfo magics =`). Verify against upstream.
6384 UnsignedDivisionByConstantInfo::get(Divisor, LeadingZeros);
6385
6386 MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
6387
6388 assert(magics.PreShift < Divisor.getBitWidth() &&
6389 "We shouldn't generate an undefined shift!");
6390 assert(magics.PostShift < Divisor.getBitWidth() &&
6391 "We shouldn't generate an undefined shift!");
6392 assert((!magics.IsAdd || magics.PreShift == 0) &&
6393 "Unexpected pre-shift");
6394 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6395 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
6396 NPQFactor = DAG.getConstant(
6397 magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
6398 : APInt::getZero(EltBits),
6399 dl, SVT);
6400 UseNPQ |= magics.IsAdd;
6401 UsePreShift |= magics.PreShift != 0;
6402 UsePostShift |= magics.PostShift != 0;
6403 }
6404
6405 PreShifts.push_back(PreShift);
6406 MagicFactors.push_back(MagicFactor);
6407 NPQFactors.push_back(NPQFactor);
6408 PostShifts.push_back(PostShift);
6409 return true;
6410 };
6411
6412 // Collect the shifts/magic values from each element.
6413 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
6414 return SDValue();
6415
// Materialize per-lane constants matching the shape of the divisor operand.
// The scalar path below intentionally skips NPQFactor: scalars use a plain
// SRL-by-1 instead (see the UseNPQ block further down).
6416 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6417 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6418 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6419 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6420 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6421 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6422 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6423 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6424 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6425 "Expected matchUnaryPredicate to return one for scalable vectors");
6426 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6427 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6428 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6429 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6430 } else {
6431 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6432 PreShift = PreShifts[0];
6433 MagicFactor = MagicFactors[0];
6434 PostShift = PostShifts[0];
6435 }
6436
6437 SDValue Q = N0;
6438 if (UsePreShift) {
6439 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6440 Created.push_back(Q.getNode());
6441 }
6442
6443 // FIXME: We should support doing a MUL in a wider type.
// Returns the high half of the unsigned product X*Y, via MULHU, UMUL_LOHI, or
// a widened MUL+SRL+TRUNCATE; returns an empty SDValue if none is available.
6444 auto GetMULHU = [&](SDValue X, SDValue Y) {
6445 // If the type isn't legal, use a wider mul of the type calculated
6446 // earlier.
6447 if (!isTypeLegal(VT)) {
6448 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
6449 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
6450 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6451 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6452 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6453 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6454 }
6455
6456 if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
6457 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6458 if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
6459 SDValue LoHi =
6460 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6461 return SDValue(LoHi.getNode(), 1);
6462 }
6463 // If type twice as wide legal, widen and use a mul plus a shift.
6464 unsigned Size = VT.getScalarSizeInBits();
6465 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6466 if (VT.isVector())
6467 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
// NOTE(review): upstream line 6468 is missing — the element-count argument
// closing this getVectorVT(...) call. Verify against upstream.
6469 if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6470 X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
6471 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
6472 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6473 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6474 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6475 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6476 }
6477 return SDValue(); // No mulhu or equivalent
6478 };
6479
6480 // Multiply the numerator (operand 0) by the magic value.
6481 Q = GetMULHU(Q, MagicFactor);
6482 if (!Q)
6483 return SDValue();
6484
6485 Created.push_back(Q.getNode());
6486
6487 if (UseNPQ) {
6488 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6489 Created.push_back(NPQ.getNode());
6490
6491 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6492 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6493 if (VT.isVector())
6494 NPQ = GetMULHU(NPQ, NPQFactor);
6495 else
6496 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6497
6498 Created.push_back(NPQ.getNode());
6499
6500 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6501 Created.push_back(Q.getNode());
6502 }
6503
6504 if (UsePostShift) {
6505 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
6506 Created.push_back(Q.getNode());
6507 }
6508
// Division by 1 lanes were emitted as UNDEF above; select the raw numerator
// for those lanes here.
6509 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6510
6511 SDValue One = DAG.getConstant(1, dl, VT);
6512 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
6513 return DAG.getSelect(dl, VT, IsOne, N0, Q);
6514}
6515
// NOTE(review): doc-site scrape — code byte-identical, comments only.
6516/// If all values in Values that *don't* match the predicate are same 'splat'
6517/// value, then replace all values with that splat value.
6518/// Else, if AlternativeReplacement was provided, then replace all values that
6519/// do match predicate with AlternativeReplacement value.
6520static void
// NOTE(review): upstream line 6521 is missing here — the function name and
// first parameter (presumably
// `turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,` — `Values` is
// mutated via replace_if below). Verify against upstream.
6522 std::function<bool(SDValue)> Predicate,
6523 SDValue AlternativeReplacement = SDValue()) {
6524 SDValue Replacement;
6525 // Is there a value for which the Predicate does *NOT* match? What is it?
6526 auto SplatValue = llvm::find_if_not(Values, Predicate);
6527 if (SplatValue != Values.end()) {
6528 // Does Values consist only of SplatValue's and values matching Predicate?
6529 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6530 return Value == *SplatValue || Predicate(Value);
6531 })) // Then we shall replace values matching predicate with SplatValue.
6532 Replacement = *SplatValue;
6533 }
6534 if (!Replacement) {
6535 // Oops, we did not find the "baseline" splat value.
6536 if (!AlternativeReplacement)
6537 return; // Nothing to do.
6538 // Let's replace with provided value then.
6539 Replacement = AlternativeReplacement;
6540 }
6541 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6542}
6543
// NOTE(review): doc-site scrape — code byte-identical, comments only.
// Thin wrapper: delegates to prepareUREMEqFold and, on success, registers the
// newly created nodes with the DAG combiner worklist.
6544/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6545/// where the divisor is constant and the comparison target is zero,
6546/// return a DAG expression that will generate the same comparison result
6547/// using only multiplications, additions and shifts/rotations.
6548/// Ref: "Hacker's Delight" 10-17.
6549SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6550 SDValue CompTargetNode,
// NOTE(review): upstream line 6551 is missing — presumably the
// `ISD::CondCode Cond,` parameter (`Cond` is forwarded below). Verify.
6552 DAGCombinerInfo &DCI,
6553 const SDLoc &DL) const {
// NOTE(review): upstream line 6554 is missing — presumably the declaration of
// `Built` (e.g. `SmallVector<SDNode *, 16> Built;`), used below. Verify.
6555 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6556 DCI, DL, Built)) {
6557 for (SDNode *N : Built)
6558 DCI.AddToWorklist(N);
6559 return Folded;
6560 }
6561
6562 return SDValue();
6563}
6564
// NOTE(review): doc-site scrape — upstream line numbers fused into the text
// and several upstream lines dropped (flagged inline). Code byte-identical;
// comments only. Verify flagged gaps against upstream TargetLowering.cpp.
6565SDValue
6566TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6567 SDValue CompTargetNode, ISD::CondCode Cond,
6568 DAGCombinerInfo &DCI, const SDLoc &DL,
6569 SmallVectorImpl<SDNode *> &Created) const {
6570 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6571 // - D must be constant, with D = D0 * 2^K where D0 is odd
6572 // - P is the multiplicative inverse of D0 modulo 2^W
6573 // - Q = floor(((2^W) - 1) / D)
6574 // where W is the width of the common type of N and D.
6575 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6576 "Only applicable for (in)equality comparisons.");
6577
6578 SelectionDAG &DAG = DCI.DAG;
6579
6580 EVT VT = REMNode.getValueType();
6581 EVT SVT = VT.getScalarType();
6582 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
6583 EVT ShSVT = ShVT.getScalarType();
6584
6585 // If MUL is unavailable, we cannot proceed in any case.
6586 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6587 return SDValue();
6588
// Per-lane bookkeeping flags accumulated by the worker lambda below.
6589 bool ComparingWithAllZeros = true;
6590 bool AllComparisonsWithNonZerosAreTautological = true;
6591 bool HadTautologicalLanes = false;
6592 bool AllLanesAreTautological = true;
6593 bool HadEvenDivisor = false;
6594 bool AllDivisorsArePowerOfTwo = true;
6595 bool HadTautologicalInvertedLanes = false;
6596 SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
6597
6598 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6599 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6600 if (CDiv->isZero())
6601 return false;
6602
6603 const APInt &D = CDiv->getAPIntValue();
6604 const APInt &Cmp = CCmp->getAPIntValue();
6605
6606 ComparingWithAllZeros &= Cmp.isZero();
6607
6608 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6609 // if C2 is not less than C1, the comparison is always false.
6610 // But we will only be able to produce the comparison that will give the
6611 // opposive tautological answer. So this lane would need to be fixed up.
6612 bool TautologicalInvertedLane = D.ule(Cmp);
6613 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6614
6615 // If all lanes are tautological (either all divisors are ones, or divisor
6616 // is not greater than the constant we are comparing with),
6617 // we will prefer to avoid the fold.
6618 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6619 HadTautologicalLanes |= TautologicalLane;
6620 AllLanesAreTautological &= TautologicalLane;
6621
6622 // If we are comparing with non-zero, we need'll need to subtract said
6623 // comparison value from the LHS. But there is no point in doing that if
6624 // every lane where we are comparing with non-zero is tautological..
6625 if (!Cmp.isZero())
6626 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6627
6628 // Decompose D into D0 * 2^K
6629 unsigned K = D.countr_zero();
6630 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6631 APInt D0 = D.lshr(K);
6632
6633 // D is even if it has trailing zeros.
6634 HadEvenDivisor |= (K != 0);
6635 // D is a power-of-two if D0 is one.
6636 // If all divisors are power-of-two, we will prefer to avoid the fold.
6637 AllDivisorsArePowerOfTwo &= D0.isOne();
6638
6639 // P = inv(D0, 2^W)
6640 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6641 unsigned W = D.getBitWidth();
6642 APInt P = D0.zext(W + 1)
// NOTE(review): upstream line 6643 is missing — the multiplicative-inverse
// step of this expression chain (the asserts below check `(D0 * P).isOne()`).
// Verify against upstream.
6644 .trunc(W);
6645 assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
6646 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6647
6648 // Q = floor((2^W - 1) u/ D)
6649 // R = ((2^W - 1) u% D)
6650 APInt Q, R;
// NOTE(review): upstream line 6651 is missing — the computation that fills Q
// and R (per the comments above, a udivrem of all-ones by D). Verify.
6652
6653 // If we are comparing with zero, then that comparison constant is okay,
6654 // else it may need to be one less than that.
6655 if (Cmp.ugt(R))
6656 Q -= 1;
6657
// NOTE(review): upstream line 6658 is missing — the head of the assert whose
// message string follows on the next line. Verify against upstream.
6659 "We are expecting that K is always less than all-ones for ShSVT");
6660
6661 // If the lane is tautological the result can be constant-folded.
6662 if (TautologicalLane) {
6663 // Set P and K amount to a bogus values so we can try to splat them.
6664 P = 0;
6665 K = -1;
6666 // And ensure that comparison constant is tautological,
6667 // it will always compare true/false.
6668 Q = -1;
6669 }
6670
6671 PAmts.push_back(DAG.getConstant(P, DL, SVT));
6672 KAmts.push_back(
6673 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
6674 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6675 return true;
6676 };
6677
6678 SDValue N = REMNode.getOperand(0);
6679 SDValue D = REMNode.getOperand(1);
6680
6681 // Collect the values from each element.
6682 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
6683 return SDValue();
6684
6685 // If all lanes are tautological, the result can be constant-folded.
6686 if (AllLanesAreTautological)
6687 return SDValue();
6688
6689 // If this is a urem by a powers-of-two, avoid the fold since it can be
6690 // best implemented as a bit test.
6691 if (AllDivisorsArePowerOfTwo)
6692 return SDValue();
6693
6694 SDValue PVal, KVal, QVal;
6695 if (D.getOpcode() == ISD::BUILD_VECTOR) {
6696 if (HadTautologicalLanes) {
6697 // Try to turn PAmts into a splat, since we don't care about the values
6698 // that are currently '0'. If we can't, just keep '0'`s.
// NOTE(review): upstream line 6699 is missing — the helper call that splats
// PAmts (per the comment above). Verify against upstream.
6700 // Try to turn KAmts into a splat, since we don't care about the values
6701 // that are currently '-1'. If we can't, change them to '0'`s.
// NOTE(review): upstream line 6702 is missing — the head of the helper call
// whose trailing argument follows on the next line. Verify against upstream.
6703 DAG.getConstant(0, DL, ShSVT));
6704 }
6705
6706 PVal = DAG.getBuildVector(VT, DL, PAmts);
6707 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
6708 QVal = DAG.getBuildVector(VT, DL, QAmts);
6709 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6710 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
6711 "Expected matchBinaryPredicate to return one element for "
6712 "SPLAT_VECTORs");
6713 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
6714 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
6715 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
6716 } else {
6717 PVal = PAmts[0];
6718 KVal = KAmts[0];
6719 QVal = QAmts[0];
6720 }
6721
6722 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6723 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
6724 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
6725 assert(CompTargetNode.getValueType() == N.getValueType() &&
6726 "Expecting that the types on LHS and RHS of comparisons match.");
6727 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
6728 }
6729
6730 // (mul N, P)
6731 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
6732 Created.push_back(Op0.getNode());
6733
6734 // Rotate right only if any divisor was even. We avoid rotates for all-odd
6735 // divisors as a performance improvement, since rotating by 0 is a no-op.
6736 if (HadEvenDivisor) {
6737 // We need ROTR to do this.
6738 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
6739 return SDValue();
6740 // UREM: (rotr (mul N, P), K)
6741 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
6742 Created.push_back(Op0.getNode());
6743 }
6744
6745 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
6746 SDValue NewCC =
6747 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
// NOTE(review): upstream line 6748 is missing — the condition-code argument
// closing this getSetCC (per the comment above: SETULE for SETEQ, SETUGT for
// SETNE). Verify against upstream.
6749 if (!HadTautologicalInvertedLanes)
6750 return NewCC;
6751
6752 // If any lanes previously compared always-false, the NewCC will give
6753 // always-true result for them, so we need to fixup those lanes.
6754 // Or the other way around for inequality predicate.
6755 assert(VT.isVector() && "Can/should only get here for vectors.");
6756 Created.push_back(NewCC.getNode());
6757
6758 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6759 // if C2 is not less than C1, the comparison is always false.
6760 // But we have produced the comparison that will give the
6761 // opposive tautological answer. So these lanes would need to be fixed up.
6762 SDValue TautologicalInvertedChannels =
6763 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
6764 Created.push_back(TautologicalInvertedChannels.getNode());
6765
6766 // NOTE: we avoid letting illegal types through even if we're before legalize
6767 // ops – legalization has a hard time producing good code for this.
6768 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
6769 // If we have a vector select, let's replace the comparison results in the
6770 // affected lanes with the correct tautological result.
6771 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
6772 DL, SETCCVT, SETCCVT);
6773 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
6774 Replacement, NewCC);
6775 }
6776
6777 // Else, we can just invert the comparison result in the appropriate lanes.
6778 //
6779 // NOTE: see the note above VSELECT above.
6780 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
6781 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
6782 TautologicalInvertedChannels);
6783
6784 return SDValue(); // Don't know how to lower.
6785}
6786
// NOTE(review): doc-site scrape — code byte-identical, comments only.
// Thin wrapper: delegates to prepareSREMEqFold and, on success, registers the
// newly created nodes with the DAG combiner worklist.
6787/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6788/// where the divisor is constant and the comparison target is zero,
6789/// return a DAG expression that will generate the same comparison result
6790/// using only multiplications, additions and shifts/rotations.
6791/// Ref: "Hacker's Delight" 10-17.
6792SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6793 SDValue CompTargetNode,
// NOTE(review): upstream line 6794 is missing — presumably the
// `ISD::CondCode Cond,` parameter (`Cond` is forwarded below). Verify.
6795 DAGCombinerInfo &DCI,
6796 const SDLoc &DL) const {
// NOTE(review): upstream line 6797 is missing — presumably the declaration of
// `Built` (e.g. `SmallVector<SDNode *, 16> Built;`); the assert below checks
// its size. Verify against upstream.
6798 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6799 DCI, DL, Built)) {
6800 assert(Built.size() <= 7 && "Max size prediction failed.");
6801 for (SDNode *N : Built)
6802 DCI.AddToWorklist(N);
6803 return Folded;
6804 }
6805
6806 return SDValue();
6807}
6808
// NOTE(review): doc-site scrape — upstream line numbers fused into the text
// and several upstream lines dropped (flagged inline). Code byte-identical;
// comments only. Verify flagged gaps against upstream TargetLowering.cpp.
6809SDValue
6810TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6811 SDValue CompTargetNode, ISD::CondCode Cond,
6812 DAGCombinerInfo &DCI, const SDLoc &DL,
6813 SmallVectorImpl<SDNode *> &Created) const {
6814 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
6815 // Fold:
6816 // (seteq/ne (srem N, D), 0)
6817 // To:
6818 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
6819 //
6820 // - D must be constant, with D = D0 * 2^K where D0 is odd
6821 // - P is the multiplicative inverse of D0 modulo 2^W
6822 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
6823 // - Q = floor((2 * A) / (2^K))
6824 // where W is the width of the common type of N and D.
6825 //
6826 // When D is a power of two (and thus D0 is 1), the normal
6827 // formula for A and Q don't apply, because the derivation
6828 // depends on D not dividing 2^(W-1), and thus theorem ZRS
6829 // does not apply. This specifically fails when N = INT_MIN.
6830 //
6831 // Instead, for power-of-two D, we use:
6832 // - A = 2^(W-1)
6833 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
6834 // - Q = 2^(W-K) - 1
6835 // |-> Test that the top K bits are zero after rotation
6836 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6837 "Only applicable for (in)equality comparisons.");
6838
6839 SelectionDAG &DAG = DCI.DAG;
6840
6841 EVT VT = REMNode.getValueType();
6842 EVT SVT = VT.getScalarType();
6843 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
6844 EVT ShSVT = ShVT.getScalarType();
6845
6846 // If we are after ops legalization, and MUL is unavailable, we can not
6847 // proceed.
6848 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6849 return SDValue();
6850
6851 // TODO: Could support comparing with non-zero too.
6852 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
6853 if (!CompTarget || !CompTarget->isZero())
6854 return SDValue();
6855
// Per-lane bookkeeping flags accumulated by the worker lambda below.
6856 bool HadIntMinDivisor = false;
6857 bool HadOneDivisor = false;
6858 bool AllDivisorsAreOnes = true;
6859 bool HadEvenDivisor = false;
6860 bool NeedToApplyOffset = false;
6861 bool AllDivisorsArePowerOfTwo = true;
6862 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
6863
6864 auto BuildSREMPattern = [&](ConstantSDNode *C) {
6865 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6866 if (C->isZero())
6867 return false;
6868
6869 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
6870
6871 // WARNING: this fold is only valid for positive divisors!
6872 APInt D = C->getAPIntValue();
6873 if (D.isNegative())
6874 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
6875
6876 HadIntMinDivisor |= D.isMinSignedValue();
6877
6878 // If all divisors are ones, we will prefer to avoid the fold.
6879 HadOneDivisor |= D.isOne();
6880 AllDivisorsAreOnes &= D.isOne();
6881
6882 // Decompose D into D0 * 2^K
6883 unsigned K = D.countr_zero();
6884 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6885 APInt D0 = D.lshr(K);
6886
6887 if (!D.isMinSignedValue()) {
6888 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
6889 // we don't care about this lane in this fold, we'll special-handle it.
6890 HadEvenDivisor |= (K != 0);
6891 }
6892
6893 // D is a power-of-two if D0 is one. This includes INT_MIN.
6894 // If all divisors are power-of-two, we will prefer to avoid the fold.
6895 AllDivisorsArePowerOfTwo &= D0.isOne();
6896
6897 // P = inv(D0, 2^W)
6898 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6899 unsigned W = D.getBitWidth();
6900 APInt P = D0.zext(W + 1)
// NOTE(review): upstream line 6901 is missing — the multiplicative-inverse
// step of this expression chain (the asserts below check `(D0 * P).isOne()`).
// Verify against upstream.
6902 .trunc(W);
6903 assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
6904 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6905
6906 // A = floor((2^(W - 1) - 1) / D0) & -2^K
// NOTE(review): upstream line 6907 is missing — the declaration/initialization
// of `A` (per the formula in the comment above); `A` is used below. Verify.
6908 A.clearLowBits(K);
6909
6910 if (!D.isMinSignedValue()) {
6911 // If divisor INT_MIN, then we don't care about this lane in this fold,
6912 // we'll special-handle it.
6913 NeedToApplyOffset |= A != 0;
6914 }
6915
6916 // Q = floor((2 * A) / (2^K))
6917 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
6918
// NOTE(review): upstream line 6919 is missing — the head of the assert whose
// message string follows on the next line (about A vs all-ones). Verify.
6920 "We are expecting that A is always less than all-ones for SVT");
// NOTE(review): upstream line 6921 is missing — the head of the assert whose
// message string follows on the next line (about K vs all-ones). Verify.
6922 "We are expecting that K is always less than all-ones for ShSVT");
6923
6924 // If D was a power of two, apply the alternate constant derivation.
6925 if (D0.isOne()) {
6926 // A = 2^(W-1)
// NOTE(review): upstream line 6927 is missing — the assignment that sets `A`
// for the power-of-two case (per the comment above). Verify against upstream.
6928 // - Q = 2^(W-K) - 1
6929 Q = APInt::getAllOnes(W - K).zext(W);
6930 }
6931
6932 // If the divisor is 1 the result can be constant-folded. Likewise, we
6933 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
6934 if (D.isOne()) {
6935 // Set P, A and K to a bogus values so we can try to splat them.
6936 P = 0;
6937 A = -1;
6938 K = -1;
6939
6940 // x ?% 1 == 0 <--> true <--> x u<= -1
6941 Q = -1;
6942 }
6943
6944 PAmts.push_back(DAG.getConstant(P, DL, SVT));
6945 AAmts.push_back(DAG.getConstant(A, DL, SVT));
6946 KAmts.push_back(
6947 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
6948 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6949 return true;
6950 };
6951
6952 SDValue N = REMNode.getOperand(0);
6953 SDValue D = REMNode.getOperand(1);
6954
6955 // Collect the values from each element.
6956 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
6957 return SDValue();
6958
6959 // If this is a srem by a one, avoid the fold since it can be constant-folded.
6960 if (AllDivisorsAreOnes)
6961 return SDValue();
6962
6963 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
6964 // since it can be best implemented as a bit test.
6965 if (AllDivisorsArePowerOfTwo)
6966 return SDValue();
6967
6968 SDValue PVal, AVal, KVal, QVal;
6969 if (D.getOpcode() == ISD::BUILD_VECTOR) {
6970 if (HadOneDivisor) {
6971 // Try to turn PAmts into a splat, since we don't care about the values
6972 // that are currently '0'. If we can't, just keep '0'`s.
// NOTE(review): upstream line 6973 is missing — the helper call that splats
// PAmts (per the comment above). Verify against upstream.
6974 // Try to turn AAmts into a splat, since we don't care about the
6975 // values that are currently '-1'. If we can't, change them to '0'`s.
// NOTE(review): upstream line 6976 is missing — the head of the helper call
// whose trailing argument follows on the next line. Verify against upstream.
6977 DAG.getConstant(0, DL, SVT));
6978 // Try to turn KAmts into a splat, since we don't care about the values
6979 // that are currently '-1'. If we can't, change them to '0'`s.
// NOTE(review): upstream line 6980 is missing — the head of the helper call
// whose trailing argument follows on the next line. Verify against upstream.
6981 DAG.getConstant(0, DL, ShSVT));
6982 }
6983
6984 PVal = DAG.getBuildVector(VT, DL, PAmts);
6985 AVal = DAG.getBuildVector(VT, DL, AAmts);
6986 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
6987 QVal = DAG.getBuildVector(VT, DL, QAmts);
6988 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6989 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
6990 QAmts.size() == 1 &&
6991 "Expected matchUnaryPredicate to return one element for scalable "
6992 "vectors");
6993 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
6994 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
6995 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
6996 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
6997 } else {
6998 assert(isa<ConstantSDNode>(D) && "Expected a constant");
6999 PVal = PAmts[0];
7000 AVal = AAmts[0];
7001 KVal = KAmts[0];
7002 QVal = QAmts[0];
7003 }
7004
7005 // (mul N, P)
7006 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7007 Created.push_back(Op0.getNode());
7008
7009 if (NeedToApplyOffset) {
7010 // We need ADD to do this.
7011 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7012 return SDValue();
7013
7014 // (add (mul N, P), A)
7015 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7016 Created.push_back(Op0.getNode());
7017 }
7018
7019 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7020 // divisors as a performance improvement, since rotating by 0 is a no-op.
7021 if (HadEvenDivisor) {
7022 // We need ROTR to do this.
7023 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7024 return SDValue();
7025 // SREM: (rotr (add (mul N, P), A), K)
7026 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7027 Created.push_back(Op0.getNode());
7028 }
7029
7030 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7031 SDValue Fold =
7032 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
// NOTE(review): upstream line 7033 is missing — the condition-code argument
// closing this getSetCC (per the comment above: SETULE for SETEQ, SETUGT for
// SETNE). Verify against upstream.
7034
7035 // If we didn't have lanes with INT_MIN divisor, then we're done.
7036 if (!HadIntMinDivisor)
7037 return Fold;
7038
7039 // That fold is only valid for positive divisors. Which effectively means,
7040 // it is invalid for INT_MIN divisors. So if we have such a lane,
7041 // we must fix-up results for said lanes.
7042 assert(VT.isVector() && "Can/should only get here for vectors.");
7043
7044 // NOTE: we avoid letting illegal types through even if we're before legalize
7045 // ops – legalization has a hard time producing good code for the code that
7046 // follows.
7047 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
// NOTE(review): upstream lines 7048-7050 are missing — the remaining legality
// checks OR'd into this condition (the fixup path below also uses ISD::AND
// and ISD::VSELECT). Verify against upstream.
7051 return SDValue();
7052
7053 Created.push_back(Fold.getNode());
7054
7055 SDValue IntMin = DAG.getConstant(
// NOTE(review): upstream line 7056 is missing — the value/type arguments of
// this getConstant (the signed-minimum constant). Verify against upstream.
7057 SDValue IntMax = DAG.getConstant(
// NOTE(review): upstream line 7058 is missing — the value/type arguments of
// this getConstant (the signed-maximum constant). Verify against upstream.
7059 SDValue Zero =
// NOTE(review): upstream line 7060 is missing — the initializer of `Zero`.
// Verify against upstream.
7061
7062 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7063 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7064 Created.push_back(DivisorIsIntMin.getNode());
7065
7066 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7067 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7068 Created.push_back(Masked.getNode());
7069 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7070 Created.push_back(MaskedIsZero.getNode());
7071
7072 // To produce final result we need to blend 2 vectors: 'SetCC' and
7073 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7074 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7075 // constant-folded, select can get lowered to a shuffle with constant mask.
7076 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7077 MaskedIsZero, Fold);
7078
7079 return Blended;
7080}
7081
7084 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
7085 DAG.getContext()->emitError("argument to '__builtin_return_address' must "
7086 "be a constant integer");
7087 return true;
7088 }
7089
7090 return false;
7091}
7092
7094 const DenormalMode &Mode) const {
7095 SDLoc DL(Op);
7096 EVT VT = Op.getValueType();
7097 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7098 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7099
7100 // This is specifically a check for the handling of denormal inputs, not the
7101 // result.
7102 if (Mode.Input == DenormalMode::PreserveSign ||
7103 Mode.Input == DenormalMode::PositiveZero) {
7104 // Test = X == 0.0
7105 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7106 }
7107
7108 // Testing it with denormal inputs to avoid wrong estimate.
7109 //
7110 // Test = fabs(X) < SmallestNormal
7111 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
7112 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7113 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7114 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7115 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7116}
7117
7119 bool LegalOps, bool OptForSize,
7121 unsigned Depth) const {
7122 // fneg is removable even if it has multiple uses.
7123 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7125 return Op.getOperand(0);
7126 }
7127
7128 // Don't recurse exponentially.
7130 return SDValue();
7131
7132 // Pre-increment recursion depth for use in recursive calls.
7133 ++Depth;
7134 const SDNodeFlags Flags = Op->getFlags();
7135 const TargetOptions &Options = DAG.getTarget().Options;
7136 EVT VT = Op.getValueType();
7137 unsigned Opcode = Op.getOpcode();
7138
7139 // Don't allow anything with multiple uses unless we know it is free.
7140 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7141 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7142 isFPExtFree(VT, Op.getOperand(0).getValueType());
7143 if (!IsFreeExtend)
7144 return SDValue();
7145 }
7146
7147 auto RemoveDeadNode = [&](SDValue N) {
7148 if (N && N.getNode()->use_empty())
7149 DAG.RemoveDeadNode(N.getNode());
7150 };
7151
7152 SDLoc DL(Op);
7153
7154 // Because getNegatedExpression can delete nodes we need a handle to keep
7155 // temporary nodes alive in case the recursion manages to create an identical
7156 // node.
7157 std::list<HandleSDNode> Handles;
7158
7159 switch (Opcode) {
7160 case ISD::ConstantFP: {
7161 // Don't invert constant FP values after legalization unless the target says
7162 // the negated constant is legal.
7163 bool IsOpLegal =
7165 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7166 OptForSize);
7167
7168 if (LegalOps && !IsOpLegal)
7169 break;
7170
7171 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7172 V.changeSign();
7173 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7174
7175 // If we already have the use of the negated floating constant, it is free
7176 // to negate it even it has multiple uses.
7177 if (!Op.hasOneUse() && CFP.use_empty())
7178 break;
7180 return CFP;
7181 }
7182 case ISD::BUILD_VECTOR: {
7183 // Only permit BUILD_VECTOR of constants.
7184 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7185 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7186 }))
7187 break;
7188
7189 bool IsOpLegal =
7192 llvm::all_of(Op->op_values(), [&](SDValue N) {
7193 return N.isUndef() ||
7194 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7195 OptForSize);
7196 });
7197
7198 if (LegalOps && !IsOpLegal)
7199 break;
7200
7202 for (SDValue C : Op->op_values()) {
7203 if (C.isUndef()) {
7204 Ops.push_back(C);
7205 continue;
7206 }
7207 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7208 V.changeSign();
7209 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7210 }
7212 return DAG.getBuildVector(VT, DL, Ops);
7213 }
7214 case ISD::FADD: {
7215 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7216 break;
7217
7218 // After operation legalization, it might not be legal to create new FSUBs.
7219 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7220 break;
7221 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7222
7223 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7225 SDValue NegX =
7226 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7227 // Prevent this node from being deleted by the next call.
7228 if (NegX)
7229 Handles.emplace_back(NegX);
7230
7231 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7233 SDValue NegY =
7234 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7235
7236 // We're done with the handles.
7237 Handles.clear();
7238
7239 // Negate the X if its cost is less or equal than Y.
7240 if (NegX && (CostX <= CostY)) {
7241 Cost = CostX;
7242 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7243 if (NegY != N)
7244 RemoveDeadNode(NegY);
7245 return N;
7246 }
7247
7248 // Negate the Y if it is not expensive.
7249 if (NegY) {
7250 Cost = CostY;
7251 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7252 if (NegX != N)
7253 RemoveDeadNode(NegX);
7254 return N;
7255 }
7256 break;
7257 }
7258 case ISD::FSUB: {
7259 // We can't turn -(A-B) into B-A when we honor signed zeros.
7260 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7261 break;
7262
7263 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7264 // fold (fneg (fsub 0, Y)) -> Y
7265 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7266 if (C->isZero()) {
7268 return Y;
7269 }
7270
7271 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7273 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7274 }
7275 case ISD::FMUL:
7276 case ISD::FDIV: {
7277 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7278
7279 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7281 SDValue NegX =
7282 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7283 // Prevent this node from being deleted by the next call.
7284 if (NegX)
7285 Handles.emplace_back(NegX);
7286
7287 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7289 SDValue NegY =
7290 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7291
7292 // We're done with the handles.
7293 Handles.clear();
7294
7295 // Negate the X if its cost is less or equal than Y.
7296 if (NegX && (CostX <= CostY)) {
7297 Cost = CostX;
7298 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7299 if (NegY != N)
7300 RemoveDeadNode(NegY);
7301 return N;
7302 }
7303
7304 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7305 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7306 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7307 break;
7308
7309 // Negate the Y if it is not expensive.
7310 if (NegY) {
7311 Cost = CostY;
7312 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7313 if (NegX != N)
7314 RemoveDeadNode(NegX);
7315 return N;
7316 }
7317 break;
7318 }
7319 case ISD::FMA:
7320 case ISD::FMAD: {
7321 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7322 break;
7323
7324 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7326 SDValue NegZ =
7327 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7328 // Give up if fail to negate the Z.
7329 if (!NegZ)
7330 break;
7331
7332 // Prevent this node from being deleted by the next two calls.
7333 Handles.emplace_back(NegZ);
7334
7335 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7337 SDValue NegX =
7338 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7339 // Prevent this node from being deleted by the next call.
7340 if (NegX)
7341 Handles.emplace_back(NegX);
7342
7343 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7345 SDValue NegY =
7346 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7347
7348 // We're done with the handles.
7349 Handles.clear();
7350
7351 // Negate the X if its cost is less or equal than Y.
7352 if (NegX && (CostX <= CostY)) {
7353 Cost = std::min(CostX, CostZ);
7354 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7355 if (NegY != N)
7356 RemoveDeadNode(NegY);
7357 return N;
7358 }
7359
7360 // Negate the Y if it is not expensive.
7361 if (NegY) {
7362 Cost = std::min(CostY, CostZ);
7363 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7364 if (NegX != N)
7365 RemoveDeadNode(NegX);
7366 return N;
7367 }
7368 break;
7369 }
7370
7371 case ISD::FP_EXTEND:
7372 case ISD::FSIN:
7373 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7374 OptForSize, Cost, Depth))
7375 return DAG.getNode(Opcode, DL, VT, NegV);
7376 break;
7377 case ISD::FP_ROUND:
7378 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7379 OptForSize, Cost, Depth))
7380 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7381 break;
7382 case ISD::SELECT:
7383 case ISD::VSELECT: {
7384 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7385 // iff at least one cost is cheaper and the other is neutral/cheaper
7386 SDValue LHS = Op.getOperand(1);
7388 SDValue NegLHS =
7389 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7390 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7391 RemoveDeadNode(NegLHS);
7392 break;
7393 }
7394
7395 // Prevent this node from being deleted by the next call.
7396 Handles.emplace_back(NegLHS);
7397
7398 SDValue RHS = Op.getOperand(2);
7400 SDValue NegRHS =
7401 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7402
7403 // We're done with the handles.
7404 Handles.clear();
7405
7406 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7407 (CostLHS != NegatibleCost::Cheaper &&
7408 CostRHS != NegatibleCost::Cheaper)) {
7409 RemoveDeadNode(NegLHS);
7410 RemoveDeadNode(NegRHS);
7411 break;
7412 }
7413
7414 Cost = std::min(CostLHS, CostRHS);
7415 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7416 }
7417 }
7418
7419 return SDValue();
7420}
7421
7422//===----------------------------------------------------------------------===//
7423// Legalization Utilities
7424//===----------------------------------------------------------------------===//
7425
7426bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7427 SDValue LHS, SDValue RHS,
7429 EVT HiLoVT, SelectionDAG &DAG,
7430 MulExpansionKind Kind, SDValue LL,
7431 SDValue LH, SDValue RL, SDValue RH) const {
7432 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7433 Opcode == ISD::SMUL_LOHI);
7434
7435 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7437 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7439 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7441 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7443
7444 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7445 return false;
7446
7447 unsigned OuterBitSize = VT.getScalarSizeInBits();
7448 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7449
7450 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7451 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7452 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7453
7454 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7455 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7456 bool Signed) -> bool {
7457 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7458 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7459 Hi = SDValue(Lo.getNode(), 1);
7460 return true;
7461 }
7462 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7463 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7464 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7465 return true;
7466 }
7467 return false;
7468 };
7469
7470 SDValue Lo, Hi;
7471
7472 if (!LL.getNode() && !RL.getNode() &&
7474 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7475 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7476 }
7477
7478 if (!LL.getNode())
7479 return false;
7480
7481 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7482 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7483 DAG.MaskedValueIsZero(RHS, HighMask)) {
7484 // The inputs are both zero-extended.
7485 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7486 Result.push_back(Lo);
7487 Result.push_back(Hi);
7488 if (Opcode != ISD::MUL) {
7489 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7490 Result.push_back(Zero);
7491 Result.push_back(Zero);
7492 }
7493 return true;
7494 }
7495 }
7496
7497 if (!VT.isVector() && Opcode == ISD::MUL &&
7498 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7499 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7500 // The input values are both sign-extended.
7501 // TODO non-MUL case?
7502 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7503 Result.push_back(Lo);
7504 Result.push_back(Hi);
7505 return true;
7506 }
7507 }
7508
7509 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7510 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7511
7512 if (!LH.getNode() && !RH.getNode() &&
7515 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7516 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7517 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7518 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7519 }
7520
7521 if (!LH.getNode())
7522 return false;
7523
7524 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7525 return false;
7526
7527 Result.push_back(Lo);
7528
7529 if (Opcode == ISD::MUL) {
7530 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7531 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7532 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7533 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7534 Result.push_back(Hi);
7535 return true;
7536 }
7537
7538 // Compute the full width result.
7539 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7540 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
7541 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7542 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
7543 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
7544 };
7545
7546 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7547 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7548 return false;
7549
7550 // This is effectively the add part of a multiply-add of half-sized operands,
7551 // so it cannot overflow.
7552 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7553
7554 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7555 return false;
7556
7557 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7558 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7559
7560 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7562 if (UseGlue)
7563 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7564 Merge(Lo, Hi));
7565 else
7566 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
7567 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
7568
7569 SDValue Carry = Next.getValue(1);
7570 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7571 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7572
7573 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7574 return false;
7575
7576 if (UseGlue)
7577 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7578 Carry);
7579 else
7580 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
7581 Zero, Carry);
7582
7583 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7584
7585 if (Opcode == ISD::SMUL_LOHI) {
7586 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7587 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
7588 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
7589
7590 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7591 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
7592 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
7593 }
7594
7595 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7596 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7597 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7598 return true;
7599}
7600
7602 SelectionDAG &DAG, MulExpansionKind Kind,
7603 SDValue LL, SDValue LH, SDValue RL,
7604 SDValue RH) const {
7606 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7607 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7608 DAG, Kind, LL, LH, RL, RH);
7609 if (Ok) {
7610 assert(Result.size() == 2);
7611 Lo = Result[0];
7612 Hi = Result[1];
7613 }
7614 return Ok;
7615}
7616
7617// Optimize unsigned division or remainder by constants for types twice as large
7618// as a legal VT.
7619//
7620// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7621// can be computed
7622// as:
7623// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7624// Remainder = Sum % Constant
7625// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7626//
7627// For division, we can compute the remainder using the algorithm described
7628// above, subtract it from the dividend to get an exact multiple of Constant.
 7629 // Then multiply that exact multiple by the multiplicative inverse modulo
7630// (1 << (BitWidth / 2)) to get the quotient.
7631
7632// If Constant is even, we can shift right the dividend and the divisor by the
7633// number of trailing zeros in Constant before applying the remainder algorithm.
7634// If we're after the quotient, we can subtract this value from the shifted
7635// dividend and multiply by the multiplicative inverse of the shifted divisor.
7636// If we want the remainder, we shift the value left by the number of trailing
7637// zeros and add the bits that were shifted out of the dividend.
7640 EVT HiLoVT, SelectionDAG &DAG,
7641 SDValue LL, SDValue LH) const {
7642 unsigned Opcode = N->getOpcode();
7643 EVT VT = N->getValueType(0);
7644
7645 // TODO: Support signed division/remainder.
7646 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
7647 return false;
7648 assert(
7649 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
7650 "Unexpected opcode");
7651
7652 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
7653 if (!CN)
7654 return false;
7655
7656 APInt Divisor = CN->getAPIntValue();
7657 unsigned BitWidth = Divisor.getBitWidth();
7658 unsigned HBitWidth = BitWidth / 2;
7660 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7661
7662 // Divisor needs to less than (1 << HBitWidth).
7663 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
7664 if (Divisor.uge(HalfMaxPlus1))
7665 return false;
7666
7667 // We depend on the UREM by constant optimization in DAGCombiner that requires
7668 // high multiply.
7669 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
7671 return false;
7672
7673 // Don't expand if optimizing for size.
7674 if (DAG.shouldOptForSize())
7675 return false;
7676
7677 // Early out for 0 or 1 divisors.
7678 if (Divisor.ule(1))
7679 return false;
7680
7681 // If the divisor is even, shift it until it becomes odd.
7682 unsigned TrailingZeros = 0;
7683 if (!Divisor[0]) {
7684 TrailingZeros = Divisor.countr_zero();
7685 Divisor.lshrInPlace(TrailingZeros);
7686 }
7687
7688 SDLoc dl(N);
7689 SDValue Sum;
7690 SDValue PartialRem;
7691
7692 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
7693 // then add in the carry.
7694 // TODO: If we can't split it in half, we might be able to split into 3 or
7695 // more pieces using a smaller bit width.
7696 if (HalfMaxPlus1.urem(Divisor).isOne()) {
7697 assert(!LL == !LH && "Expected both input halves or no input halves!");
7698 if (!LL)
7699 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
7700
7701 // Shift the input by the number of TrailingZeros in the divisor. The
7702 // shifted out bits will be added to the remainder later.
7703 if (TrailingZeros) {
7704 // Save the shifted off bits if we need the remainder.
7705 if (Opcode != ISD::UDIV) {
7706 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7707 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
7708 DAG.getConstant(Mask, dl, HiLoVT));
7709 }
7710
7711 LL = DAG.getNode(
7712 ISD::OR, dl, HiLoVT,
7713 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
7714 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
7715 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
7716 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
7717 HiLoVT, dl)));
7718 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
7719 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7720 }
7721
7722 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
7723 EVT SetCCType =
7724 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
7726 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
7727 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
7728 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
7729 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
7730 } else {
7731 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
7732 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
7733 // If the boolean for the target is 0 or 1, we can add the setcc result
7734 // directly.
7735 if (getBooleanContents(HiLoVT) ==
7737 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
7738 else
7739 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
7740 DAG.getConstant(0, dl, HiLoVT));
7741 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
7742 }
7743 }
7744
7745 // If we didn't find a sum, we can't do the expansion.
7746 if (!Sum)
7747 return false;
7748
7749 // Perform a HiLoVT urem on the Sum using truncated divisor.
7750 SDValue RemL =
7751 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
7752 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
7753 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
7754
7755 if (Opcode != ISD::UREM) {
7756 // Subtract the remainder from the shifted dividend.
7757 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
7758 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
7759
7760 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
7761
7762 // Multiply by the multiplicative inverse of the divisor modulo
7763 // (1 << BitWidth).
7765 APInt MulFactor = Divisor.zext(BitWidth + 1);
7766 MulFactor = MulFactor.multiplicativeInverse(Mod);
7767 MulFactor = MulFactor.trunc(BitWidth);
7768
7769 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
7770 DAG.getConstant(MulFactor, dl, VT));
7771
7772 // Split the quotient into low and high parts.
7773 SDValue QuotL, QuotH;
7774 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
7775 Result.push_back(QuotL);
7776 Result.push_back(QuotH);
7777 }
7778
7779 if (Opcode != ISD::UDIV) {
7780 // If we shifted the input, shift the remainder left and add the bits we
7781 // shifted off the input.
7782 if (TrailingZeros) {
7783 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7784 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
7785 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7786 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
7787 }
7788 Result.push_back(RemL);
7789 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
7790 }
7791
7792 return true;
7793}
7794
7795// Check that (every element of) Z is undef or not an exact multiple of BW.
7796static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7798 Z,
7799 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7800 true);
7801}
7802
7804 EVT VT = Node->getValueType(0);
7805 SDValue ShX, ShY;
7806 SDValue ShAmt, InvShAmt;
7807 SDValue X = Node->getOperand(0);
7808 SDValue Y = Node->getOperand(1);
7809 SDValue Z = Node->getOperand(2);
7810 SDValue Mask = Node->getOperand(3);
7811 SDValue VL = Node->getOperand(4);
7812
7813 unsigned BW = VT.getScalarSizeInBits();
7814 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
7815 SDLoc DL(SDValue(Node, 0));
7816
7817 EVT ShVT = Z.getValueType();
7818 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7819 // fshl: X << C | Y >> (BW - C)
7820 // fshr: X << (BW - C) | Y >> C
7821 // where C = Z % BW is not zero
7822 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7823 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
7824 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
7825 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
7826 VL);
7827 ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
7828 VL);
7829 } else {
7830 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7831 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7832 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
7833 if (isPowerOf2_32(BW)) {
7834 // Z % BW -> Z & (BW - 1)
7835 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
7836 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7837 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
7838 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
7839 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
7840 } else {
7841 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7842 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
7843 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
7844 }
7845
7846 SDValue One = DAG.getConstant(1, DL, ShVT);
7847 if (IsFSHL) {
7848 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
7849 SDValue ShY1 = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, One, Mask, VL);
7850 ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, ShY1, InvShAmt, Mask, VL);
7851 } else {
7852 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
7853 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
7854 ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, ShAmt, Mask, VL);
7855 }
7856 }
7857 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
7858}
7859
7861 SelectionDAG &DAG) const {
7862 if (Node->isVPOpcode())
7863 return expandVPFunnelShift(Node, DAG);
7864
7865 EVT VT = Node->getValueType(0);
7866
7867 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
7871 return SDValue();
7872
7873 SDValue X = Node->getOperand(0);
7874 SDValue Y = Node->getOperand(1);
7875 SDValue Z = Node->getOperand(2);
7876
7877 unsigned BW = VT.getScalarSizeInBits();
7878 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
7879 SDLoc DL(SDValue(Node, 0));
7880
7881 EVT ShVT = Z.getValueType();
7882
7883 // If a funnel shift in the other direction is more supported, use it.
7884 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
7885 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
7886 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
7887 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7888 // fshl X, Y, Z -> fshr X, Y, -Z
7889 // fshr X, Y, Z -> fshl X, Y, -Z
7890 SDValue Zero = DAG.getConstant(0, DL, ShVT);
7891 Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
7892 } else {
7893 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7894 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7895 SDValue One = DAG.getConstant(1, DL, ShVT);
7896 if (IsFSHL) {
7897 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
7898 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
7899 } else {
7900 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
7901 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
7902 }
7903 Z = DAG.getNOT(DL, Z, ShVT);
7904 }
7905 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
7906 }
7907
7908 SDValue ShX, ShY;
7909 SDValue ShAmt, InvShAmt;
7910 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7911 // fshl: X << C | Y >> (BW - C)
7912 // fshr: X << (BW - C) | Y >> C
7913 // where C = Z % BW is not zero
7914 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7915 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
7916 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
7917 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
7918 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
7919 } else {
7920 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7921 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7922 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
7923 if (isPowerOf2_32(BW)) {
7924 // Z % BW -> Z & (BW - 1)
7925 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
7926 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7927 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
7928 } else {
7929 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7930 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
7931 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
7932 }
7933
7934 SDValue One = DAG.getConstant(1, DL, ShVT);
7935 if (IsFSHL) {
7936 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
7937 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
7938 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
7939 } else {
7940 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
7941 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
7942 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
7943 }
7944 }
7945 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
7946}
7947
7948// TODO: Merge with expandFunnelShift.
// Expand ISD::ROTL / ISD::ROTR into shifts, masks and ORs (or into the
// opposite rotate direction when that one is better supported).
//   Node           - the ROTL/ROTR node being expanded.
//   AllowVectorOps - when false, vector expansion is gated on legality of the
//                    building-block opcodes (the legality operands of that
//                    check, original lines 7970-7974, are elided in this view).
// Returns the expanded value, or SDValue() if no expansion is produced.
7949SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
7950 SelectionDAG &DAG) const {
7951 EVT VT = Node->getValueType(0);
7952 unsigned EltSizeInBits = VT.getScalarSizeInBits();
7953 bool IsLeft = Node->getOpcode() == ISD::ROTL;
7954 SDValue Op0 = Node->getOperand(0);
7955 SDValue Op1 = Node->getOperand(1);
7956 SDLoc DL(SDValue(Node, 0));
7957
7958 EVT ShVT = Op1.getValueType();
7959 SDValue Zero = DAG.getConstant(0, DL, ShVT);
7960
7961 // If a rotate in the other direction is more supported, use it.
 // rot(x, c) == rev_rot(x, -c) only when the bit width is a power of two,
 // hence the isPowerOf2_32 guard.
7962 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
7963 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
7964 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
7965 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
7966 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
7967 }
7968
 // NOTE(review): the legality-check operands of this condition (original
 // lines 7970-7974) are elided in this rendered view.
7969 if (!AllowVectorOps && VT.isVector() &&
7975 return SDValue();
7976
 // ShOpc shifts in the rotate direction; HsOpc recovers the wrapped bits.
7977 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
7978 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
7979 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
7980 SDValue ShVal;
7981 SDValue HsVal;
7982 if (isPowerOf2_32(EltSizeInBits)) {
7983 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
7984 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
7985 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
7986 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
7987 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
7988 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
7989 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
7990 } else {
7991 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
7992 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
 // The extra shift-by-one keeps the second shift amount < w, avoiding an
 // out-of-range (undefined) shift when c % w == 0.
7993 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
7994 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
7995 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
7996 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
7997 SDValue One = DAG.getConstant(1, DL, ShVT);
7998 HsVal =
7999 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8000 }
8001 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8002}
8003
// NOTE(review): the function's signature line (original line 8004,
// TargetLowering::expandShiftParts) is elided in this rendered view; only the
// trailing parameter line survives below.
// Expands ISD::SHL_PARTS / ISD::SRL_PARTS / ISD::SRA_PARTS (double-wide
// shifts split into two parts) using FSHL/FSHR plus a plain shift, selecting
// between them when the shift amount is >= the part width.  Results are
// written to Lo and Hi — presumably reference out-parameters from the elided
// signature; confirm against the original source.
8005 SelectionDAG &DAG) const {
8006 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8007 EVT VT = Node->getValueType(0);
8008 unsigned VTBits = VT.getScalarSizeInBits();
8009 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8010
8011 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8012 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8013 SDValue ShOpLo = Node->getOperand(0);
8014 SDValue ShOpHi = Node->getOperand(1);
8015 SDValue ShAmt = Node->getOperand(2);
8016 EVT ShAmtVT = ShAmt.getValueType();
8017 EVT ShAmtCCVT =
8018 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8019 SDLoc dl(Node);
8020
8021 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8022 // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8023 // away during isel.
8024 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8025 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
 // Tmp1 is the "overflow" value for the far part: sign-fill for SRA_PARTS,
 // zero otherwise.
8026 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8027 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8028 : DAG.getConstant(0, dl, VT);
8029
8030 SDValue Tmp2, Tmp3;
8031 if (IsSHL) {
8032 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8033 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8034 } else {
8035 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8036 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8037 }
8038
8039 // If the shift amount is larger or equal than the width of a part we don't
8040 // use the result from the FSHL/FSHR. Insert a test and select the appropriate
8041 // values for large shift amounts.
8042 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8043 DAG.getConstant(VTBits, dl, ShAmtVT));
8044 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8045 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8046
 // Cond is true when ShAmt >= VTBits (the VTBits bit of the amount is set);
 // in that case the single-part shift result moves to the other half.
8047 if (IsSHL) {
8048 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8049 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8050 } else {
8051 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8052 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8053 }
8054}
8055
// NOTE(review): the function's signature line (original line 8056,
// TargetLowering::expandFP_TO_SINT) is elided in this rendered view.
// Expands FP_TO_SINT with pure integer bit manipulation.  Only the
// f32 -> i64 case is handled; Result receives the expansion and true is
// returned, otherwise false is returned and nothing is emitted.
8057 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8058 SDValue Src = Node->getOperand(OpNo);
8059 EVT SrcVT = Src.getValueType();
8060 EVT DstVT = Node->getValueType(0);
8061 SDLoc dl(SDValue(Node, 0));
8062
8063 // FIXME: Only f32 to i64 conversions are supported.
8064 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8065 return false;
8066
8067 if (Node->isStrictFPOpcode())
8068 // When a NaN is converted to an integer a trap is allowed. We can't
8069 // use this expansion here because it would eliminate that trap. Other
8070 // traps are also allowed and cannot be eliminated. See
8071 // IEEE 754-2008 sec 5.8.
8072 return false;
8073
8074 // Expand f32 -> i64 conversion
8075 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8076 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8077 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8078 EVT IntVT = SrcVT.changeTypeToInteger();
8079 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8080
 // IEEE-754 single-precision field masks: 8 exponent bits at [30:23],
 // 23 mantissa bits, bias 127.
8081 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8082 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8083 SDValue Bias = DAG.getConstant(127, dl, IntVT);
8084 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8085 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8086 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8087
8088 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8089
 // Unbiased exponent: ((bits & expmask) >> 23) - 127.
8090 SDValue ExponentBits = DAG.getNode(
8091 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8092 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8093 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8094
 // Sign is arithmetically broadcast to all bits: 0 or -1.
8095 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8096 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8097 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8098 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8099
 // R = mantissa with the implicit leading 1 (0x00800000) made explicit.
8100 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8101 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8102 DAG.getConstant(0x00800000, dl, IntVT));
8103
8104 R = DAG.getZExtOrTrunc(R, dl, DstVT);
8105
 // Scale the significand left or right depending on whether the exponent
 // exceeds the mantissa width.
8106 R = DAG.getSelectCC(
8107 dl, Exponent, ExponentLoBit,
8108 DAG.getNode(ISD::SHL, dl, DstVT, R,
8109 DAG.getZExtOrTrunc(
8110 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8111 dl, IntShVT)),
8112 DAG.getNode(ISD::SRL, dl, DstVT, R,
8113 DAG.getZExtOrTrunc(
8114 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8115 dl, IntShVT)),
8116 ISD::SETGT);
8117
 // Two's-complement negate via (R ^ Sign) - Sign (Sign is 0 or -1).
8118 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8119 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8120
 // Values with a negative unbiased exponent (|x| < 1) convert to 0.
8121 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8122 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8123 return true;
8124}
8126
// NOTE(review): the function's signature line (original line 8127,
// TargetLowering::expandFP_TO_UINT) is elided in this rendered view; several
// condition lines inside the body (8143, 8145, 8154, 8166) are elided too.
// Expands FP_TO_UINT in terms of FP_TO_SINT by offsetting values that exceed
// the signed range.  On success Result (and Chain for strict ops) are set and
// true is returned; returns false when the expansion is not applicable.
8128 SDValue &Chain,
8129 SelectionDAG &DAG) const {
8130 SDLoc dl(SDValue(Node, 0));
8131 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8132 SDValue Src = Node->getOperand(OpNo);
8133
8134 EVT SrcVT = Src.getValueType();
8135 EVT DstVT = Node->getValueType(0);
8136 EVT SetCCVT =
8137 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8138 EVT DstSetCCVT =
8139 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8140
8141 // Only expand vector types if we have the appropriate vector bit operations.
8142 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8144 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8146 return false;
8147
8148 // If the maximum float value is smaller then the signed integer range,
8149 // the destination signmask can't be represented by the float, so we can
8150 // just use FP_TO_SINT directly.
8151 const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
8152 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8153 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
 // NOTE(review): the first operand of this condition (original line 8154,
 // presumably an APFloat::opOverflow check) is elided in this view.
8155 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
8156 if (Node->isStrictFPOpcode()) {
8157 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8158 { Node->getOperand(0), Src });
8159 Chain = Result.getValue(1);
8160 } else
8161 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8162 return true;
8163 }
8164
8165 // Don't expand it if there isn't cheap fsub instruction.
8167 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT)
8168 return false;
8169
8170 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8171 SDValue Sel;
8172
 // Sel = (Src < signmask-as-float); strict ops thread the chain and use a
 // signaling compare.
8173 if (Node->isStrictFPOpcode()) {
8174 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8175 Node->getOperand(0), /*IsSignaling*/ true);
8176 Chain = Sel.getValue(1);
8177 } else {
8178 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
8179 }
8180
8181 bool Strict = Node->isStrictFPOpcode() ||
8182 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
8183
8184 if (Strict) {
8185 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
8186 // signmask then offset (the result of which should be fully representable).
8187 // Sel = Src < 0x8000000000000000
8188 // FltOfs = select Sel, 0, 0x8000000000000000
8189 // IntOfs = select Sel, 0, 0x8000000000000000
8190 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8191
8192 // TODO: Should any fast-math-flags be set for the FSUB?
8193 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
8194 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8195 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8196 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
8197 DAG.getConstant(0, dl, DstVT),
8198 DAG.getConstant(SignMask, dl, DstVT));
8199 SDValue SInt;
8200 if (Node->isStrictFPOpcode()) {
8201 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8202 { Chain, Src, FltOfs });
8203 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8204 { Val.getValue(1), Val });
8205 Chain = SInt.getValue(1);
8206 } else {
8207 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
8208 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
8209 }
8210 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8211 } else {
8212 // Expand based on maximum range of FP_TO_SINT:
8213 // True = fp_to_sint(Src)
8214 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8215 // Result = select (Src < 0x8000000000000000), True, False
8216
8217 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8218 // TODO: Should any fast-math-flags be set for the FSUB?
8219 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
8220 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
8221 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
8222 DAG.getConstant(SignMask, dl, DstVT));
8223 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8224 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
8225 }
8226 return true;
8227}
8228
// NOTE(review): the function's signature line (original line 8229,
// TargetLowering::expandUINT_TO_FP) is elided in this rendered view, as are
// the remaining operands of the vector legality check (lines 8247-8250).
// Expands unsigned i64 -> f64 conversion by splitting the integer into two
// halves and combining two exact double additions (__floatundidf algorithm).
// Returns true and sets Result on success, false if not applicable.
8230 SDValue &Chain,
8231 SelectionDAG &DAG) const {
8232 // This transform is not correct for converting 0 when rounding mode is set
8233 // to round toward negative infinity which will produce -0.0. So disable under
8234 // strictfp.
8235 if (Node->isStrictFPOpcode())
8236 return false;
8237
8238 SDValue Src = Node->getOperand(0);
8239 EVT SrcVT = Src.getValueType();
8240 EVT DstVT = Node->getValueType(0);
8241
8242 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8243 return false;
8244
8245 // Only expand vector types if we have the appropriate vector bit operations.
8246 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8251 return false;
8252
8253 SDLoc dl(SDValue(Node, 0));
8254 EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
8255
8256 // Implementation of unsigned i64 to f64 following the algorithm in
8257 // __floatundidf in compiler_rt. This implementation performs rounding
8258 // correctly in all rounding modes with the exception of converting 0
8259 // when rounding toward negative infinity. In that case the fsub will produce
8260 // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
 // 0x433... is 2^52 and 0x453... is 2^84 as double bit patterns; OR-ing the
 // integer halves into those exponents makes exact doubles whose mantissas
 // hold the low/high 32 bits respectively.
8261 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8262 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8263 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8264 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8265 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8266 SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
8267
8268 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8269 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8270 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8271 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8272 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8273 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
 // Subtracting (2^84 + 2^52) strips both exponent offsets in one exact op.
8274 SDValue HiSub =
8275 DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8276 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8277 return true;
8278}
8279
8280SDValue
// NOTE(review): the rest of the signature (original line 8281,
// TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,) is elided in
// this rendered view.
// When the node carries the no-NaNs flag, lower FMINNUM/FMAXNUM (or their
// strict variants) to a simple compare-and-select; otherwise return SDValue()
// because select_cc does not honor NaN-propagation semantics.
8282 SelectionDAG &DAG) const {
8283 unsigned Opcode = Node->getOpcode();
8284 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8285 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8286 "Wrong opcode");
8287
8288 if (Node->getFlags().hasNoNaNs()) {
8289 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8290 SDValue Op1 = Node->getOperand(0);
8291 SDValue Op2 = Node->getOperand(1);
8292 SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
8293 // Copy FMF flags, but always set the no-signed-zeros flag
8294 // as this is implied by the FMINNUM/FMAXNUM semantics.
8295 SDNodeFlags Flags = Node->getFlags();
8296 Flags.setNoSignedZeros(true);
8297 SelCC->setFlags(Flags);
8298 return SelCC;
8299 }
8300
8301 return SDValue();
8302}
8303
// NOTE(review): the function's signature line (original line 8304,
// TargetLowering::expandFMINNUM_FMAXNUM) is elided in this rendered view, as
// are the second halves of the NewOp ternary (line 8308) and of the
// report on scalable vectors (line 8312).
// Expands FMINNUM/FMAXNUM via FMINNUM_IEEE/FMAXNUM_IEEE (quieting sNaN
// operands first), FMINIMUM/FMAXIMUM when NaNs/zero signs are irrelevant, or
// a compare-and-select.  Returns SDValue() when no expansion applies.
8305 SelectionDAG &DAG) const {
8306 SDLoc dl(Node);
8307 unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
8309 EVT VT = Node->getValueType(0);
8310
8311 if (VT.isScalableVector())
8313 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8314
8315 if (isOperationLegalOrCustom(NewOp, VT)) {
8316 SDValue Quiet0 = Node->getOperand(0);
8317 SDValue Quiet1 = Node->getOperand(1);
8318
8319 if (!Node->getFlags().hasNoNaNs()) {
8320 // Insert canonicalizes if it's possible we need to quiet to get correct
8321 // sNaN behavior.
8322 if (!DAG.isKnownNeverSNaN(Quiet0)) {
8323 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8324 Node->getFlags());
8325 }
8326 if (!DAG.isKnownNeverSNaN(Quiet1)) {
8327 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8328 Node->getFlags());
8329 }
8330 }
8331
8332 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8333 }
8334
8335 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8336 // instead if there are no NaNs and there can't be an incompatible zero
8337 // compare: at least one operand isn't +/-0, or there are no signed-zeros.
8338 if ((Node->getFlags().hasNoNaNs() ||
8339 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8340 DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
8341 (Node->getFlags().hasNoSignedZeros() ||
8342 DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
8343 DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
8344 unsigned IEEE2018Op =
8345 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8346 if (isOperationLegalOrCustom(IEEE2018Op, VT))
8347 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8348 Node->getOperand(1), Node->getFlags());
8349 }
8350
 // Last resort: compare-and-select, valid only under the no-NaNs flag.
8351 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8352 return SelCC;
8353
8354 return SDValue();
8355}
8356
8357/// Returns a true value if if this FPClassTest can be performed with an ordered
8358/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8359/// std::nullopt if it cannot be performed as a compare with 0.
8360static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8361 const fltSemantics &Semantics,
8362 const MachineFunction &MF) {
8363 FPClassTest OrderedMask = Test & ~fcNan;
8364 FPClassTest NanTest = Test & fcNan;
8365 bool IsOrdered = NanTest == fcNone;
8366 bool IsUnordered = NanTest == fcNan;
8367
8368 // Skip cases that are testing for only a qnan or snan.
8369 if (!IsOrdered && !IsUnordered)
8370 return std::nullopt;
8371
8372 if (OrderedMask == fcZero &&
8373 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8374 return IsOrdered;
8375 if (OrderedMask == (fcZero | fcSubnormal) &&
8376 MF.getDenormalMode(Semantics).inputsAreZero())
8377 return IsOrdered;
8378 return std::nullopt;
8379}
8380
// NOTE(review): the leading signature lines (original lines 8381-8382,
// TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest
// Test, SDNodeFlags Flags, — confirm against the original source) are elided
// in this rendered view, as are parts of several conditions below (lines
// 8419, 8437, 8444-8446).
// Lowers an is_fpclass test: first tries cheap float compares (when FP
// exceptions may be ignored), then falls back to integer bit tests on the
// bitcast value, OR-ing together one partial result per requested class.
8383 const SDLoc &DL,
8384 SelectionDAG &DAG) const {
8385 EVT OperandVT = Op.getValueType();
8386 assert(OperandVT.isFloatingPoint());
8387
8388 // Degenerated cases.
8389 if (Test == fcNone)
8390 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8391 if ((Test & fcAllFlags) == fcAllFlags)
8392 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
8393
8394 // PPC double double is a pair of doubles, of which the higher part determines
8395 // the value class.
8396 if (OperandVT == MVT::ppcf128) {
8397 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8398 DAG.getConstant(1, DL, MVT::i32));
8399 OperandVT = MVT::f64;
8400 }
8401
8402 // Some checks may be represented as inversion of simpler check, for example
8403 // "inf|normal|subnormal|zero" => !"nan".
8404 bool IsInverted = false;
8405 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
8406 IsInverted = true;
8407 Test = InvertedCheck;
8408 }
8409
8410 // Floating-point type properties.
8411 EVT ScalarFloatVT = OperandVT.getScalarType();
8412 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
8413 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
 // x86_fp80 has an explicit integer bit and needs special-casing throughout.
8414 bool IsF80 = (ScalarFloatVT == MVT::f80);
8415
8416 // Some checks can be implemented using float comparisons, if floating point
8417 // exceptions are ignored.
8418 if (Flags.hasNoFPExcept() &&
8420 ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
8421 ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
8422
8423 if (std::optional<bool> IsCmp0 =
8424 isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
8425 IsCmp0 && (isCondCodeLegalOrCustom(
8426 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8427 OperandVT.getScalarType().getSimpleVT()))) {
8428
8429 // If denormals could be implicitly treated as 0, this is not equivalent
8430 // to a compare with 0 since it will also be true for denormals.
8431 return DAG.getSetCC(DL, ResultVT, Op,
8432 DAG.getConstantFP(0.0, DL, OperandVT),
8433 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8434 }
8435
 // isnan(x) is just an unordered self-compare.
8436 if (Test == fcNan &&
8438 OperandVT.getScalarType().getSimpleVT())) {
8439 return DAG.getSetCC(DL, ResultVT, Op, Op,
8440 IsInverted ? ISD::SETO : ISD::SETUO);
8441 }
8442
8443 if (Test == fcInf &&
8445 OperandVT.getScalarType().getSimpleVT()) &&
8447 // isinf(x) --> fabs(x) == inf
8448 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8449 SDValue Inf =
8450 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
8451 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8452 IsInverted ? ISD::SETUNE : ISD::SETOEQ);
8453 }
8454 }
8455
8456 // In the general case use integer operations.
8457 unsigned BitSize = OperandVT.getScalarSizeInBits();
8458 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
8459 if (OperandVT.isVector())
8460 IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
8461 OperandVT.getVectorElementCount());
8462 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
8463
8464 // Various masks.
8465 APInt SignBit = APInt::getSignMask(BitSize);
8466 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
8467 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
8468 const unsigned ExplicitIntBitInF80 = 63;
8469 APInt ExpMask = Inf;
8470 if (IsF80)
8471 ExpMask.clearBit(ExplicitIntBitInF80);
8472 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
8473 APInt QNaNBitMask =
8474 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
8475 APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
8476
8477 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
8478 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
8479 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
8480 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
8481 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
8482 SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);
8483
 // Res accumulates the OR of all partial class checks.
8484 SDValue Res;
8485 const auto appendResult = [&](SDValue PartialRes) {
8486 if (PartialRes) {
8487 if (Res)
8488 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
8489 else
8490 Res = PartialRes;
8491 }
8492 };
8493
 // Lazily-built check of f80's explicit integer (bit 63) being set.
8494 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
8495 const auto getIntBitIsSet = [&]() -> SDValue {
8496 if (!IntBitIsSetV) {
8497 APInt IntBitMask(BitSize, 0);
8498 IntBitMask.setBit(ExplicitIntBitInF80);
8499 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
8500 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
8501 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
8502 }
8503 return IntBitIsSetV;
8504 };
8505
8506 // Split the value into sign bit and absolute value.
8507 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
8508 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
8509 DAG.getConstant(0.0, DL, IntVT), ISD::SETLT);
8510
8511 // Tests that involve more than one class should be processed first.
8512 SDValue PartialRes;
8513
8514 if (IsF80)
8515 ; // Detect finite numbers of f80 by checking individual classes because
8516 // they have different settings of the explicit integer bit.
8517 else if ((Test & fcFinite) == fcFinite) {
8518 // finite(V) ==> abs(V) < exp_mask
8519 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8520 Test &= ~fcFinite;
8521 } else if ((Test & fcFinite) == fcPosFinite) {
8522 // finite(V) && V > 0 ==> V < exp_mask
8523 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
8524 Test &= ~fcPosFinite;
8525 } else if ((Test & fcFinite) == fcNegFinite) {
8526 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
8527 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8528 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8529 Test &= ~fcNegFinite;
8530 }
8531 appendResult(PartialRes);
8532
8533 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
8534 // fcZero | fcSubnormal => test all exponent bits are 0
8535 // TODO: Handle sign bit specific cases
8536 if (PartialCheck == (fcZero | fcSubnormal)) {
8537 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
8538 SDValue ExpIsZero =
8539 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
8540 appendResult(ExpIsZero);
8541 Test &= ~PartialCheck & fcAllFlags;
8542 }
8543 }
8544
8545 // Check for individual classes.
8546
8547 if (unsigned PartialCheck = Test & fcZero) {
8548 if (PartialCheck == fcPosZero)
8549 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
8550 else if (PartialCheck == fcZero)
8551 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
8552 else // ISD::fcNegZero
8553 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
8554 appendResult(PartialRes);
8555 }
8556
8557 if (unsigned PartialCheck = Test & fcSubnormal) {
8558 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
8559 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
8560 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
8561 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
8562 SDValue VMinusOneV =
8563 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
8564 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
8565 if (PartialCheck == fcNegSubnormal)
8566 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8567 appendResult(PartialRes);
8568 }
8569
8570 if (unsigned PartialCheck = Test & fcInf) {
8571 if (PartialCheck == fcPosInf)
8572 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
8573 else if (PartialCheck == fcInf)
8574 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
8575 else { // ISD::fcNegInf
8576 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
8577 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
8578 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
8579 }
8580 appendResult(PartialRes);
8581 }
8582
8583 if (unsigned PartialCheck = Test & fcNan) {
8584 APInt InfWithQnanBit = Inf | QNaNBitMask;
8585 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
8586 if (PartialCheck == fcNan) {
8587 // isnan(V) ==> abs(V) > int(inf)
8588 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
8589 if (IsF80) {
8590 // Recognize unsupported values as NaNs for compatibility with glibc.
8591 // In them (exp(V)==0) == int_bit.
8592 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
8593 SDValue ExpIsZero =
8594 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
8595 SDValue IsPseudo =
8596 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
8597 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
8598 }
8599 } else if (PartialCheck == fcQNan) {
8600 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
8601 PartialRes =
8602 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
8603 } else { // ISD::fcSNan
8604 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
8605 // abs(V) < (unsigned(Inf) | quiet_bit)
8606 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
8607 SDValue IsNotQnan =
8608 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
8609 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
8610 }
8611 appendResult(PartialRes);
8612 }
8613
8614 if (unsigned PartialCheck = Test & fcNormal) {
8615 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
8616 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
8617 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
8618 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
8619 APInt ExpLimit = ExpMask - ExpLSB;
8620 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
8621 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
8622 if (PartialCheck == fcNegNormal)
8623 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8624 else if (PartialCheck == fcPosNormal) {
8625 SDValue PosSignV =
8626 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
8627 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
8628 }
8629 if (IsF80)
8630 PartialRes =
8631 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
8632 appendResult(PartialRes);
8633 }
8634
 // If the (possibly inverted) test asked for no classes, the answer is a
 // constant; otherwise apply the inversion as an XOR with all-ones.
8635 if (!Res)
8636 return DAG.getConstant(IsInverted, DL, ResultVT);
8637 if (IsInverted)
8638 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
8639 return Res;
8640}
8641
8642// Only expand vector types if we have the appropriate vector bit operations.
// Returns true when all opcodes needed by the parallel-bits CTPOP expansion
// (ADD plus the shift/mask ops, and MUL for widths > 8) are legal or custom
// for VT.  NOTE(review): the SRL/AND legality operands of this conjunction
// (original lines 8647-8648 and 8650) are elided in this rendered view.
8643static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
8644 assert(VT.isVector() && "Expected vector type");
8645 unsigned Len = VT.getScalarSizeInBits();
8646 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
8649 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
8651}
8652
// NOTE(review): the function's signature line (original line 8653,
// TargetLowering::expandCTPOP) is elided in this rendered view.
// Expands ISD::CTPOP with the classic parallel bit-counting ("best" bithacks)
// algorithm for widths up to 128 bits that are a multiple of 8.  Returns the
// expanded value or SDValue() when the type is unsupported.
8654 SDLoc dl(Node);
8655 EVT VT = Node->getValueType(0);
8656 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8657 SDValue Op = Node->getOperand(0);
8658 unsigned Len = VT.getScalarSizeInBits();
8659 assert(VT.isInteger() && "CTPOP not implemented for this type.");
8660
8661 // TODO: Add support for irregular type lengths.
8662 if (!(Len <= 128 && Len % 8 == 0))
8663 return SDValue();
8664
8665 // Only expand vector types if we have the appropriate vector bit operations.
8666 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
8667 return SDValue();
8668
8669 // This is the "best" algorithm from
8670 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
 // Per-byte-replicated masks for the pairwise/nibble accumulation steps.
8671 SDValue Mask55 =
8672 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
8673 SDValue Mask33 =
8674 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
8675 SDValue Mask0F =
8676 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
8677
8678 // v = v - ((v >> 1) & 0x55555555...)
8679 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
8680 DAG.getNode(ISD::AND, dl, VT,
8681 DAG.getNode(ISD::SRL, dl, VT, Op,
8682 DAG.getConstant(1, dl, ShVT)),
8683 Mask55));
8684 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8685 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
8686 DAG.getNode(ISD::AND, dl, VT,
8687 DAG.getNode(ISD::SRL, dl, VT, Op,
8688 DAG.getConstant(2, dl, ShVT)),
8689 Mask33));
8690 // v = (v + (v >> 4)) & 0x0F0F0F0F...
8691 Op = DAG.getNode(ISD::AND, dl, VT,
8692 DAG.getNode(ISD::ADD, dl, VT, Op,
8693 DAG.getNode(ISD::SRL, dl, VT, Op,
8694 DAG.getConstant(4, dl, ShVT))),
8695 Mask0F);
8696
 // After the nibble step each byte holds its own popcount; for i8 that is
 // already the answer.
8697 if (Len <= 8)
8698 return Op;
8699
8700 // Avoid the multiply if we only have 2 bytes to add.
8701 // TODO: Only doing this for scalars because vectors weren't as obviously
8702 // improved.
8703 if (Len == 16 && !VT.isVector()) {
8704 // v = (v + (v >> 8)) & 0x00FF;
8705 return DAG.getNode(ISD::AND, dl, VT,
8706 DAG.getNode(ISD::ADD, dl, VT, Op,
8707 DAG.getNode(ISD::SRL, dl, VT, Op,
8708 DAG.getConstant(8, dl, ShVT))),
8709 DAG.getConstant(0xFF, dl, VT));
8710 }
8711
8712 // v = (v * 0x01010101...) >> (Len - 8)
 // The multiply sums all byte counts into the top byte; shift it down.
8713 SDValue Mask01 =
8714 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
8715 return DAG.getNode(ISD::SRL, dl, VT,
8716 DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
8717 DAG.getConstant(Len - 8, dl, ShVT));
8718}
8719
// NOTE(review): the function's signature line (original line 8720,
// TargetLowering::expandVPCTPOP) is elided in this rendered view.
// Vector-predicated variant of expandCTPOP: the same parallel bit-count
// algorithm using VP_* opcodes, threading the mask and EVL operands through
// every node.  Returns the expansion or SDValue() for unsupported widths.
8721 SDLoc dl(Node);
8722 EVT VT = Node->getValueType(0);
8723 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8724 SDValue Op = Node->getOperand(0);
8725 SDValue Mask = Node->getOperand(1);
8726 SDValue VL = Node->getOperand(2);
8727 unsigned Len = VT.getScalarSizeInBits();
8728 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
8729
8730 // TODO: Add support for irregular type lengths.
8731 if (!(Len <= 128 && Len % 8 == 0))
8732 return SDValue();
8733
8734 // This is same algorithm of expandCTPOP from
8735 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
8736 SDValue Mask55 =
8737 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
8738 SDValue Mask33 =
8739 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
8740 SDValue Mask0F =
8741 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
8742
8743 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
8744
8745 // v = v - ((v >> 1) & 0x55555555...)
8746 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
8747 DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
8748 DAG.getConstant(1, dl, ShVT), Mask, VL),
8749 Mask55, Mask, VL);
8750 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
8751
8752 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8753 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
8754 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
8755 DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
8756 DAG.getConstant(2, dl, ShVT), Mask, VL),
8757 Mask33, Mask, VL);
8758 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
8759
8760 // v = (v + (v >> 4)) & 0x0F0F0F0F...
 // NOTE(review): the two assignments below are joined into one statement by
 // the comma operator (note the ',' ending the first) — unusual but valid.
8761 Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
8762 Mask, VL),
8763 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
8764 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
8765
8766 if (Len <= 8)
8767 return Op;
8768
8769 // v = (v * 0x01010101...) >> (Len - 8)
8770 SDValue Mask01 =
8771 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
8772 return DAG.getNode(ISD::VP_LSHR, dl, VT,
8773 DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL),
8774 DAG.getConstant(Len - 8, dl, ShVT), Mask, VL);
8775}
8776
// NOTE(review): the function's signature line (original line 8777,
// TargetLowering::expandCTLZ) is elided in this rendered view, as are parts
// of the legality conditions at lines 8786, 8790 and 8802-8806.
// Expands ISD::CTLZ / ISD::CTLZ_ZERO_UNDEF: prefer the sibling opcode if
// legal, otherwise smear the leading one rightwards and count the remaining
// zero bits with CTPOP of the complement (Hacker's Delight).
8778 SDLoc dl(Node);
8779 EVT VT = Node->getValueType(0);
8780 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8781 SDValue Op = Node->getOperand(0);
8782 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8783
8784 // If the non-ZERO_UNDEF version is supported we can use that instead.
8785 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
8787 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
8788
8789 // If the ZERO_UNDEF version is supported use that and handle the zero case.
8791 EVT SetCCVT =
8792 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8793 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
8794 SDValue Zero = DAG.getConstant(0, dl, VT);
8795 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
 // CTLZ of 0 is defined as the bit width; select it explicitly.
8796 return DAG.getSelect(dl, VT, SrcIsZero,
8797 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
8798 }
8799
8800 // Only expand vector types if we have the appropriate vector bit operations.
8801 // This includes the operations needed to expand CTPOP if it isn't supported.
8802 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
8804 !canExpandVectorCTPOP(*this, VT)) ||
8807 return SDValue();
8808
8809 // for now, we do this:
8810 // x = x | (x >> 1);
8811 // x = x | (x >> 2);
8812 // ...
8813 // x = x | (x >>16);
8814 // x = x | (x >>32); // for 64-bit input
8815 // return popcount(~x);
8816 //
8817 // Ref: "Hacker's Delight" by Henry Warren
8818 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
8819 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
8820 Op = DAG.getNode(ISD::OR, dl, VT, Op,
8821 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
8822 }
8823 Op = DAG.getNOT(dl, Op, VT);
8824 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
8825}
8826
// Vector-predicated variant of the CTLZ expansion: the same shift/or bit
// smear followed by popcount of the complement, but every node carries the
// VP mask and explicit vector length (EVL) operands so inactive lanes stay
// untouched.
8828 SDLoc dl(Node);
8829 EVT VT = Node->getValueType(0);
8830 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8831 SDValue Op = Node->getOperand(0);
// Operand 1 is the per-lane predicate mask, operand 2 the explicit vector
// length, per the VP intrinsic convention.
8832 SDValue Mask = Node->getOperand(1);
8833 SDValue VL = Node->getOperand(2);
8834 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8835
8836 // do this:
8837 // x = x | (x >> 1);
8838 // x = x | (x >> 2);
8839 // ...
8840 // x = x | (x >>16);
8841 // x = x | (x >>32); // for 64-bit input
8842 // return popcount(~x);
8843 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
8844 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
8845 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
8846 DAG.getNode(ISD::VP_LSHR, dl, VT, Op, Tmp, Mask, VL), Mask,
8847 VL);
8848 }
// XOR with all-ones is the VP spelling of bitwise NOT.
8849 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getConstant(-1, dl, VT), Mask,
8850 VL);
8851 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
8852}
8853
// Lower CTTZ/CTTZ_ZERO_UNDEF via a de Bruijn multiply + table lookup:
// isolate the lowest set bit with (x & -x), multiply by a de Bruijn
// constant so the top log2(BitWidth) bits form a unique index, then load
// the answer from a constant-pool byte table. Only 32- and 64-bit widths
// are handled; returns a null SDValue otherwise.
8855 const SDLoc &DL, EVT VT, SDValue Op,
8856 unsigned BitWidth) const {
8857 if (BitWidth != 32 && BitWidth != 64)
8858 return SDValue();
// Standard de Bruijn sequences for 32 and 64 bits: multiplying an isolated
// bit by these yields a distinct top-bits index for every bit position.
8859 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
8860 : APInt(64, 0x0218A392CD3D5DBFULL);
8861 const DataLayout &TD = DAG.getDataLayout();
8862 MachinePointerInfo PtrInfo =
// NOTE(review): the initializer line (presumably
// MachinePointerInfo::getConstantPool(...)) is elided in this view.
8864 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
// (x & -x) isolates the lowest set bit of Op.
8865 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
8866 SDValue Lookup = DAG.getNode(
8867 ISD::SRL, DL, VT,
8868 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
8869 DAG.getConstant(DeBruijn, DL, VT)),
8870 DAG.getConstant(ShiftAmt, DL, VT));
8872
// Build the index -> trailing-zero-count table by simulating the multiply
// for each bit position. NOTE(review): the declaration of 'Table'
// (presumably a SmallVector of bytes sized BitWidth) is elided here.
8874 for (unsigned i = 0; i < BitWidth; i++) {
8875 APInt Shl = DeBruijn.shl(i);
8876 APInt Lshr = Shl.lshr(ShiftAmt);
8877 Table[Lshr.getZExtValue()] = i;
8878 }
8879
8880 // Create a ConstantArray in Constant Pool
8881 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
8882 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
8883 TD.getPrefTypeAlign(CA->getType()));
// Load the single table byte at CPIdx + Lookup, zero-extended to VT.
8884 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
8885 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
8886 PtrInfo, MVT::i8);
// ZERO_UNDEF permits any result for a zero input, so the raw lookup is
// already correct.
8887 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
8888 return ExtLoad;
8889
// Plain CTTZ must return BitWidth for a zero input; add the select.
8890 EVT SetCCVT =
8891 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8892 SDValue Zero = DAG.getConstant(0, DL, VT);
8893 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
8894 return DAG.getSelect(DL, VT, SrcIsZero,
8895 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
8896}
8897
// Expand CTTZ / CTTZ_ZERO_UNDEF. Preference order: rewrite to the other
// CTTZ flavor if legal, de Bruijn table lookup (scalar only), then the
// Hacker's Delight identity popcount(~x & (x - 1)), or
// BitWidth - ctlz(~x & (x - 1)) if only CTLZ is available.
8899 SDLoc dl(Node);
8900 EVT VT = Node->getValueType(0);
8901 SDValue Op = Node->getOperand(0);
8902 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8903
8904 // If the non-ZERO_UNDEF version is supported we can use that instead.
8905 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
// NOTE(review): the legality-check half of this condition is elided in
// this view -- confirm against upstream.
8907 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
8908
8909 // If the ZERO_UNDEF version is supported use that and handle the zero case.
// NOTE(review): the guarding 'if' condition for this branch is elided here.
8911 EVT SetCCVT =
8912 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8913 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
8914 SDValue Zero = DAG.getConstant(0, dl, VT);
8915 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
// CTTZ of zero is defined as the element bit width; select it in
// explicitly since the ZERO_UNDEF form leaves it unspecified.
8916 return DAG.getSelect(dl, VT, SrcIsZero,
8917 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
8918 }
8919
8920 // Only expand vector types if we have the appropriate vector bit operations.
8921 // This includes the operations needed to expand CTPOP if it isn't supported.
8922 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
8925 !canExpandVectorCTPOP(*this, VT)) ||
8929 return SDValue();
8930
8931 // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
8932 if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
// NOTE(review): the remainder of this condition (a check on ISD::CTLZ
// legality) is elided in this view.
8934 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
8935 return V;
8936
8937 // for now, we use: { return popcount(~x & (x - 1)); }
8938 // unless the target has ctlz but not ctpop, in which case we use:
8939 // { return 32 - nlz(~x & (x-1)); }
8940 // Ref: "Hacker's Delight" by Henry Warren
// ~x & (x - 1) sets exactly the bits below the lowest set bit of x.
8941 SDValue Tmp = DAG.getNode(
8942 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
8943 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
8944
8945 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
// NOTE(review): the 'if' condition for this branch is elided in this view.
8947 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
8948 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
8949 }
8950
8951 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
8952}
8953
// Vector-predicated CTTZ expansion: popcount(~x & (x - 1)) built entirely
// out of VP nodes so the mask/EVL semantics are preserved per lane.
8955 SDValue Op = Node->getOperand(0);
// Operand 1 is the lane mask, operand 2 the explicit vector length.
8956 SDValue Mask = Node->getOperand(1);
8957 SDValue VL = Node->getOperand(2);
8958 SDLoc dl(Node);
8959 EVT VT = Node->getValueType(0);
8960
8961 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
// VP_XOR with all-ones computes ~x; VP_SUB computes x - 1; their AND
// leaves exactly the bits below the lowest set bit of x.
8962 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
8963 DAG.getConstant(-1, dl, VT), Mask, VL);
8964 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
8965 DAG.getConstant(1, dl, VT), Mask, VL);
8966 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
8967 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
8968}
8969
// Expand ABS (or negated ABS when IsNegative) using whichever of
// smax/umin/smin is legal, falling back to the classic
// sra/xor/sub sign-mask sequence. Returns a null SDValue when a vector
// type lacks the required operations.
8971 bool IsNegative) const {
8972 SDLoc dl(N);
8973 EVT VT = N->getValueType(0);
8974 SDValue Op = N->getOperand(0);
8975
8976 // abs(x) -> smax(x,sub(0,x))
8977 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): the remainder of this condition (presumably a legality
// check on ISD::SMAX) is elided in this view.
8979 SDValue Zero = DAG.getConstant(0, dl, VT);
8980 return DAG.getNode(ISD::SMAX, dl, VT, Op,
8981 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
8982 }
8983
8984 // abs(x) -> umin(x,sub(0,x))
8985 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): the remainder of this condition (presumably a legality
// check on ISD::UMIN) is elided in this view.
8987 SDValue Zero = DAG.getConstant(0, dl, VT);
// Freeze so both uses of Op observe the same value even if Op is
// poison/undef.
8988 Op = DAG.getFreeze(Op);
8989 return DAG.getNode(ISD::UMIN, dl, VT, Op,
8990 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
8991 }
8992
8993 // 0 - abs(x) -> smin(x, sub(0,x))
8994 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): the remainder of this condition (presumably a legality
// check on ISD::SMIN) is elided in this view.
8996 Op = DAG.getFreeze(Op);
8997 SDValue Zero = DAG.getConstant(0, dl, VT);
8998 return DAG.getNode(ISD::SMIN, dl, VT, Op,
8999 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9000 }
9001
9002 // Only expand vector types if we have the appropriate vector operations.
9003 if (VT.isVector() &&
9005 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9006 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9008 return SDValue();
9009
// Freeze: Op feeds both the shift and the xor below; a poison input must
// not be observed as two different values.
9010 Op = DAG.getFreeze(Op);
// Shift is an all-ones mask when Op is negative, all-zeros otherwise.
9011 SDValue Shift = DAG.getNode(
9012 ISD::SRA, dl, VT, Op,
9013 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9014 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9015
9016 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9017 if (!IsNegative)
9018 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9019
9020 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9021 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9022}
9023
// Expand ABDS/ABDU (signed/unsigned absolute difference) via max-min,
// saturating subtraction (unsigned only), or a compare-and-select of the
// two possible subtraction orders.
9025 SDLoc dl(N);
9026 EVT VT = N->getValueType(0);
// Freeze both operands: each feeds two uses below and must not be seen as
// two different values if poison/undef.
9027 SDValue LHS = DAG.getFreeze(N->getOperand(0));
9028 SDValue RHS = DAG.getFreeze(N->getOperand(1));
9029 bool IsSigned = N->getOpcode() == ISD::ABDS;
9030
9031 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9032 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9033 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9034 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9035 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
9036 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
9037 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
9038 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9039 }
9040
9041 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
// One of the two saturating subtractions is zero, so OR merges them.
9042 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
9043 return DAG.getNode(ISD::OR, dl, VT,
9044 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
9045 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
9046
9047 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9048 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9049 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// NOTE(review): the declaration of the condition code 'CC' (presumably
// SETGT or SETUGT depending on IsSigned) is elided in this view.
9051 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
9052 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9053 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9054}
9055
// Expand BSWAP (byte reversal) with shift/mask/or sequences for i16, i32
// and i64 scalar element types; other widths return a null SDValue so the
// caller can fall back.
9057 SDLoc dl(N);
9058 EVT VT = N->getValueType(0);
9059 SDValue Op = N->getOperand(0);
9060
9061 if (!VT.isSimple())
9062 return SDValue();
9063
9064 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9065 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9066 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9067 default:
9068 return SDValue();
9069 case MVT::i16:
9070 // Use a rotate by 8. This can be further expanded if necessary.
9071 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9072 case MVT::i32:
// TmpN holds the byte destined for position N (1-based from the LSB end);
// each byte is shifted into place and the pieces are OR'd back together.
9073 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9074 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
9075 DAG.getConstant(0xFF00, dl, VT));
9076 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
9077 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9078 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
9079 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9080 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9081 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9082 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9083 case MVT::i64:
// Same byte-gather scheme as i32, with eight pieces. The final ORs are
// arranged as a balanced tree to keep the dependency chain short.
9084 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9085 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
9086 DAG.getConstant(255ULL<<8, dl, VT));
9087 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
9088 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
9089 DAG.getConstant(255ULL<<16, dl, VT));
9090 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
9091 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
9092 DAG.getConstant(255ULL<<24, dl, VT));
9093 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
9094 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9095 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
9096 DAG.getConstant(255ULL<<24, dl, VT));
9097 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9098 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
9099 DAG.getConstant(255ULL<<16, dl, VT));
9100 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
9101 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
9102 DAG.getConstant(255ULL<<8, dl, VT));
9103 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9104 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
9105 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
9106 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9107 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9108 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
9109 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9110 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
9111 }
9112}
9113
// Vector-predicated BSWAP expansion: the same per-width shift/mask/or
// byte-gather as expandBSWAP, but every node is a VP node carrying the
// mask and explicit vector length (EVL). Note i16 uses SHL/LSHR/OR rather
// than ROTL since there is no VP rotate used here.
9115 SDLoc dl(N);
9116 EVT VT = N->getValueType(0);
9117 SDValue Op = N->getOperand(0);
// Operand 1 is the lane mask, operand 2 the explicit vector length.
9118 SDValue Mask = N->getOperand(1);
9119 SDValue EVL = N->getOperand(2);
9120
9121 if (!VT.isSimple())
9122 return SDValue();
9123
9124 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9125 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9126 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9127 default:
9128 return SDValue();
9129 case MVT::i16:
// Swap the two bytes: (x << 8) | (x >> 8).
9130 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9131 Mask, EVL);
9132 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9133 Mask, EVL);
9134 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
9135 case MVT::i32:
// TmpN holds the byte destined for position N; shift each into place and
// OR the pieces together.
9136 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9137 Mask, EVL);
9138 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
9139 Mask, EVL);
9140 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
9141 Mask, EVL);
9142 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9143 Mask, EVL);
9144 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9145 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
9146 Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9147 Mask, EVL);
9148 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9149 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9150 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9151 case MVT::i64:
// Eight-byte gather; final ORs form a balanced tree.
9152 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9153 Mask, EVL);
9154 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9155 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9156 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
9157 Mask, EVL);
9158 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9159 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9160 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
9161 Mask, EVL);
9162 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9163 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9164 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
9165 Mask, EVL);
9166 Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9167 Mask, EVL);
9168 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
9169 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9170 Tmp3 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9171 Mask, EVL);
9172 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
9173 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9174 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
9175 Mask, EVL);
9176 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9177 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9178 Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9179 Mask, EVL);
9180 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
9181 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
9182 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9183 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9184 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
9185 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9186 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
9187 }
9188}
9189
// Expand BITREVERSE. For power-of-two widths >= 8 bits: byte-reverse with
// BSWAP, then swap nibbles, bit-pairs, and adjacent bits using splatted
// masks. Other widths fall back to moving each bit individually (O(Sz)
// nodes).
9191 SDLoc dl(N);
9192 EVT VT = N->getValueType(0);
9193 SDValue Op = N->getOperand(0);
9194 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9195 unsigned Sz = VT.getScalarSizeInBits();
9196
9197 SDValue Tmp, Tmp2, Tmp3;
9198
9199 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9200 // and finally the i1 pairs.
9201 // TODO: We can easily support i4/i2 legal types if any target ever does.
9202 if (Sz >= 8 && isPowerOf2_32(Sz)) {
9203 // Create the masks - repeating the pattern every byte.
9204 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9205 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9206 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9207
9208 // BSWAP if the type is wider than a single byte.
9209 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
9210
9211 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9212 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
9213 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
9214 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
9215 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
9216 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9217
9218 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9219 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
9220 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
9221 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
9222 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
9223 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9224
9225 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9226 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
9227 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
9228 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
9229 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
9230 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9231 return Tmp;
9232 }
9233
// Generic fallback: shift bit I to position J = Sz-1-I, isolate it, and
// accumulate with OR.
9234 Tmp = DAG.getConstant(0, dl, VT);
9235 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
9236 if (I < J)
9237 Tmp2 =
9238 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
9239 else
9240 Tmp2 =
9241 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
9242
9243 APInt Shift = APInt::getOneBitSet(Sz, J);
9244 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
9245 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
9246 }
9247
9248 return Tmp;
9249}
9250
// Vector-predicated BITREVERSE expansion: VP_BSWAP followed by masked
// nibble / bit-pair / bit swaps, all with VP nodes carrying mask and EVL.
// Unlike the non-VP version there is no bit-at-a-time fallback: widths
// that are not a power of two >= 8 return a null SDValue.
9252 assert(N->getOpcode() == ISD::VP_BITREVERSE);
9253
9254 SDLoc dl(N);
9255 EVT VT = N->getValueType(0);
9256 SDValue Op = N->getOperand(0);
// Operand 1 is the lane mask, operand 2 the explicit vector length.
9257 SDValue Mask = N->getOperand(1);
9258 SDValue EVL = N->getOperand(2);
9259 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9260 unsigned Sz = VT.getScalarSizeInBits();
9261
9262 SDValue Tmp, Tmp2, Tmp3;
9263
9264 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9265 // and finally the i1 pairs.
9266 // TODO: We can easily support i4/i2 legal types if any target ever does.
9267 if (Sz >= 8 && isPowerOf2_32(Sz)) {
9268 // Create the masks - repeating the pattern every byte.
9269 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9270 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9271 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9272
9273 // BSWAP if the type is wider than a single byte.
9274 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
9275
9276 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9277 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
9278 Mask, EVL);
9279 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9280 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
9281 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
9282 Mask, EVL);
9283 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
9284 Mask, EVL);
9285 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9286
9287 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9288 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
9289 Mask, EVL);
9290 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9291 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
9292 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
9293 Mask, EVL);
9294 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
9295 Mask, EVL);
9296 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9297
9298 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9299 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
9300 Mask, EVL);
9301 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9302 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
9303 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
9304 Mask, EVL);
9305 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
9306 Mask, EVL);
9307 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9308 return Tmp;
9309 }
9310 return SDValue();
9311}
9312
// Scalarize a vector load into per-element loads (or, for sub-byte
// elements, one wide integer load plus shift/mask extraction). Returns
// {value, chain}. Scalable vectors cannot be scalarized and abort.
9313 std::pair<SDValue, SDValue>
9314 SelectionDAG &DAG) const {
9316 SDLoc SL(LD);
9317 SDValue Chain = LD->getChain();
9318 SDValue BasePTR = LD->getBasePtr();
9319 EVT SrcVT = LD->getMemoryVT();
9320 EVT DstVT = LD->getValueType(0);
9321 ISD::LoadExtType ExtType = LD->getExtensionType();
9322
9323 if (SrcVT.isScalableVector())
9324 report_fatal_error("Cannot scalarize scalable vector loads");
9325
9326 unsigned NumElem = SrcVT.getVectorNumElements();
9327
9328 EVT SrcEltVT = SrcVT.getScalarType();
9329 EVT DstEltVT = DstVT.getScalarType();
9330
9331 // A vector must always be stored in memory as-is, i.e. without any padding
9332 // between the elements, since various code depend on it, e.g. in the
9333 // handling of a bitcast of a vector type to int, which may be done with a
9334 // vector store followed by an integer load. A vector that does not have
9335 // elements that are byte-sized must therefore be stored as an integer
9336 // built out of the extracted vector elements.
9337 if (!SrcEltVT.isByteSized()) {
9338 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
9339 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
9340
9341 unsigned NumSrcBits = SrcVT.getSizeInBits();
9342 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
9343
9344 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
9345 SDValue SrcEltBitMask = DAG.getConstant(
9346 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
9347
9348 // Load the whole vector and avoid masking off the top bits as it makes
9349 // the codegen worse.
9350 SDValue Load =
9351 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
9352 LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
9353 LD->getMemOperand()->getFlags(), LD->getAAInfo());
9354
// Peel each element out of the wide integer with shift + mask + trunc.
// NOTE(review): the declaration of 'Vals' (presumably a SmallVector of
// SDValue) is elided in this view.
9356 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
// Big-endian layouts place element 0 in the most-significant position.
9357 unsigned ShiftIntoIdx =
9358 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
9359 SDValue ShiftAmount =
9360 DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
9361 LoadVT, SL, /*LegalTypes=*/false);
9362 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
9363 SDValue Elt =
9364 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
9365 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
9366
// Re-apply the original load's extension (sext/zext/aext) per element.
9367 if (ExtType != ISD::NON_EXTLOAD) {
9368 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
9369 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
9370 }
9371
9372 Vals.push_back(Scalar);
9373 }
9374
9375 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
9376 return std::make_pair(Value, Load.getValue(1));
9377 }
9378
// Byte-sized elements: emit one scalar (ext)load per element at
// consecutive byte offsets, then rebuild the vector.
9379 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
9380 assert(SrcEltVT.isByteSized());
9381
// NOTE(review): the declaration of 'Vals' is elided in this view.
9383 SmallVector<SDValue, 8> LoadChains;
9384
9385 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9386 SDValue ScalarLoad =
9387 DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
9388 LD->getPointerInfo().getWithOffset(Idx * Stride),
9389 SrcEltVT, LD->getOriginalAlign(),
9390 LD->getMemOperand()->getFlags(), LD->getAAInfo());
9391
9392 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
9393
9394 Vals.push_back(ScalarLoad.getValue(0));
9395 LoadChains.push_back(ScalarLoad.getValue(1));
9396 }
9397
// Merge all the element-load chains into one output chain.
9398 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
9399 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
9400
9401 return std::make_pair(Value, NewChain);
9402}
9403
// Scalarize a vector store into per-element (trunc)stores, or — for
// sub-byte elements — pack all elements into one integer and emit a
// single store so the in-memory layout has no padding. Returns the new
// chain. Scalable vectors cannot be scalarized and abort.
9404 SelectionDAG &DAG) const {
9406 SDLoc SL(ST);
9407
9408 SDValue Chain = ST->getChain();
9409 SDValue BasePtr = ST->getBasePtr();
9410 SDValue Value = ST->getValue();
9411 EVT StVT = ST->getMemoryVT();
9412
9413 if (StVT.isScalableVector())
9414 report_fatal_error("Cannot scalarize scalable vector stores");
9415
9416 // The type of the data we want to save
9417 EVT RegVT = Value.getValueType();
9418 EVT RegSclVT = RegVT.getScalarType();
9419
9420 // The type of data as saved in memory.
9421 EVT MemSclVT = StVT.getScalarType();
9422
9423 unsigned NumElem = StVT.getVectorNumElements();
9424
9425 // A vector must always be stored in memory as-is, i.e. without any padding
9426 // between the elements, since various code depend on it, e.g. in the
9427 // handling of a bitcast of a vector type to int, which may be done with a
9428 // vector store followed by an integer load. A vector that does not have
9429 // elements that are byte-sized must therefore be stored as an integer
9430 // built out of the extracted vector elements.
9431 if (!MemSclVT.isByteSized()) {
9432 unsigned NumBits = StVT.getSizeInBits();
9433 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
9434
9435 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
9436
// OR each truncated element into its bit position within the packed
// integer.
9437 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9438 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
9439 DAG.getVectorIdxConstant(Idx, SL));
9440 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
9441 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
// Big-endian layouts place element 0 in the most-significant position.
9442 unsigned ShiftIntoIdx =
9443 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
9444 SDValue ShiftAmount =
9445 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
9446 SDValue ShiftedElt =
9447 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
9448 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
9449 }
9450
9451 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
9452 ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
9453 ST->getAAInfo());
9454 }
9455
9456 // Store Stride in bytes
9457 unsigned Stride = MemSclVT.getSizeInBits() / 8;
9458 assert(Stride && "Zero stride!");
9459 // Extract each of the elements from the original vector and save them into
9460 // memory individually.
// NOTE(review): the declaration of 'Stores' (presumably a SmallVector of
// SDValue) is elided in this view.
9462 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9463 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
9464 DAG.getVectorIdxConstant(Idx, SL));
9465
9466 SDValue Ptr =
9467 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
9468
9469 // This scalar TruncStore may be illegal, but we legalize it later.
9470 SDValue Store = DAG.getTruncStore(
9471 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
9472 MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
9473 ST->getAAInfo());
9474
9475 Stores.push_back(Store);
9476 }
9477
// Merge all the element-store chains into one output chain.
9478 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
9479}
9480
// Expand an unaligned (unindexed) load. FP/vector values go through an
// integer load of the same size and a bitcast, or — when that is not
// possible — a bounce through an aligned stack slot. Integer values are
// split into two half-width loads combined with shift+or. Returns
// {value, chain}.
9481 std::pair<SDValue, SDValue>
9483 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
9484 "unaligned indexed loads not implemented!");
9485 SDValue Chain = LD->getChain();
9486 SDValue Ptr = LD->getBasePtr();
9487 EVT VT = LD->getValueType(0);
9488 EVT LoadedVT = LD->getMemoryVT();
9489 SDLoc dl(LD);
9490 auto &MF = DAG.getMachineFunction();
9491
9492 if (VT.isFloatingPoint() || VT.isVector()) {
9493 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
9494 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
9495 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
9496 LoadedVT.isVector()) {
9497 // Scalarize the load and let the individual components be handled.
9498 return scalarizeVectorLoad(LD, DAG);
9499 }
9500
9501 // Expand to a (misaligned) integer load of the same size,
9502 // then bitconvert to floating point or vector.
9503 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
9504 LD->getMemOperand());
9505 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
9506 if (LoadedVT != VT)
9507 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
9508 ISD::ANY_EXTEND, dl, VT, Result);
9509
9510 return std::make_pair(Result, newLoad.getValue(1));
9511 }
9512
9513 // Copy the value to a (aligned) stack slot using (unaligned) integer
9514 // loads and stores, then do a (aligned) load from the stack slot.
9515 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
9516 unsigned LoadedBytes = LoadedVT.getStoreSize();
9517 unsigned RegBytes = RegVT.getSizeInBits() / 8;
// Round up so a partial final register still gets copied.
9518 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
9519
9520 // Make sure the stack slot is also aligned for the register type.
9521 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
9522 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
// NOTE(review): the declaration of 'Stores' (presumably a SmallVector of
// SDValue) is elided in this view.
9524 SDValue StackPtr = StackBase;
9525 unsigned Offset = 0;
9526
9527 EVT PtrVT = Ptr.getValueType();
9528 EVT StackPtrVT = StackPtr.getValueType();
9529
9530 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
9531 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
9532
9533 // Do all but one copies using the full register width.
9534 for (unsigned i = 1; i < NumRegs; i++) {
9535 // Load one integer register's worth from the original location.
9536 SDValue Load = DAG.getLoad(
9537 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
9538 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
9539 LD->getAAInfo());
9540 // Follow the load with a store to the stack slot. Remember the store.
9541 Stores.push_back(DAG.getStore(
9542 Load.getValue(1), dl, Load, StackPtr,
9543 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
9544 // Increment the pointers.
9545 Offset += RegBytes;
9546
9547 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
9548 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
9549 }
9550
9551 // The last copy may be partial. Do an extending load.
9552 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
9553 8 * (LoadedBytes - Offset));
9554 SDValue Load =
9555 DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
9556 LD->getPointerInfo().getWithOffset(Offset), MemVT,
9557 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
9558 LD->getAAInfo());
9559 // Follow the load with a store to the stack slot. Remember the store.
9560 // On big-endian machines this requires a truncating store to ensure
9561 // that the bits end up in the right place.
9562 Stores.push_back(DAG.getTruncStore(
9563 Load.getValue(1), dl, Load, StackPtr,
9564 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
9565
9566 // The order of the stores doesn't matter - say it with a TokenFactor.
9567 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9568
9569 // Finally, perform the original load only redirected to the stack slot.
9570 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
9571 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
9572 LoadedVT);
9573
9574 // Callers expect a MERGE_VALUES node.
9575 return std::make_pair(Load, TF);
9576 }
9577
9578 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
9579 "Unaligned load of unsupported type.");
9580
9581 // Compute the new VT that is half the size of the old one. This is an
9582 // integer MVT.
9583 unsigned NumBits = LoadedVT.getSizeInBits();
9584 EVT NewLoadedVT;
9585 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
9586 NumBits >>= 1;
9587
9588 Align Alignment = LD->getOriginalAlign();
9589 unsigned IncrementSize = NumBits / 8;
9590 ISD::LoadExtType HiExtType = LD->getExtensionType();
9591
9592 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
9593 if (HiExtType == ISD::NON_EXTLOAD)
9594 HiExtType = ISD::ZEXTLOAD;
9595
9596 // Load the value in two parts
// The low half is always zero-extended so the final OR does not clobber
// the high half's bits; the high half keeps the original extension kind.
9597 SDValue Lo, Hi;
9598 if (DAG.getDataLayout().isLittleEndian()) {
9599 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
9600 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9601 LD->getAAInfo());
9602
9603 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
9604 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
9605 LD->getPointerInfo().getWithOffset(IncrementSize),
9606 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9607 LD->getAAInfo());
9608 } else {
9609 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
9610 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9611 LD->getAAInfo());
9612
9613 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
9614 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
9615 LD->getPointerInfo().getWithOffset(IncrementSize),
9616 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9617 LD->getAAInfo());
9618 }
9619
9620 // aggregate the two parts
9621 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
9622 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
9623 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
9624
9625 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
9626 Hi.getValue(1));
9627
9628 return std::make_pair(Result, TF);
9629}
9630
// --- Body of TargetLowering::expandUnalignedStore.
// NOTE(review): the line carrying the signature (presumably
// "SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,") is absent
// from this excerpt — confirm against upstream before editing.
// Expands a store the target cannot perform at its given alignment, using one
// of three strategies visible below:
//   1. scalarize an illegal vector store,
//   2. bitcast FP/vector data to a same-sized legal integer type and emit a
//      (misaligned) integer store, or
//   3. do an aligned store to a stack slot and copy out with register-width
//      integer loads/stores; or, for plain integers, split into two
//      half-width truncating stores.
// Returns the chain of the expanded store sequence.
 9632 SelectionDAG &DAG) const {
 9633 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
 9634 "unaligned indexed stores not implemented!");
 9635 SDValue Chain = ST->getChain();
 9636 SDValue Ptr = ST->getBasePtr();
 9637 SDValue Val = ST->getValue();
 9638 EVT VT = Val.getValueType();
 9639 Align Alignment = ST->getOriginalAlign();
 9640 auto &MF = DAG.getMachineFunction();
 9641 EVT StoreMemVT = ST->getMemoryVT();
 9642
 9643 SDLoc dl(ST);
 9644 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
 9645 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
 9646 if (isTypeLegal(intVT)) {
 9647 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
 9648 StoreMemVT.isVector()) {
 9649 // Scalarize the store and let the individual components be handled.
 9650 SDValue Result = scalarizeVectorStore(ST, DAG);
 9651 return Result;
 9652 }
 9653 // Expand to a bitconvert of the value to the integer type of the
 9654 // same size, then a (misaligned) int store.
 9655 // FIXME: Does not handle truncating floating point stores!
 9656 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
 9657 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
 9658 Alignment, ST->getMemOperand()->getFlags());
 9659 return Result;
 9660 }
 9661 // Do a (aligned) store to a stack slot, then copy from the stack slot
 9662 // to the final destination using (unaligned) integer loads and stores.
 9663 MVT RegVT = getRegisterType(
 9664 *DAG.getContext(),
 9665 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
 9666 EVT PtrVT = Ptr.getValueType();
 9667 unsigned StoredBytes = StoreMemVT.getStoreSize();
 9668 unsigned RegBytes = RegVT.getSizeInBits() / 8;
 9669 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
 9670
 9671 // Make sure the stack slot is also aligned for the register type.
 9672 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
 9673 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
 9674
 9675 // Perform the original store, only redirected to the stack slot.
 9676 SDValue Store = DAG.getTruncStore(
 9677 Chain, dl, Val, StackPtr,
 9678 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
 9679
 9680 EVT StackPtrVT = StackPtr.getValueType();
 9681
 9682 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
 9683 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
// NOTE(review): one source line is absent here in this excerpt — presumably
// the declaration of the `Stores` container used by the loop below
// (a SmallVector<SDValue, 8> in upstream); confirm before editing.
 9685 unsigned Offset = 0;
 9686
 9687 // Do all but one copies using the full register width.
 9688 for (unsigned i = 1; i < NumRegs; i++) {
 9689 // Load one integer register's worth from the stack slot.
 9690 SDValue Load = DAG.getLoad(
 9691 RegVT, dl, Store, StackPtr,
 9692 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
 9693 // Store it to the final location. Remember the store.
 9694 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
 9695 ST->getPointerInfo().getWithOffset(Offset),
 9696 ST->getOriginalAlign(),
 9697 ST->getMemOperand()->getFlags()));
 9698 // Increment the pointers.
 9699 Offset += RegBytes;
 9700 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
 9701 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
 9702 }
 9703
 9704 // The last store may be partial. Do a truncating store. On big-endian
 9705 // machines this requires an extending load from the stack slot to ensure
 9706 // that the bits are in the right place.
 9707 EVT LoadMemVT =
 9708 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
 9709
 9710 // Load from the stack slot.
 9711 SDValue Load = DAG.getExtLoad(
 9712 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
 9713 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
 9714
 9715 Stores.push_back(
 9716 DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
 9717 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
 9718 ST->getOriginalAlign(),
 9719 ST->getMemOperand()->getFlags(), ST->getAAInfo()));
 9720 // The order of the stores doesn't matter - say it with a TokenFactor.
 9721 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
 9722 return Result;
 9723 }
 9724
 9725 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
 9726 "Unaligned store of unknown type.");
 9727 // Get the half-size VT
 9728 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
 9729 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
 9730 unsigned IncrementSize = NumBits / 8;
 9731
 9732 // Divide the stored value in two parts.
 9733 SDValue ShiftAmount =
 9734 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
 9735 SDValue Lo = Val;
 9736 // If Val is a constant, replace the upper bits with 0. The SRL will constant
 9737 // fold and not use the upper bits. A smaller constant may be easier to
 9738 // materialize.
 9739 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
 9740 Lo = DAG.getNode(
 9741 ISD::AND, dl, VT, Lo,
 9742 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
 9743 VT));
 9744 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
 9745
 9746 // Store the two parts: on little-endian the low half goes at the base
 9747 // address, on big-endian the high half does.
 9747 SDValue Store1, Store2;
 9748 Store1 = DAG.getTruncStore(Chain, dl,
 9749 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
 9750 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
 9751 ST->getMemOperand()->getFlags());
 9752
 9753 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
 9754 Store2 = DAG.getTruncStore(
 9755 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
 9756 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
 9757 ST->getMemOperand()->getFlags(), ST->getAAInfo());
 9758
 9759 SDValue Result =
 9760 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
 9761 return Result;
 9762}
9763
// --- TargetLowering::IncrementMemoryAddress.
// NOTE(review): the line with the parameter list's first half (presumably
// "TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, ...")
// is absent from this excerpt — confirm against upstream before editing.
// Returns Addr advanced past one vector's worth of data. For compressed
// memory the advance is popcount(Mask) elements (only the active lanes were
// stored); for scalable vectors it is vscale-scaled; otherwise it is the
// fixed store size of DataVT.
 9764SDValue
 9766 const SDLoc &DL, EVT DataVT,
 9767 SelectionDAG &DAG,
 9768 bool IsCompressedMemory) const {
 9769 SDValue Increment;
 9770 EVT AddrVT = Addr.getValueType();
 9771 EVT MaskVT = Mask.getValueType();
 9772 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
 9773 "Incompatible types of Data and Mask");
 9774 if (IsCompressedMemory) {
 9775 if (DataVT.isScalableVector())
// NOTE(review): the call wrapping the diagnostic string below (presumably
// report_fatal_error) is absent from this excerpt.
 9777 "Cannot currently handle compressed memory with scalable vectors");
 9778 // Incrementing the pointer according to number of '1's in the mask.
 9779 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
 9780 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
 9781 if (MaskIntVT.getSizeInBits() < 32) {
 9782 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
 9783 MaskIntVT = MVT::i32;
 9784 }
 9785
 9786 // Count '1's with POPCNT.
 9787 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
 9788 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
 9789 // Scale is an element size in bytes.
 9790 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
 9791 AddrVT);
 9792 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
 9793 } else if (DataVT.isScalableVector()) {
 9794 Increment = DAG.getVScale(DL, AddrVT,
 9795 APInt(AddrVT.getFixedSizeInBits(),
 9796 DataVT.getStoreSize().getKnownMinValue()));
 9797 } else
 9798 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
 9799
 9800 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
 9801}
9802
// --- Static helper clampDynamicVectorIndex.
// NOTE(review): the signature line (presumably
// "static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,")
// is absent from this excerpt — confirm against upstream before editing.
// Clamps a (possibly dynamic) index Idx so that reading SubEC elements
// starting at Idx stays inside a vector of type VecVT:
//  - scalable vector, fixed subvector: clamp to (NElts*vscale - NumSubElts),
//    using USUBSAT when the subtraction could underflow;
//  - power-of-two fixed vector, single element: mask with AND (cheaper than
//    a compare-based clamp);
//  - otherwise: UMIN against the largest legal starting index.
 9804 EVT VecVT, const SDLoc &dl,
 9805 ElementCount SubEC) {
 9806 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
 9807 "Cannot index a scalable vector within a fixed-width vector");
 9808
 9809 unsigned NElts = VecVT.getVectorMinNumElements();
 9810 unsigned NumSubElts = SubEC.getKnownMinValue();
 9811 EVT IdxVT = Idx.getValueType();
 9812
 9813 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
 9814 // If this is a constant index and we know the value plus the number of the
 9815 // elements in the subvector minus one is less than the minimum number of
 9816 // elements then it's safe to return Idx.
 9817 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
 9818 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
 9819 return Idx;
 9820 SDValue VS =
 9821 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
 9822 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
 9823 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
 9824 DAG.getConstant(NumSubElts, dl, IdxVT));
 9825 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
 9826 }
 9827 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
 9828 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
 9829 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
 9830 DAG.getConstant(Imm, dl, IdxVT));
 9831 }
 9832 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
 9833 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
 9834 DAG.getConstant(MaxIndex, dl, IdxVT));
 9835}
9836
// --- TargetLowering::getVectorElementPointer.
// Thin wrapper: computes the address of a single element by delegating to
// getVectorSubVecPointer with a one-element sub-vector type.
// NOTE(review): two lines are absent from this excerpt — the signature line
// and the argument between lines 9841 and 9843 (presumably the
// single-element EVT built with EVT::getVectorVT); confirm against upstream.
 9838 SDValue VecPtr, EVT VecVT,
 9839 SDValue Index) const {
 9840 return getVectorSubVecPointer(
 9841 DAG, VecPtr, VecVT,
 9843 Index);
 9844}
9845
// --- TargetLowering::getVectorSubVecPointer.
// NOTE(review): the signature line (presumably
// "SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,") is
// absent from this excerpt — confirm against upstream before editing.
// Computes the address of the sub-vector of type SubVecVT that starts at
// element Index of the in-memory vector at VecPtr: widen/clamp the index,
// scale by vscale for scalable sub-vectors, multiply by the element byte
// size, and add to the base pointer.
 9847 SDValue VecPtr, EVT VecVT,
 9848 EVT SubVecVT,
 9849 SDValue Index) const {
 9850 SDLoc dl(Index);
 9851 // Make sure the index type is big enough to compute in.
 9852 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
 9853
 9854 EVT EltVT = VecVT.getVectorElementType();
 9855
 9856 // Calculate the element offset and add it to the pointer.
 9857 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
 9858 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
 9859 "Converting bits to bytes lost precision");
 9860 assert(SubVecVT.getVectorElementType() == EltVT &&
 9861 "Sub-vector must be a vector with matching element type");
 9862 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
 9863 SubVecVT.getVectorElementCount());
 9864
 9865 EVT IdxVT = Index.getValueType();
 9866 if (SubVecVT.isScalableVector())
 9867 Index =
 9868 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
 9869 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
 9870
 9871 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
 9872 DAG.getConstant(EltSize, dl, IdxVT));
 9873 return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
 9874}
9875
9876//===----------------------------------------------------------------------===//
9877// Implementation of Emulated TLS Model
9878//===----------------------------------------------------------------------===//
9879
// --- TargetLowering::LowerToTLSEmulatedModel.
// NOTE(review): the signature line (presumably taking a GlobalAddressSDNode
// *GA) is absent from this excerpt — confirm against upstream before editing.
// Lowers access to a TLS global under the emulated-TLS model into a libcall.
 9881 SelectionDAG &DAG) const {
 9882 // Access to address of TLS variable xyz is lowered to a function call:
 9883 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
 9884 EVT PtrVT = getPointerTy(DAG.getDataLayout());
 9885 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
 9886 SDLoc dl(GA);
 9887
 9888 ArgListTy Args;
 9889 ArgListEntry Entry;
 9890 std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
 9891 Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
 9892 StringRef EmuTlsVarName(NameString);
 9893 GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
 9894 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
 9895 Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
 9896 Entry.Ty = VoidPtrType;
 9897 Args.push_back(Entry);
 9898
 9899 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
 9900
// NOTE(review): the declaration of `CLI` (presumably
// TargetLowering::CallLoweringInfo CLI(DAG);) is absent from this excerpt.
 9902 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
 9903 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
 9904 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
 9905
 9906 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
 9907 // At last for X86 targets, maybe good for other targets too?
// NOTE(review): the declaration of `MFI` (presumably obtained from
// DAG.getMachineFunction().getFrameInfo()) is absent from this excerpt.
 9909 MFI.setAdjustsStack(true); // Is this only for X86 target?
 9910 MFI.setHasCalls(true);
 9911
 9912 assert((GA->getOffset() == 0) &&
 9913 "Emulated TLS must have zero offset in GlobalAddressSDNode");
 9914 return CallResult.first;
 9915}
9916
// --- TargetLowering::lowerCmpEqZeroToCtlzSrl.
// NOTE(review): the signature line is absent from this excerpt.
// Rewrites (seteq X, 0) as (srl (ctlz X), log2(bitwidth)) for targets where
// CTLZ is fast: ctlz(X) equals the bit width exactly when X == 0, so the top
// bit of the ctlz result is the comparison result. Returns an empty SDValue
// when the transform does not apply (not SETEQ-with-zero, or CTLZ is slow).
 9918 SelectionDAG &DAG) const {
 9919 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
 9920 if (!isCtlzFast())
 9921 return SDValue();
 9922 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
 9923 SDLoc dl(Op);
 9924 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
 9925 EVT VT = Op.getOperand(0).getValueType();
 9926 SDValue Zext = Op.getOperand(0);
 9927 if (VT.bitsLT(MVT::i32)) {
 9928 // Widen sub-i32 operands so the ctlz/srl arithmetic is done in i32.
 9928 VT = MVT::i32;
 9929 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
 9930 }
 9931 unsigned Log2b = Log2_32(VT.getSizeInBits());
 9932 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
 9933 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
 9934 DAG.getConstant(Log2b, dl, MVT::i32));
 9935 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
 9936 }
 9937 return SDValue();
 9938}
9939
// --- TargetLowering::expandIntMINMAX.
// NOTE(review): the signature line (presumably
// "SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG)
// const {") is absent from this excerpt, as are the continuation lines of
// several `if` conditions below (original lines 9950, 9960, 9968) and the
// vector-unroll guard before the UnrollVectorOp call (9975) — confirm
// against upstream before editing.
// Expands SMIN/SMAX/UMIN/UMAX: first tries cheap arithmetic identities
// (sub/usubsat forms), then falls back to SETCC + SELECT, preferring a
// condition code for which an identical SETCC node already exists in the DAG
// so it can be CSE'd.
 9941 SDValue Op0 = Node->getOperand(0);
 9942 SDValue Op1 = Node->getOperand(1);
 9943 EVT VT = Op0.getValueType();
 9944 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 9945 unsigned Opcode = Node->getOpcode();
 9946 SDLoc DL(Node);
 9947
 9948 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
 9949 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
 9951 Op0 = DAG.getFreeze(Op0);
 9952 SDValue Zero = DAG.getConstant(0, DL, VT);
 9953 return DAG.getNode(ISD::SUB, DL, VT, Op0,
 9954 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
 9955 }
 9956
 9957 // umin(x,y) -> sub(x,usubsat(x,y))
 9958 // TODO: Missing freeze(Op0)?
 9959 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
 9961 return DAG.getNode(ISD::SUB, DL, VT, Op0,
 9962 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
 9963 }
 9964
 9965 // umax(x,y) -> add(x,usubsat(y,x))
 9966 // TODO: Missing freeze(Op0)?
 9967 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
 9969 return DAG.getNode(ISD::ADD, DL, VT, Op0,
 9970 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
 9971 }
 9972
 9973 // FIXME: Should really try to split the vector in case it's legal on a
 9974 // subvector.
 9976 return DAG.UnrollVectorOp(Node);
 9977
 9978 // Attempt to find an existing SETCC node that we can reuse.
 9979 // TODO: Do we need a generic doesSETCCNodeExist?
 9980 // TODO: Missing freeze(Op0)/freeze(Op1)?
 9981 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
 9982 ISD::CondCode PrefCommuteCC,
 9983 ISD::CondCode AltCommuteCC) {
 9984 SDVTList BoolVTList = DAG.getVTList(BoolVT);
 9985 for (ISD::CondCode CC : {PrefCC, AltCC}) {
 9986 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
 9987 {Op0, Op1, DAG.getCondCode(CC)})) {
 9988 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
 9989 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
 9990 }
 9991 }
 9992 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
 9993 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
 9994 {Op0, Op1, DAG.getCondCode(CC)})) {
 9995 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
 9996 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
 9997 }
 9998 }
 9999 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
 10000 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
 10001 };
 10002
 10003 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
 10004 // -> Y = (A < B) ? B : A
 10005 // -> Y = (A >= B) ? A : B
 10006 // -> Y = (A <= B) ? B : A
 10007 switch (Opcode) {
 10008 case ISD::SMAX:
 10009 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
 10010 case ISD::SMIN:
 10011 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
 10012 case ISD::UMAX:
 10013 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
 10014 case ISD::UMIN:
 10015 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
 10016 }
 10017
 10018 llvm_unreachable("How did we get here?");
 10019}
10020
// --- TargetLowering::expandAddSubSat.
// NOTE(review): several lines are absent from this excerpt: the signature
// line (10021), guard-condition lines before the UnrollVectorOp call and the
// UADDSAT/USUBSAT fast paths (10065, 10077, 10087), and the declarations of
// MinVal/MaxVal used in the signed path (10098-10099, 10129) — confirm
// against upstream before editing.
// Expands [SU]ADDSAT/[SU]SUBSAT: first tries umax/umin-based identities for
// the unsigned cases, then lowers through the corresponding overflow opcode
// ([SU]ADDO/[SU]SUBO) and selects the saturation value when overflow occurs.
 10022 unsigned Opcode = Node->getOpcode();
 10023 SDValue LHS = Node->getOperand(0);
 10024 SDValue RHS = Node->getOperand(1);
 10025 EVT VT = LHS.getValueType();
 10026 SDLoc dl(Node);
 10027
 10028 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
 10029 assert(VT.isInteger() && "Expected operands to be integers");
 10030
 10031 // usub.sat(a, b) -> umax(a, b) - b
 10032 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
 10033 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
 10034 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
 10035 }
 10036
 10037 // uadd.sat(a, b) -> umin(a, ~b) + b
 10038 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
 10039 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
 10040 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
 10041 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
 10042 }
 10043
 10044 unsigned OverflowOp;
 10045 switch (Opcode) {
 10046 case ISD::SADDSAT:
 10047 OverflowOp = ISD::SADDO;
 10048 break;
 10049 case ISD::UADDSAT:
 10050 OverflowOp = ISD::UADDO;
 10051 break;
 10052 case ISD::SSUBSAT:
 10053 OverflowOp = ISD::SSUBO;
 10054 break;
 10055 case ISD::USUBSAT:
 10056 OverflowOp = ISD::USUBO;
 10057 break;
 10058 default:
 10059 llvm_unreachable("Expected method to receive signed or unsigned saturation "
 10060 "addition or subtraction node.");
 10061 }
 10062
 10063 // FIXME: Should really try to split the vector in case it's legal on a
 10064 // subvector.
 10066 return DAG.UnrollVectorOp(Node);
 10067
 10068 unsigned BitWidth = LHS.getScalarValueSizeInBits();
 10069 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 10070 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
 10071 SDValue SumDiff = Result.getValue(0);
 10072 SDValue Overflow = Result.getValue(1);
 10073 SDValue Zero = DAG.getConstant(0, dl, VT);
 10074 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
 10075
 10076 if (Opcode == ISD::UADDSAT) {
 10078 // (LHS + RHS) | OverflowMask
 10079 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
 10080 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
 10081 }
 10082 // Overflow ? 0xffff.... : (LHS + RHS)
 10083 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
 10084 }
 10085
 10086 if (Opcode == ISD::USUBSAT) {
 10088 // (LHS - RHS) & ~OverflowMask
 10089 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
 10090 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
 10091 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
 10092 }
 10093 // Overflow ? 0 : (LHS - RHS)
 10094 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
 10095 }
 10096
 10097 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
 10100
 10101 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
 10102 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
 10103
 10104 // If either of the operand signs are known, then they are guaranteed to
 10105 // only saturate in one direction. If non-negative they will saturate
 10106 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
 10107 //
 10108 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
 10109 // sign of 'y' has to be flipped.
 10110
 10111 bool LHSIsNonNegative = KnownLHS.isNonNegative();
 10112 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
 10113 : KnownRHS.isNegative();
 10114 if (LHSIsNonNegative || RHSIsNonNegative) {
 10115 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
 10116 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
 10117 }
 10118
 10119 bool LHSIsNegative = KnownLHS.isNegative();
 10120 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
 10121 : KnownRHS.isNonNegative();
 10122 if (LHSIsNegative || RHSIsNegative) {
 10123 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
 10124 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
 10125 }
 10126 }
 10127
 10128 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
 10130 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
 10131 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
 10132 DAG.getConstant(BitWidth - 1, dl, VT));
 10133 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
 10134 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
 10135}
10136
// --- TargetLowering::expandShlSat.
// NOTE(review): the signature line (10137) and the vector-unroll guard
// before the UnrollVectorOp call (10151) are absent from this excerpt —
// confirm against upstream before editing.
// Expands SSHLSAT/USHLSAT: performs the shift, shifts back (arithmetically
// for signed, logically for unsigned), and if the round-trip does not
// reproduce LHS the shift overflowed, so the saturation value is selected
// instead (sign-dependent min/max for signed, all-ones max for unsigned).
 10138 unsigned Opcode = Node->getOpcode();
 10139 bool IsSigned = Opcode == ISD::SSHLSAT;
 10140 SDValue LHS = Node->getOperand(0);
 10141 SDValue RHS = Node->getOperand(1);
 10142 EVT VT = LHS.getValueType();
 10143 SDLoc dl(Node);
 10144
 10145 assert((Node->getOpcode() == ISD::SSHLSAT ||
 10146 Node->getOpcode() == ISD::USHLSAT) &&
 10147 "Expected a SHLSAT opcode");
 10148 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
 10149 assert(VT.isInteger() && "Expected operands to be integers");
 10150
 10152 return DAG.UnrollVectorOp(Node);
 10153
 10154 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
 10155
 10156 unsigned BW = VT.getScalarSizeInBits();
 10157 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 10158 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
 10159 SDValue Orig =
 10160 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
 10161
 10162 SDValue SatVal;
 10163 if (IsSigned) {
 10164 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
 10165 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
 10166 SDValue Cond =
 10167 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
 10168 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
 10169 } else {
 10170 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
 10171 }
 10172 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
 10173 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
 10174}
10175
// --- TargetLowering::forceExpandWideMUL (split-operand form).
// NOTE(review): the signature line (10176) and the declaration of
// `CallOptions` (10238, presumably TargetLowering::MakeLibCallOptions) are
// absent from this excerpt — confirm against upstream before editing.
// Produces the full double-width product of (LH:LL) * (RH:RL) in Hi:Lo,
// either via a MUL_I* libcall when one exists for WideVT, or by brute-force
// schoolbook multiplication of half-width digits (Knuth's Algorithm M).
 10177 bool Signed, EVT WideVT,
 10178 const SDValue LL, const SDValue LH,
 10179 const SDValue RL, const SDValue RH,
 10180 SDValue &Lo, SDValue &Hi) const {
 10181 // We can fall back to a libcall with an illegal type for the MUL if we
 10182 // have a libcall big enough.
 10183 // Also, we can fall back to a division in some cases, but that's a big
 10184 // performance hit in the general case.
 10185 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
 10186 if (WideVT == MVT::i16)
 10187 LC = RTLIB::MUL_I16;
 10188 else if (WideVT == MVT::i32)
 10189 LC = RTLIB::MUL_I32;
 10190 else if (WideVT == MVT::i64)
 10191 LC = RTLIB::MUL_I64;
 10192 else if (WideVT == MVT::i128)
 10193 LC = RTLIB::MUL_I128;
 10194
 10195 if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
 10196 // We'll expand the multiplication by brute force because we have no other
 10197 // options. This is a trivially-generalized version of the code from
 10198 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
 10199 // 4.3.1).
 10200 EVT VT = LL.getValueType();
 10201 unsigned Bits = VT.getSizeInBits();
 10202 unsigned HalfBits = Bits >> 1;
 10203 SDValue Mask =
 10204 DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
 10205 SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
 10206 SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);
 10207
 10208 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
 10209 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
 10210
 10211 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
 10212 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
 10213 SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
 10214 SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);
 10215
 10216 SDValue U = DAG.getNode(ISD::ADD, dl, VT,
 10217 DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
 10218 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
 10219 SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);
 10220
 10221 SDValue V = DAG.getNode(ISD::ADD, dl, VT,
 10222 DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
 10223 SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);
 10224
 10225 SDValue W =
 10226 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
 10227 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
 10228 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
 10229 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
 10230
 10231 Hi = DAG.getNode(ISD::ADD, dl, VT, W,
 10232 DAG.getNode(ISD::ADD, dl, VT,
 10233 DAG.getNode(ISD::MUL, dl, VT, RH, LL),
 10234 DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
 10235 } else {
 10236 // Attempt a libcall.
 10237 SDValue Ret;
 10239 CallOptions.setSExt(Signed);
 10240 CallOptions.setIsPostTypeLegalization(true);
 10241 if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
 10242 // Halves of WideVT are packed into registers in different order
 10243 // depending on platform endianness. This is usually handled by
 10244 // the C calling convention, but we can't defer to it in
 10245 // the legalizer.
 10246 SDValue Args[] = {LL, LH, RL, RH};
 10247 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
 10248 } else {
 10249 SDValue Args[] = {LH, LL, RH, RL};
 10250 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
 10251 }
 10252 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
 10253 "Ret value is a collection of constituent nodes holding result.");
 10254 if (DAG.getDataLayout().isLittleEndian()) {
 10255 // Same as above.
 10256 Lo = Ret.getOperand(0);
 10257 Hi = Ret.getOperand(1);
 10258 } else {
 10259 Lo = Ret.getOperand(1);
 10260 Hi = Ret.getOperand(0);
 10261 }
 10262 }
 10263}
10264
// --- TargetLowering::forceExpandWideMUL (whole-operand form).
// NOTE(review): the signature line (10265) is absent from this excerpt —
// confirm against upstream before editing.
// Convenience overload: materializes the high halves of LHS/RHS (sign bits
// via SRA for signed, zero for unsigned) and delegates to the split-operand
// forceExpandWideMUL to compute the double-width product in Hi:Lo.
 10266 bool Signed, const SDValue LHS,
 10267 const SDValue RHS, SDValue &Lo,
 10268 SDValue &Hi) const {
 10269 EVT VT = LHS.getValueType();
 10270 assert(RHS.getValueType() == VT && "Mismatching operand types");
 10271
 10272 SDValue HiLHS;
 10273 SDValue HiRHS;
 10274 if (Signed) {
 10275 // The high part is obtained by SRA'ing all but one of the bits of low
 10276 // part.
 10277 unsigned LoSize = VT.getFixedSizeInBits();
 10278 HiLHS = DAG.getNode(
 10279 ISD::SRA, dl, VT, LHS,
 10280 DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
 10281 HiRHS = DAG.getNode(
 10282 ISD::SRA, dl, VT, RHS,
 10283 DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
 10284 } else {
 10285 HiLHS = DAG.getConstant(0, dl, VT);
 10286 HiRHS = DAG.getConstant(0, dl, VT);
 10287 }
 10288 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
 10289 forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
 10290}
10291
// --- TargetLowering::expandFixedPointMul.
// NOTE(review): the line completing the signature (10293, presumably
// "TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG)
// const {") and the guard before the plain-MUL fast path (10315) are absent
// from this excerpt — confirm against upstream before editing.
// Expands [SU]MULFIX[SAT]: computes the double-width product (via
// [SU]MUL_LOHI, MULH[SU], or forceExpandWideMUL), funnel-shifts it right by
// Scale, and for the saturating variants clamps against the bits of the wide
// product that would be shifted out. Scale==0 saturating cases are handled
// up front via [SU]MULO when legal. Returns an empty SDValue when no scalar
// strategy applies to a vector type (caller must split/unroll).
 10292SDValue
 10294 assert((Node->getOpcode() == ISD::SMULFIX ||
 10295 Node->getOpcode() == ISD::UMULFIX ||
 10296 Node->getOpcode() == ISD::SMULFIXSAT ||
 10297 Node->getOpcode() == ISD::UMULFIXSAT) &&
 10298 "Expected a fixed point multiplication opcode");
 10299
 10300 SDLoc dl(Node);
 10301 SDValue LHS = Node->getOperand(0);
 10302 SDValue RHS = Node->getOperand(1);
 10303 EVT VT = LHS.getValueType();
 10304 unsigned Scale = Node->getConstantOperandVal(2);
 10305 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
 10306 Node->getOpcode() == ISD::UMULFIXSAT);
 10307 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
 10308 Node->getOpcode() == ISD::SMULFIXSAT);
 10309 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 10310 unsigned VTSize = VT.getScalarSizeInBits();
 10311
 10312 if (!Scale) {
 10313 // [us]mul.fix(a, b, 0) -> mul(a, b)
 10314 if (!Saturating) {
 10316 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
 10317 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
 10318 SDValue Result =
 10319 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
 10320 SDValue Product = Result.getValue(0);
 10321 SDValue Overflow = Result.getValue(1);
 10322 SDValue Zero = DAG.getConstant(0, dl, VT);
 10323
 10324 APInt MinVal = APInt::getSignedMinValue(VTSize);
 10325 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
 10326 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
 10327 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
 10328 // Xor the inputs, if resulting sign bit is 0 the product will be
 10329 // positive, else negative.
 10330 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
 10331 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
 10332 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
 10333 return DAG.getSelect(dl, VT, Overflow, Result, Product);
 10334 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
 10335 SDValue Result =
 10336 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
 10337 SDValue Product = Result.getValue(0);
 10338 SDValue Overflow = Result.getValue(1);
 10339
 10340 APInt MaxVal = APInt::getMaxValue(VTSize);
 10341 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
 10342 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
 10343 }
 10344 }
 10345
 10346 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
 10347 "Expected scale to be less than the number of bits if signed or at "
 10348 "most the number of bits if unsigned.");
 10349 assert(LHS.getValueType() == RHS.getValueType() &&
 10350 "Expected both operands to be the same type");
 10351
 10352 // Get the upper and lower bits of the result.
 10353 SDValue Lo, Hi;
 10354 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
 10355 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
 10356 if (isOperationLegalOrCustom(LoHiOp, VT)) {
 10357 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
 10358 Lo = Result.getValue(0);
 10359 Hi = Result.getValue(1);
 10360 } else if (isOperationLegalOrCustom(HiOp, VT)) {
 10361 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
 10362 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
 10363 } else if (VT.isVector()) {
 10364 return SDValue();
 10365 } else {
 10366 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
 10367 }
 10368
 10369 if (Scale == VTSize)
 10370 // Result is just the top half since we'd be shifting by the width of the
 10371 // operand. Overflow impossible so this works for both UMULFIX and
 10372 // UMULFIXSAT.
 10373 return Hi;
 10374
 10375 // The result will need to be shifted right by the scale since both operands
 10376 // are scaled. The result is given to us in 2 halves, so we only want part of
 10377 // both in the result.
 10378 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
 10379 DAG.getShiftAmountConstant(Scale, VT, dl));
 10380 if (!Saturating)
 10381 return Result;
 10382
 10383 if (!Signed) {
 10384 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
 10385 // widened multiplication) aren't all zeroes.
 10386
 10387 // Saturate to max if ((Hi >> Scale) != 0),
 10388 // which is the same as if (Hi > ((1 << Scale) - 1))
 10389 APInt MaxVal = APInt::getMaxValue(VTSize);
 10390 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
 10391 dl, VT);
 10392 Result = DAG.getSelectCC(dl, Hi, LowMask,
 10393 DAG.getConstant(MaxVal, dl, VT), Result,
 10394 ISD::SETUGT);
 10395
 10396 return Result;
 10397 }
 10398
 10399 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
 10400 // widened multiplication) aren't all ones or all zeroes.
 10401
 10402 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
 10403 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
 10404
 10405 if (Scale == 0) {
 10406 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
 10407 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
 10408 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
 10409 // Saturated to SatMin if wide product is negative, and SatMax if wide
 10410 // product is positive ...
 10411 SDValue Zero = DAG.getConstant(0, dl, VT);
 10412 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
 10413 ISD::SETLT);
 10414 // ... but only if we overflowed.
 10415 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
 10416 }
 10417
 10418 // We handled Scale==0 above so all the bits to examine is in Hi.
 10419
 10420 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
 10421 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
 10422 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
 10423 dl, VT);
 10424 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
 10425 // Saturate to min if (Hi >> (Scale - 1)) < -1),
 10426 // which is the same as if (HI < (-1 << (Scale - 1))
 10427 SDValue HighMask =
 10428 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
 10429 dl, VT);
 10430 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
 10431 return Result;
 10432}
10433
10434SDValue
10436 SDValue LHS, SDValue RHS,
10437 unsigned Scale, SelectionDAG &DAG) const {
10438 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
10439 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
10440 "Expected a fixed point division opcode");
10441
10442 EVT VT = LHS.getValueType();
10443 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
10444 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
10445 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10446
10447 // If there is enough room in the type to upscale the LHS or downscale the
10448 // RHS before the division, we can perform it in this type without having to
10449 // resize. For signed operations, the LHS headroom is the number of
10450 // redundant sign bits, and for unsigned ones it is the number of zeroes.
10451 // The headroom for the RHS is the number of trailing zeroes.
10452 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
10454 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
10455
10456 // For signed saturating operations, we need to be able to detect true integer
10457 // division overflow; that is, when you have MIN / -EPS. However, this
10458 // is undefined behavior and if we emit divisions that could take such
10459 // values it may cause undesired behavior (arithmetic exceptions on x86, for
10460 // example).
10461 // Avoid this by requiring an extra bit so that we never get this case.
10462 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
10463 // signed saturating division, we need to emit a whopping 32-bit division.
10464 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
10465 return SDValue();
10466
10467 unsigned LHSShift = std::min(LHSLead, Scale);
10468 unsigned RHSShift = Scale - LHSShift;
10469
10470 // At this point, we know that if we shift the LHS up by LHSShift and the
10471 // RHS down by RHSShift, we can emit a regular division with a final scaling
10472 // factor of Scale.
10473
10474 if (LHSShift)
10475 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
10476 DAG.getShiftAmountConstant(LHSShift, VT, dl));
10477 if (RHSShift)
10478 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
10479 DAG.getShiftAmountConstant(RHSShift, VT, dl));
10480
10481 SDValue Quot;
10482 if (Signed) {
10483 // For signed operations, if the resulting quotient is negative and the
10484 // remainder is nonzero, subtract 1 from the quotient to round towards
10485 // negative infinity.
10486 SDValue Rem;
10487 // FIXME: Ideally we would always produce an SDIVREM here, but if the
10488 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
10489 // we couldn't just form a libcall, but the type legalizer doesn't do it.
10490 if (isTypeLegal(VT) &&
10492 Quot = DAG.getNode(ISD::SDIVREM, dl,
10493 DAG.getVTList(VT, VT),
10494 LHS, RHS);
10495 Rem = Quot.getValue(1);
10496 Quot = Quot.getValue(0);
10497 } else {
10498 Quot = DAG.getNode(ISD::SDIV, dl, VT,
10499 LHS, RHS);
10500 Rem = DAG.getNode(ISD::SREM, dl, VT,
10501 LHS, RHS);
10502 }
10503 SDValue Zero = DAG.getConstant(0, dl, VT);
10504 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
10505 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
10506 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
10507 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
10508 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
10509 DAG.getConstant(1, dl, VT));
10510 Quot = DAG.getSelect(dl, VT,
10511 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
10512 Sub1, Quot);
10513 } else
10514 Quot = DAG.getNode(ISD::UDIV, dl, VT,
10515 LHS, RHS);
10516
10517 return Quot;
10518}
10519
10521 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10522 SDLoc dl(Node);
10523 SDValue LHS = Node->getOperand(0);
10524 SDValue RHS = Node->getOperand(1);
10525 bool IsAdd = Node->getOpcode() == ISD::UADDO;
10526
10527 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
10528 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
10529 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
10530 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
10531 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
10532 { LHS, RHS, CarryIn });
10533 Result = SDValue(NodeCarry.getNode(), 0);
10534 Overflow = SDValue(NodeCarry.getNode(), 1);
10535 return;
10536 }
10537
10538 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
10539 LHS.getValueType(), LHS, RHS);
10540
10541 EVT ResultType = Node->getValueType(1);
10542 EVT SetCCType = getSetCCResultType(
10543 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
10544 SDValue SetCC;
10545 if (IsAdd && isOneConstant(RHS)) {
10546 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
10547 // the live range of X. We assume comparing with 0 is cheap.
10548 // The general case (X + C) < C is not necessarily beneficial. Although we
10549 // reduce the live range of X, we may introduce the materialization of
10550 // constant C.
10551 SetCC =
10552 DAG.getSetCC(dl, SetCCType, Result,
10553 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
10554 } else if (IsAdd && isAllOnesConstant(RHS)) {
10555 // Special case: uaddo X, -1 overflows if X != 0.
10556 SetCC =
10557 DAG.getSetCC(dl, SetCCType, LHS,
10558 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
10559 } else {
10561 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
10562 }
10563 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
10564}
10565
10567 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10568 SDLoc dl(Node);
10569 SDValue LHS = Node->getOperand(0);
10570 SDValue RHS = Node->getOperand(1);
10571 bool IsAdd = Node->getOpcode() == ISD::SADDO;
10572
10573 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
10574 LHS.getValueType(), LHS, RHS);
10575
10576 EVT ResultType = Node->getValueType(1);
10577 EVT OType = getSetCCResultType(
10578 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
10579
10580 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
10581 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
10582 if (isOperationLegal(OpcSat, LHS.getValueType())) {
10583 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
10584 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
10585 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
10586 return;
10587 }
10588
10589 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
10590
10591 // For an addition, the result should be less than one of the operands (LHS)
10592 // if and only if the other operand (RHS) is negative, otherwise there will
10593 // be overflow.
10594 // For a subtraction, the result should be less than one of the operands
10595 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
10596 // otherwise there will be overflow.
10597 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
10598 SDValue ConditionRHS =
10599 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
10600
10601 Overflow = DAG.getBoolExtOrTrunc(
10602 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
10603 ResultType, ResultType);
10604}
10605
10607 SDValue &Overflow, SelectionDAG &DAG) const {
10608 SDLoc dl(Node);
10609 EVT VT = Node->getValueType(0);
10610 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10611 SDValue LHS = Node->getOperand(0);
10612 SDValue RHS = Node->getOperand(1);
10613 bool isSigned = Node->getOpcode() == ISD::SMULO;
10614
10615 // For power-of-two multiplications we can use a simpler shift expansion.
10616 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
10617 const APInt &C = RHSC->getAPIntValue();
10618 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
10619 if (C.isPowerOf2()) {
10620 // smulo(x, signed_min) is same as umulo(x, signed_min).
10621 bool UseArithShift = isSigned && !C.isMinSignedValue();
10622 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
10623 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
10624 Overflow = DAG.getSetCC(dl, SetCCVT,
10625 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
10626 dl, VT, Result, ShiftAmt),
10627 LHS, ISD::SETNE);
10628 return true;
10629 }
10630 }
10631
10632 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
10633 if (VT.isVector())
10634 WideVT =
10636
10637 SDValue BottomHalf;
10638 SDValue TopHalf;
10639 static const unsigned Ops[2][3] =
10642 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
10643 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10644 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
10645 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
10646 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
10647 RHS);
10648 TopHalf = BottomHalf.getValue(1);
10649 } else if (isTypeLegal(WideVT)) {
10650 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
10651 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
10652 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
10653 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
10654 SDValue ShiftAmt =
10655 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
10656 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
10657 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
10658 } else {
10659 if (VT.isVector())
10660 return false;
10661
10662 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
10663 }
10664
10665 Result = BottomHalf;
10666 if (isSigned) {
10667 SDValue ShiftAmt = DAG.getShiftAmountConstant(
10668 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
10669 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
10670 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
10671 } else {
10672 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
10673 DAG.getConstant(0, dl, VT), ISD::SETNE);
10674 }
10675
10676 // Truncate the result if SetCC returns a larger type than needed.
10677 EVT RType = Node->getValueType(1);
10678 if (RType.bitsLT(Overflow.getValueType()))
10679 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
10680
10681 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
10682 "Unexpected result type for S/UMULO legalization");
10683 return true;
10684}
10685
10687 SDLoc dl(Node);
10688 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
10689 SDValue Op = Node->getOperand(0);
10690 EVT VT = Op.getValueType();
10691
10692 if (VT.isScalableVector())
10694 "Expanding reductions for scalable vectors is undefined.");
10695
10696 // Try to use a shuffle reduction for power of two vectors.
10697 if (VT.isPow2VectorType()) {
10698 while (VT.getVectorNumElements() > 1) {
10699 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
10700 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
10701 break;
10702
10703 SDValue Lo, Hi;
10704 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
10705 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
10706 VT = HalfVT;
10707 }
10708 }
10709
10710 EVT EltVT = VT.getVectorElementType();
10711 unsigned NumElts = VT.getVectorNumElements();
10712
10714 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
10715
10716 SDValue Res = Ops[0];
10717 for (unsigned i = 1; i < NumElts; i++)
10718 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
10719
10720 // Result type may be wider than element type.
10721 if (EltVT != Node->getValueType(0))
10722 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
10723 return Res;
10724}
10725
10727 SDLoc dl(Node);
10728 SDValue AccOp = Node->getOperand(0);
10729 SDValue VecOp = Node->getOperand(1);
10730 SDNodeFlags Flags = Node->getFlags();
10731
10732 EVT VT = VecOp.getValueType();
10733 EVT EltVT = VT.getVectorElementType();
10734
10735 if (VT.isScalableVector())
10737 "Expanding reductions for scalable vectors is undefined.");
10738
10739 unsigned NumElts = VT.getVectorNumElements();
10740
10742 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
10743
10744 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
10745
10746 SDValue Res = AccOp;
10747 for (unsigned i = 0; i < NumElts; i++)
10748 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
10749
10750 return Res;
10751}
10752
10754 SelectionDAG &DAG) const {
10755 EVT VT = Node->getValueType(0);
10756 SDLoc dl(Node);
10757 bool isSigned = Node->getOpcode() == ISD::SREM;
10758 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
10759 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
10760 SDValue Dividend = Node->getOperand(0);
10761 SDValue Divisor = Node->getOperand(1);
10762 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
10763 SDVTList VTs = DAG.getVTList(VT, VT);
10764 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
10765 return true;
10766 }
10767 if (isOperationLegalOrCustom(DivOpc, VT)) {
10768 // X % Y -> X-X/Y*Y
10769 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
10770 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
10771 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
10772 return true;
10773 }
10774 return false;
10775}
10776
10778 SelectionDAG &DAG) const {
10779 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
10780 SDLoc dl(SDValue(Node, 0));
10781 SDValue Src = Node->getOperand(0);
10782
10783 // DstVT is the result type, while SatVT is the size to which we saturate
10784 EVT SrcVT = Src.getValueType();
10785 EVT DstVT = Node->getValueType(0);
10786
10787 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
10788 unsigned SatWidth = SatVT.getScalarSizeInBits();
10789 unsigned DstWidth = DstVT.getScalarSizeInBits();
10790 assert(SatWidth <= DstWidth &&
10791 "Expected saturation width smaller than result width");
10792
10793 // Determine minimum and maximum integer values and their corresponding
10794 // floating-point values.
10795 APInt MinInt, MaxInt;
10796 if (IsSigned) {
10797 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
10798 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
10799 } else {
10800 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
10801 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
10802 }
10803
10804 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
10805 // libcall emission cannot handle this. Large result types will fail.
10806 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
10807 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
10808 SrcVT = Src.getValueType();
10809 }
10810
10811 APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
10812 APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));
10813
10814 APFloat::opStatus MinStatus =
10815 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
10816 APFloat::opStatus MaxStatus =
10817 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
10818 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
10819 !(MaxStatus & APFloat::opStatus::opInexact);
10820
10821 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
10822 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
10823
10824 // If the integer bounds are exactly representable as floats and min/max are
10825 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
10826 // of comparisons and selects.
10827 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
10829 if (AreExactFloatBounds && MinMaxLegal) {
10830 SDValue Clamped = Src;
10831
10832 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
10833 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
10834 // Clamp by MaxFloat from above. NaN cannot occur.
10835 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
10836 // Convert clamped value to integer.
10837 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
10838 dl, DstVT, Clamped);
10839
10840 // In the unsigned case we're done, because we mapped NaN to MinFloat,
10841 // which will cast to zero.
10842 if (!IsSigned)
10843 return FpToInt;
10844
10845 // Otherwise, select 0 if Src is NaN.
10846 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
10847 EVT SetCCVT =
10848 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
10849 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
10850 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
10851 }
10852
10853 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
10854 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
10855
10856 // Result of direct conversion. The assumption here is that the operation is
10857 // non-trapping and it's fine to apply it to an out-of-range value if we
10858 // select it away later.
10859 SDValue FpToInt =
10860 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
10861
10862 SDValue Select = FpToInt;
10863
10864 EVT SetCCVT =
10865 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
10866
10867 // If Src ULT MinFloat, select MinInt. In particular, this also selects
10868 // MinInt if Src is NaN.
10869 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
10870 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
10871 // If Src OGT MaxFloat, select MaxInt.
10872 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
10873 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
10874
10875 // In the unsigned case we are done, because we mapped NaN to MinInt, which
10876 // is already zero.
10877 if (!IsSigned)
10878 return Select;
10879
10880 // Otherwise, select 0 if Src is NaN.
10881 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
10882 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
10883 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
10884}
10885
10887 const SDLoc &dl,
10888 SelectionDAG &DAG) const {
10889 EVT OperandVT = Op.getValueType();
10890 if (OperandVT.getScalarType() == ResultVT.getScalarType())
10891 return Op;
10892 EVT ResultIntVT = ResultVT.changeTypeToInteger();
10893 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
10894 // can induce double-rounding which may alter the results. We can
10895 // correct for this using a trick explained in: Boldo, Sylvie, and
10896 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
10897 // World Congress. 2005.
10898 unsigned BitSize = OperandVT.getScalarSizeInBits();
10899 EVT WideIntVT = OperandVT.changeTypeToInteger();
10900 SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op);
10901 SDValue SignBit =
10902 DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
10903 DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT));
10904 SDValue AbsWide;
10905 if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
10906 AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
10907 } else {
10908 SDValue ClearedSign = DAG.getNode(
10909 ISD::AND, dl, WideIntVT, OpAsInt,
10910 DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
10911 AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
10912 }
10913 SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
10914 SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);
10915
10916 // We can keep the narrow value as-is if narrowing was exact (no
10917 // rounding error), the wide value was NaN (the narrow value is also
10918 // NaN and should be preserved) or if we rounded to the odd value.
10919 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
10920 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
10921 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
10922 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
10923 EVT ResultIntVTCCVT = getSetCCResultType(
10924 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
10925 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
10926 // The result is already odd so we don't need to do anything.
10927 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
10928
10929 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
10930 AbsWide.getValueType());
10931 // We keep results which are exact, odd or NaN.
10932 SDValue KeepNarrow =
10933 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ);
10934 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
10935 // We morally performed a round-down if AbsNarrow is smaller than
10936 // AbsWide.
10937 SDValue NarrowIsRd =
10938 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
10939 // If the narrow value is odd or exact, pick it.
10940 // Otherwise, narrow is even and corresponds to either the rounded-up
10941 // or rounded-down value. If narrow is the rounded-down value, we want
10942 // the rounded-up value as it will be odd.
10943 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
10944 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
10945 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
10946 int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
10947 SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
10948 SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
10949 SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
10950 Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit);
10951 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
10952}
10953
10955 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
10956 SDValue Op = Node->getOperand(0);
10957 EVT VT = Node->getValueType(0);
10958 SDLoc dl(Node);
10959 if (VT.getScalarType() == MVT::bf16) {
10960 if (Node->getConstantOperandVal(1) == 1) {
10961 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
10962 }
10963 EVT OperandVT = Op.getValueType();
10964 SDValue IsNaN = DAG.getSetCC(
10965 dl,
10966 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
10967 Op, Op, ISD::SETUO);
10968
10969 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
10970 // can induce double-rounding which may alter the results. We can
10971 // correct for this using a trick explained in: Boldo, Sylvie, and
10972 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
10973 // World Congress. 2005.
10974 EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
10975 EVT I32 = F32.changeTypeToInteger();
10976 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
10977 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
10978
10979 // Conversions should set NaN's quiet bit. This also prevents NaNs from
10980 // turning into infinities.
10981 SDValue NaN =
10982 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
10983
10984 // Factor in the contribution of the low 16 bits.
10985 SDValue One = DAG.getConstant(1, dl, I32);
10986 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
10987 DAG.getShiftAmountConstant(16, I32, dl));
10988 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
10989 SDValue RoundingBias =
10990 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
10991 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
10992
10993 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
10994 // 0x80000000.
10995 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
10996
10997 // Now that we have rounded, shift the bits into position.
10998 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
10999 DAG.getShiftAmountConstant(16, I32, dl));
11000 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11001 EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11002 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
11003 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
11004 }
11005 return SDValue();
11006}
11007
11009 SelectionDAG &DAG) const {
11010 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11011 assert(Node->getValueType(0).isScalableVector() &&
11012 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11013
11014 EVT VT = Node->getValueType(0);
11015 SDValue V1 = Node->getOperand(0);
11016 SDValue V2 = Node->getOperand(1);
11017 int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
11018 SDLoc DL(Node);
11019
11020 // Expand through memory thusly:
11021 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11022 // Store V1, Ptr
11023 // Store V2, Ptr + sizeof(V1)
11024 // If (Imm < 0)
11025 // TrailingElts = -Imm
11026 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11027 // else
11028 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
11029 // Res = Load Ptr
11030
11031 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
11032
11034 VT.getVectorElementCount() * 2);
11035 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11036 EVT PtrVT = StackPtr.getValueType();
11037 auto &MF = DAG.getMachineFunction();
11038 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11039 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11040
11041 // Store the lo part of CONCAT_VECTORS(V1, V2)
11042 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
11043 // Store the hi part of CONCAT_VECTORS(V1, V2)
11044 SDValue OffsetToV2 = DAG.getVScale(
11045 DL, PtrVT,
11047 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
11048 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
11049
11050 if (Imm >= 0) {
11051 // Load back the required element. getVectorElementPointer takes care of
11052 // clamping the index if it's out-of-bounds.
11053 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
11054 // Load the spliced result
11055 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
11057 }
11058
11059 uint64_t TrailingElts = -Imm;
11060
11061 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11062 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11063 SDValue TrailingBytes =
11064 DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
11065
11066 if (TrailingElts > VT.getVectorMinNumElements()) {
11067 SDValue VLBytes =
11068 DAG.getVScale(DL, PtrVT,
11069 APInt(PtrVT.getFixedSizeInBits(),
11071 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
11072 }
11073
11074 // Calculate the start address of the spliced result.
11075 StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
11076
11077 // Load the spliced result
11078 return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
11080}
11081
11083 SDValue &LHS, SDValue &RHS,
11084 SDValue &CC, SDValue Mask,
11085 SDValue EVL, bool &NeedInvert,
11086 const SDLoc &dl, SDValue &Chain,
11087 bool IsSignaling) const {
11088 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11089 MVT OpVT = LHS.getSimpleValueType();
11090 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
11091 NeedInvert = false;
11092 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
11093 bool IsNonVP = !EVL;
11094 switch (TLI.getCondCodeAction(CCCode, OpVT)) {
11095 default:
11096 llvm_unreachable("Unknown condition code action!");
11098 // Nothing to do.
11099 break;
11102 if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
11103 std::swap(LHS, RHS);
11104 CC = DAG.getCondCode(InvCC);
11105 return true;
11106 }
11107 // Swapping operands didn't work. Try inverting the condition.
11108 bool NeedSwap = false;
11109 InvCC = getSetCCInverse(CCCode, OpVT);
11110 if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
11111 // If inverting the condition is not enough, try swapping operands
11112 // on top of it.
11113 InvCC = ISD::getSetCCSwappedOperands(InvCC);
11114 NeedSwap = true;
11115 }
11116 if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
11117 CC = DAG.getCondCode(InvCC);
11118 NeedInvert = true;
11119 if (NeedSwap)
11120 std::swap(LHS, RHS);
11121 return true;
11122 }
11123
11125 unsigned Opc = 0;
11126 switch (CCCode) {
11127 default:
11128 llvm_unreachable("Don't know how to expand this condition!");
11129 case ISD::SETUO:
11130 if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
11131 CC1 = ISD::SETUNE;
11132 CC2 = ISD::SETUNE;
11133 Opc = ISD::OR;
11134 break;
11135 }
11136 assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11137 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
11138 NeedInvert = true;
11139 [[fallthrough]];
11140 case ISD::SETO:
11141 assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11142 "If SETO is expanded, SETOEQ must be legal!");
11143 CC1 = ISD::SETOEQ;
11144 CC2 = ISD::SETOEQ;
11145 Opc = ISD::AND;
11146 break;
11147 case ISD::SETONE:
11148 case ISD::SETUEQ:
11149 // If the SETUO or SETO CC isn't legal, we might be able to use
11150 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
11151 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
11152 // the operands.
11153 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11154 if (!TLI.isCondCodeLegal(CC2, OpVT) &&
11155 (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
11156 TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
11157 CC1 = ISD::SETOGT;
11158 CC2 = ISD::SETOLT;
11159 Opc = ISD::OR;
11160 NeedInvert = ((unsigned)CCCode & 0x8U);
11161 break;
11162 }
11163 [[fallthrough]];
11164 case ISD::SETOEQ:
11165 case ISD::SETOGT:
11166 case ISD::SETOGE:
11167 case ISD::SETOLT:
11168 case ISD::SETOLE:
11169 case ISD::SETUNE:
11170 case ISD::SETUGT:
11171 case ISD::SETUGE:
11172 case ISD::SETULT:
11173 case ISD::SETULE:
11174 // If we are floating point, assign and break, otherwise fall through.
11175 if (!OpVT.isInteger()) {
11176 // We can use the 4th bit to tell if we are the unordered
11177 // or ordered version of the opcode.
11178 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11179 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
11180 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
11181 break;
11182 }
11183 // Fallthrough if we are unsigned integer.
11184 [[fallthrough]];
11185 case ISD::SETLE:
11186 case ISD::SETGT:
11187 case ISD::SETGE:
11188 case ISD::SETLT:
11189 case ISD::SETNE:
11190 case ISD::SETEQ:
11191 // If all combinations of inverting the condition and swapping operands
11192 // didn't work then we have no means to expand the condition.
11193 llvm_unreachable("Don't know how to expand this condition!");
11194 }
11195
11196 SDValue SetCC1, SetCC2;
11197 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
11198 // If we aren't the ordered or unorder operation,
11199 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
11200 if (IsNonVP) {
11201 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
11202 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
11203 } else {
11204 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
11205 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
11206 }
11207 } else {
11208 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
11209 if (IsNonVP) {
11210 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
11211 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
11212 } else {
11213 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
11214 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
11215 }
11216 }
11217 if (Chain)
11218 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
11219 SetCC2.getValue(1));
11220 if (IsNonVP)
11221 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
11222 else {
11223 // Transform the binary opcode to the VP equivalent.
11224 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
11225 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
11226 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
11227 }
11228 RHS = SDValue();
11229 CC = SDValue();
11230 return true;
11231 }
11232 }
11233 return false;
11234}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
basic Basic Alias true
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:531
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
#define T1
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const char LLVMTargetMachineRef TM
const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if if this FPClassTest can be performed with an ordered fcmp to 0,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1193
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.h:1026
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1006
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:966
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition: APInt.cpp:1579
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1764
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1385
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:427
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:401
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition: APInt.h:1370
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
APInt multiplicativeInverse(const APInt &modulo) const
Computes the multiplicative inverse of this APInt for a given modulo.
Definition: APInt.cpp:1250
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:184
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1672
void setSignBit()
Set the sign bit to 1.
Definition: APInt.h:1318
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:194
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1227
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1375
APInt reverseBits() const
Definition: APInt.cpp:737
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:812
void negate()
Negate this APInt in place.
Definition: APInt.h:1421
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1548
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
unsigned countLeadingZeros() const
Definition: APInt.h:1556
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:334
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
unsigned logBase2() const
Definition: APInt.h:1703
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
void setAllBits()
Set every bit to 1.
Definition: APInt.h:1297
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition: APInt.h:383
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:312
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1128
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition: APInt.h:1345
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:851
APInt byteSwap() const
Definition: APInt.cpp:715
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1367
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:367
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1606
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition: APInt.h:1321
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1070
bool contains(Attribute::AttrKind A) const
Return true if the builder has the specified attribute.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1461
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:705
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
bool isBigEndian() const
Definition: DataLayout.h:239
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:338
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:655
std::vector< std::string > ConstraintCodeVector
Definition: InlineAsm.h:102
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Context object for machine code objects.
Definition: MCContext.h:76
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_GPRel32BlockAddress
EK_GPRel32BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
@ EK_GPRel64BlockAddress
EK_GPRel64BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition: Module.h:455
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
Class to represent pointers.
Definition: DerivedTypes.h:646
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static SDNodeIterator end(const SDNode *N)
static SDNodeIterator begin(const SDNode *N)
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:954
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const APInt * getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) const
If a SHL/SRA/SRL node V has constant shift amounts that are all less than the element bit-width of th...
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:448
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:659
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
const APInt * getValidShiftAmountConstant(SDValue V, const APInt &DemandedElts) const
If a SHL/SRA/SRL node V has a constant or splat constant shift amount that is less than the element b...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:567
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:257
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
iterator end() const
Definition: StringRef.h:113
Class to represent struct types.
Definition: DerivedTypes.h:216
void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
MVT getSimpleValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the MVT corresponding to this LLVM type. See getValueType.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const
Get the CondCode that's to be used to test the result of the comparison libcall against zero.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom on this target.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false,.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, EVT WideVT, const SDValue LL, const SDValue LH, const SDValue RL, const SDValue RH, SDValue &Lo, SDValue &Hi) const
forceExpandWideMUL - Unconditionally expand a MUL into either a libcall or brute force via a wide mul...
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:708
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:287
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
Definition: APInt.cpp:3011
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:723
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:497
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:559
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:714
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:367
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:487
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:979
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1031
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:373
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:483
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:543
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:820
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition: ISDOpcodes.h:380
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1400
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:662
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:722
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:930
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:327
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:1052
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:500
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:507
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:349
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:627
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:323
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:651
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:573
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
Definition: ISDOpcodes.h:978
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:424
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:425
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:971
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:359
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:331
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:809
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:674
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:386
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:888
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:736
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:303
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:442
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:984
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:158
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:657
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:524
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:612
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:831
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:855
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:763
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:340
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:515
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
Definition: ISDOpcodes.h:1581
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
Definition: ISDOpcodes.h:1586
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1556
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1523
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1503
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition: STLExtras.h:1763
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Mod
The access may modify the value stored in memory.
@ Other
Any other memory.
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isConstFalseVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Definition: Utils.cpp:1517
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition: APFloat.h:1387
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:428
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:349
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:234
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:246
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition: ValueTypes.h:34
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:120
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:233
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:415
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:455
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:397
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:438
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber/label.
Definition: InlineAsm.h:126
int MatchingInput
MatchingInput - If this is not -1, this is an output constraint where an input constraint is required...
Definition: InlineAsm.h:136
ConstraintCodeVector Codes
Code - The constraint code, either the register name (in braces) or the constraint letter/number.
Definition: InlineAsm.h:154
SubConstraintInfoVector multipleAlternatives
multipleAlternatives - If there are multiple alternative constraints, this array will contain them.
Definition: InlineAsm.h:161
bool isIndirect
isIndirect - True if this operand is an indirect operand.
Definition: InlineAsm.h:150
bool hasMatchingInput() const
hasMatchingInput - Return true if this is an output constraint that has a matching input constraint.
Definition: InlineAsm.h:140
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:297
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:182
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition: KnownBits.h:251
static KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
Definition: KnownBits.cpp:208
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:104
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:238
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:63
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:157
bool hasConflict() const
Returns true if there is conflicting information.
Definition: KnownBits.h:47
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:530
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition: KnownBits.h:285
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition: KnownBits.h:229
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
static KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
Definition: KnownBits.cpp:184
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition: KnownBits.h:317
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
static KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
Definition: KnownBits.cpp:221
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:496
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:536
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition: KnownBits.cpp:57
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:512
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:516
bool isNegative() const
Returns true if this value is known to be negative.
Definition: KnownBits.h:101
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:765
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
Definition: KnownBits.h:163
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:540
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:520
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition: KnownBits.h:282
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:506
static KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Definition: KnownBits.cpp:202
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This contains information for each constraint that we are lowering.
MVT ConstraintVT
The ValueType for the operand value.
TargetLowering::ConstraintType ConstraintType
Information about the constraint code, e.g.
std::string ConstraintCode
This contains the actual string for the code, like "m".
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setSExt(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...