//===- lib/CodeGen/GlobalISel/GISelValueTracking.cpp ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Provides analysis for querying information about KnownBits during GISel
/// passes.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/IR/FMF.h"

#define DEBUG_TYPE "gisel-known-bits"

using namespace llvm;
using namespace MIPatternMatch;

INITIALIZE_PASS(GISelValueTrackingAnalysisLegacy, DEBUG_TYPE,
                "Analysis for ComputingKnownBits", false, true)

GISelValueTracking::GISelValueTracking(MachineFunction &MF, unsigned MaxDepth)
    : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
      DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {}

Align GISelValueTracking::computeKnownAlignment(Register R, unsigned Depth) {
  const MachineInstr *MI = MRI.getVRegDef(R);
  switch (MI->getOpcode()) {
  case TargetOpcode::COPY:
    return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
  case TargetOpcode::G_ASSERT_ALIGN: {
    // TODO: Min with source
    return Align(MI->getOperand(2).getImm());
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI->getOperand(1).getIndex();
    return MF.getFrameInfo().getObjectAlign(FrameIdx);
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default:
    return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
  }
}

KnownBits GISelValueTracking::getKnownBits(MachineInstr &MI) {
  assert(MI.getNumExplicitDefs() == 1 &&
         "expected single return generic instruction");
  return getKnownBits(MI.getOperand(0).getReg());
}

KnownBits GISelValueTracking::getKnownBits(Register R) {
  const LLT Ty = MRI.getType(R);
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return getKnownBits(R, DemandedElts);
}

KnownBits GISelValueTracking::getKnownBits(Register R,
                                           const APInt &DemandedElts,
                                           unsigned Depth) {
  KnownBits Known;
  computeKnownBitsImpl(R, Known, DemandedElts, Depth);
  return Known;
}

bool GISelValueTracking::signBitIsZero(Register R) {
  LLT Ty = MRI.getType(R);
  unsigned BitWidth = Ty.getScalarSizeInBits();
  return maskedValueIsZero(R, APInt::getSignMask(BitWidth));
}

[[maybe_unused]] static void
dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
  dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
         << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
         << toString(Known.Zero | Known.One, 16, false) << "\n"
         << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false)
         << "\n"
         << "[" << Depth << "] One: 0x" << toString(Known.One, 16, false)
         << "\n";
}

/// Compute known bits for the intersection of \p Src0 and \p Src1
void GISelValueTracking::computeKnownBitsMin(Register Src0, Register Src1,
                                             KnownBits &Known,
                                             const APInt &DemandedElts,
                                             unsigned Depth) {
  // Test src1 first, since we canonicalize simpler expressions to the RHS.
  computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);

  // If we don't know any bits, early out.
  if (Known.isUnknown())
    return;

  KnownBits Known2;
  computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);

  // Only known if known in both the LHS and RHS.
  Known = Known.intersectWith(Known2);
}

// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
// created using Width. Use this function when the inputs are KnownBits
// objects. TODO: Move this to KnownBits.h if this is usable in more cases.
static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
                             const KnownBits &OffsetKnown,
                             const KnownBits &WidthKnown) {
  KnownBits Mask(BitWidth);
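  // Conservative bounds: any bit at or above the largest possible width is
  // known zero in the mask, and any bit below the smallest possible width is
  // known one.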
  Mask.Zero = APInt::getBitsSetFrom(
      BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth));
  Mask.One = APInt::getLowBitsSet(
      BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth));
  return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask;
}

void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
                                              const APInt &DemandedElts,
                                              unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  // Handle the case where this is called on a register that does not have a
  // type constraint. For example, it may be post-ISel or this target might not
  // preserve the type when early-selecting instructions.
  if (!DstTy.isValid()) {
    Known = KnownBits();
    return;
  }

#ifndef NDEBUG
  if (DstTy.isFixedVector()) {
    assert(
        DstTy.getNumElements() == DemandedElts.getBitWidth() &&
        "DemandedElt width should equal the fixed vector number of elements");
  } else {
    assert(DemandedElts.getBitWidth() == 1 && DemandedElts == APInt(1, 1) &&
           "DemandedElt width should be 1 for scalars or scalable vectors");
  }
#endif

  unsigned BitWidth = DstTy.getScalarSizeInBits();
  Known = KnownBits(BitWidth); // Don't know anything

  // Depth may get bigger than max depth if it gets passed to a different
  // GISelValueTracking object.
  // This may happen when, say, a generic part uses a GISelValueTracking object
  // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr
  // which creates a new GISelValueTracking object with a different and smaller
  // depth. If we just check for equality, we would never exit if the depth
  // that is passed down to the target specific GISelValueTracking object is
  // already bigger than its max depth.
  if (Depth >= getMaxDepth())
    return;

  if (!DemandedElts)
    return; // No demanded elts, better to assume we don't know anything.

  KnownBits Known2;

  switch (Opcode) {
  default:
    TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                      Depth);
    break;
  case TargetOpcode::G_BUILD_VECTOR: {
    // Collect the known bits that are shared by every demanded vector element.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
      if (!DemandedElts[I])
        continue;

      computeKnownBitsImpl(MO.getReg(), Known2, APInt(1, 1), Depth + 1);

      // Known bits are the values that are shared by every demanded element.
      Known = Known.intersectWith(Known2);

      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::G_SPLAT_VECTOR: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, APInt(1, 1),
                         Depth + 1);
    // Implicitly truncate the bits to match the official semantics of
    // G_SPLAT_VECTOR.
    Known = Known.trunc(BitWidth);
    break;
  }
  case TargetOpcode::COPY:
  case TargetOpcode::G_PHI:
  case TargetOpcode::PHI: {
    Known.One = APInt::getAllOnes(BitWidth);
    Known.Zero = APInt::getAllOnes(BitWidth);
    // Destination registers should not have subregisters at this
    // point of the pipeline, otherwise the main live-range will be
    // defined more than once, which is against SSA.
    assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
    // PHI's operands are a mix of registers and basic blocks interleaved.
    // We only care about the register ones.
    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
      const MachineOperand &Src = MI.getOperand(Idx);
      Register SrcReg = Src.getReg();
      LLT SrcTy = MRI.getType(SrcReg);
      // Look through trivial copies and phis but don't look through trivial
      // copies or phis of the form `%1:(s32) = OP %0:gpr32`; known-bits
      // analysis is currently unable to determine the bit width of a
      // register class.
      //
      // We can't use NoSubRegister by name as it's defined by each target but
      // it's always defined to be 0 by tablegen.
      if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
          SrcTy.isValid()) {
        // In case we're forwarding from a vector register to a non-vector
        // register we need to update the demanded elements to reflect this
        // before recursing.
        APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
                                    ? APInt::getAllOnes(SrcTy.getNumElements())
                                    : DemandedElts; // Known to be APInt(1, 1)
        // For COPYs we don't do anything, don't increase the depth.
        computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts,
                             Depth + (Opcode != TargetOpcode::COPY));
        Known2 = Known2.anyextOrTrunc(BitWidth);
        Known = Known.intersectWith(Known2);
        // If we reach a point where we don't know anything
        // just stop looking through the operands.
        if (Known.isUnknown())
          break;
      } else {
        // We know nothing.
        Known = KnownBits(BitWidth);
        break;
      }
    }
    break;
  }
  case TargetOpcode::G_CONSTANT: {
    Known = KnownBits::makeConstant(MI.getOperand(1).getCImm()->getValue());
    break;
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI.getOperand(1).getIndex();
    TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF);
    break;
  }
  case TargetOpcode::G_SUB: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::sub(Known, Known2);
    break;
  }
  case TargetOpcode::G_XOR: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known ^= Known2;
    break;
  }
  case TargetOpcode::G_PTR_ADD: {
    if (DstTy.isVector())
      break;
    // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
    LLT Ty = MRI.getType(MI.getOperand(1).getReg());
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      break;
    [[fallthrough]];
  }
  case TargetOpcode::G_ADD: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::add(Known, Known2);
    break;
  }
  case TargetOpcode::G_AND: {
    // If either the LHS or the RHS are Zero, the result is zero.
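    // e.g. a bit that is known zero in either operand is known zero in the
    // result; a bit is known one only if it is known one in both operands.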
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known &= Known2;
    break;
  }
  case TargetOpcode::G_OR: {
    // If either the LHS or the RHS are One, the result is one.
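    // e.g. a bit that is known one in either operand is known one in the
    // result; a bit is known zero only if it is known zero in both operands.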
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_MUL: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mul(Known, Known2);
    break;
  }
  case TargetOpcode::G_UMULH: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mulhu(Known, Known2);
    break;
  }
  case TargetOpcode::G_SMULH: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mulhs(Known, Known2);
    break;
  }
  case TargetOpcode::G_SELECT: {
    computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
                        Known, DemandedElts, Depth + 1);
    break;
  }
  case TargetOpcode::G_SMIN: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_SMAX: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMIN: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMAX: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    if (DstTy.isVector())
      break;
    if (TL.getBooleanContents(DstTy.isVector(),
                              Opcode == TargetOpcode::G_FCMP) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case TargetOpcode::G_SEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    // If the sign bit is known to be zero or one, then sext will extend
    // it to the top bits, else it will just zext.
    Known = Known.sext(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.sextInReg(MI.getOperand(2).getImm());
    break;
  }
  case TargetOpcode::G_ANYEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.anyext(BitWidth);
    break;
  }
  case TargetOpcode::G_LOAD: {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = KnownRange.anyext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    if (DstTy.isVector())
      break;
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = Opcode == TargetOpcode::G_SEXTLOAD
                ? KnownRange.sext(Known.getBitWidth())
                : KnownRange.zext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_ASHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::ashr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_LSHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::lshr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_SHL: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::shl(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
    if (DstTy.isVector())
      break;
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_TRUNC: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.zextOrTrunc(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_ZEXT: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);

    unsigned SrcBitWidth = MI.getOperand(2).getImm();
    assert(SrcBitWidth && "SrcBitWidth can't be zero");
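    // Everything above the asserted width is known zero: clear those bits and
    // drop any one-bits that conflict.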
    APInt InMask = APInt::getLowBitsSet(BitWidth, SrcBitWidth);
    Known.Zero |= (~InMask);
    Known.One &= (~Known.Zero);
    break;
  }
  case TargetOpcode::G_ASSERT_ALIGN: {
    int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm());

    // TODO: Should use maximum with source
    // If a node is guaranteed to be aligned, set low zero bits accordingly as
    // well as clearing one bits.
    Known.Zero.setLowBits(LogOfAlign);
    Known.One.clearLowBits(LogOfAlign);
    break;
  }
  case TargetOpcode::G_MERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
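    // Each source operand fills its own contiguous OpSize-bit slice of the
    // destination, so the per-operand known bits can simply be concatenated.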
    for (unsigned I = 0; I != NumOps - 1; ++I) {
      KnownBits SrcOpKnown;
      computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
                           DemandedElts, Depth + 1);
      Known.insertBits(SrcOpKnown, I * OpSize);
    }
    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    Register SrcReg = MI.getOperand(NumOps - 1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);

    if (SrcTy.isVector() && SrcTy.getScalarType() != DstTy.getScalarType())
      return; // TODO: Handle vector->subelement unmerges

    // Figure out the result operand index
    unsigned DstIdx = 0;
    for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
         ++DstIdx)
      ;

    APInt SubDemandedElts = DemandedElts;
    if (SrcTy.isVector()) {
      unsigned DstLanes = DstTy.isVector() ? DstTy.getNumElements() : 1;
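      // Shift the demanded-lane mask into the slice of the source vector that
      // is covered by this particular result register.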
      SubDemandedElts =
          DemandedElts.zext(SrcTy.getNumElements()).shl(DstIdx * DstLanes);
    }

    KnownBits SrcOpKnown;
    computeKnownBitsImpl(SrcReg, SrcOpKnown, SubDemandedElts, Depth + 1);

    if (SrcTy.isVector())
      Known = std::move(SrcOpKnown);
    else
      Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
    break;
  }
  case TargetOpcode::G_BSWAP: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.byteSwap();
    break;
  }
  case TargetOpcode::G_BITREVERSE: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.reverseBits();
    break;
  }
  case TargetOpcode::G_CTPOP: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    // We can bound the space the count needs. Also, bits known to be zero
    // can't contribute to the population.
    unsigned BitsPossiblySet = Known2.countMaxPopulation();
    unsigned LowBits = llvm::bit_width(BitsPossiblySet);
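    // e.g. if at most 5 bits can be set, the count is at most 5, which fits
    // in bit_width(5) = 3 bits, so every bit from position 3 up is zero.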
    Known.Zero.setBitsFrom(LowBits);
    // TODO: we could bound Known.One using the lower bound on the number of
    // bits which might be set provided by popcnt KnownOne2.
    break;
  }
  case TargetOpcode::G_UBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    break;
  }
  case TargetOpcode::G_SBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    OffsetKnown = OffsetKnown.sext(BitWidth);
    WidthKnown = WidthKnown.sext(BitWidth);
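    // The offset and width are now widened to the full source bit width so
    // the mask and shift arithmetic below happens in a single width.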
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    // Sign extend the extracted value using shift left and arithmetic shift
    // right.
    KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth));
    KnownBits ShiftKnown = KnownBits::sub(ExtKnown, WidthKnown);
    Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
    break;
  }
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE: {
    if (MI.getOperand(1).getReg() == R) {
      // If we know the result of a compare has the top bits zero, use this
      // info.
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
      break;
    }

    assert(MI.getOperand(0).getReg() == R &&
           "We only compute knownbits for the sum here.");
    // With [US]ADDE, a carry bit may be added in.
    KnownBits Carry(1);
    if (Opcode == TargetOpcode::G_UADDE || Opcode == TargetOpcode::G_SADDE) {
      computeKnownBitsImpl(MI.getOperand(4).getReg(), Carry, DemandedElts,
                           Depth + 1);
      // Carry has bit width 1
      Carry = Carry.trunc(1);
    } else {
      Carry.setAllZero();
    }

    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::computeForAddCarry(Known, Known2, Carry);
    break;
  }
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO: {
    if (MI.getOperand(1).getReg() == R) {
      // If we know the result of a compare has the top bits zero, use this
      // info.
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    KnownBits SrcOpKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    // If we have a known 1, its position is our upper bound.
    unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(PossibleLZ);
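    // e.g. if the source has at most 19 leading zeros, the result is at most
    // 19, which fits in bit_width(19) = 5 bits, so bits 5 and up are zero.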
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    GExtractVectorElement &Extract = cast<GExtractVectorElement>(MI);
    Register InVec = Extract.getVectorReg();
    Register EltNo = Extract.getIndexReg();

    auto ConstEltNo = getIConstantVRegVal(EltNo, MRI);

    LLT VecVT = MRI.getType(InVec);
    // computeKnownBits not yet implemented for scalable vectors.
    if (VecVT.isScalableVector())
      break;

    const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
    const unsigned NumSrcElts = VecVT.getNumElements();
    // A return type different from the vector's element type may lead to
    // issues with pattern selection. Bail out to avoid that.
    if (BitWidth > EltBitWidth)
      break;

    Known.Zero.setAllBits();
    Known.One.setAllBits();

    // If we know the element index, just demand that vector element, else for
    // an unknown element index, ignore DemandedElts and demand them all.
    APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
    if (ConstEltNo && ConstEltNo->ult(NumSrcElts))
      DemandedSrcElts =
          APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());

    computeKnownBitsImpl(InVec, Known, DemandedSrcElts, Depth + 1);
    break;
  }
  case TargetOpcode::G_SHUFFLE_VECTOR: {
    APInt DemandedLHS, DemandedRHS;
    // Collect the known bits that are shared by every vector element
    // referenced by the shuffle.
    unsigned NumElts = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
    if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
                                DemandedElts, DemandedLHS, DemandedRHS))
      break;

    // Known bits are the values that are shared by every demanded element.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedLHS) {
      computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedLHS,
                           Depth + 1);
      Known = Known.intersectWith(Known2);
    }
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    if (!!DemandedRHS) {
      computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedRHS,
                           Depth + 1);
      Known = Known.intersectWith(Known2);
    }
    break;
  }
  case TargetOpcode::G_CONCAT_VECTORS: {
    if (MRI.getType(MI.getOperand(0).getReg()).isScalableVector())
      break;
    // Split DemandedElts and test each of the demanded subvectors.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    unsigned NumSubVectorElts =
        MRI.getType(MI.getOperand(1).getReg()).getNumElements();

    for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
      APInt DemandedSub =
          DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts);
      if (!!DemandedSub) {
        computeKnownBitsImpl(MO.getReg(), Known2, DemandedSub, Depth + 1);

        Known = Known.intersectWith(Known2);
      }
      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::G_ABS: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.abs();
    Known.Zero.setHighBits(computeNumSignBits(SrcReg, DemandedElts, Depth + 1) -
                           1);
    break;
  }
  }

  LLVM_DEBUG(dumpResult(MI, Known, Depth));
}

static bool outputDenormalIsIEEEOrPosZero(const MachineFunction &MF, LLT Ty) {
  Ty = Ty.getScalarType();
  DenormalMode Mode = MF.getDenormalMode(getFltSemanticForLLT(Ty));
  return Mode.Output == DenormalMode::IEEE ||
         Mode.Output == DenormalMode::PositiveZero;
}
785
786void GISelValueTracking::computeKnownFPClass(Register R, KnownFPClass &Known,
787 FPClassTest InterestedClasses,
788 unsigned Depth) {
789 LLT Ty = MRI.getType(R);
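  // As with getKnownBits, a scalable vector is tracked through a single
  // implicitly broadcast lane, so only one demanded element bit is used.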
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth);
}

void GISelValueTracking::computeKnownFPClassForFPTrunc(
    const MachineInstr &MI, const APInt &DemandedElts,
    FPClassTest InterestedClasses, KnownFPClass &Known, unsigned Depth) {
  if ((InterestedClasses & (KnownFPClass::OrderedLessThanZeroMask | fcNan)) ==
      fcNone)
    return;

  Register Val = MI.getOperand(1).getReg();
  KnownFPClass KnownSrc;
  computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                      Depth + 1);

  // Sign should be preserved
  // TODO: Handle cannot be ordered greater than zero
  if (KnownSrc.cannotBeOrderedLessThanZero())
    Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

  Known.propagateNaN(KnownSrc, true);

  // Infinity needs a range check.
}

void GISelValueTracking::computeKnownFPClass(Register R,
                                             const APInt &DemandedElts,
                                             FPClassTest InterestedClasses,
                                             KnownFPClass &Known,
                                             unsigned Depth) {
  assert(Known.isUnknown() && "should not be called with known information");

  if (!DemandedElts) {
    // No demanded elts, better to assume we don't know anything.
    Known.resetAll();
    return;
  }

  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  if (!DstTy.isValid()) {
    Known.resetAll();
    return;
  }

  if (auto Cst = GFConstant::getConstant(R, MRI)) {
    switch (Cst->getKind()) {
    case GFConstant::GFConstantKind::Scalar: {
      auto APF = Cst->getScalarValue();
      Known.KnownFPClasses = APF.classify();
      Known.SignBit = APF.isNegative();
      break;
    }
    case GFConstant::GFConstantKind::FixedVector: {
      Known.KnownFPClasses = fcNone;
      bool SignBitAllZero = true;
      bool SignBitAllOne = true;

      for (auto C : *Cst) {
        Known.KnownFPClasses |= C.classify();
        if (C.isNegative())
          SignBitAllZero = false;
        else
          SignBitAllOne = false;
      }

      if (SignBitAllOne != SignBitAllZero)
        Known.SignBit = SignBitAllOne;

      break;
    }
    case GFConstant::GFConstantKind::ScalableVector: {
      Known.resetAll();
      break;
    }
    }

    return;
  }

  FPClassTest KnownNotFromFlags = fcNone;
  if (MI.getFlag(MachineInstr::FmNoNans))
    KnownNotFromFlags |= fcNan;
  if (MI.getFlag(MachineInstr::FmNoInfs))
    KnownNotFromFlags |= fcInf;

  // We no longer need to find out about these bits from inputs if we can
  // assume this from flags/attributes.
  InterestedClasses &= ~KnownNotFromFlags;

  llvm::scope_exit ClearClassesFromFlags(
      [=, &Known] { Known.knownNot(KnownNotFromFlags); });

  // All recursive calls that increase depth must come after this.
  if (Depth == MaxAnalysisRecursionDepth)
    return;

  const MachineFunction *MF = MI.getMF();

  switch (Opcode) {
  default:
    TL.computeKnownFPClassForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                         Depth);
    break;
  case TargetOpcode::G_FNEG: {
    Register Val = MI.getOperand(1).getReg();
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, Known, Depth + 1);
    Known.fneg();
    break;
  }
  case TargetOpcode::G_SELECT: {
    GSelect &SelMI = cast<GSelect>(MI);
    Register Cond = SelMI.getCondReg();
    Register LHS = SelMI.getTrueReg();
    Register RHS = SelMI.getFalseReg();

    FPClassTest FilterLHS = fcAllFlags;
    FPClassTest FilterRHS = fcAllFlags;

    Register TestedValue;
    FPClassTest MaskIfTrue = fcAllFlags;
    FPClassTest MaskIfFalse = fcAllFlags;
    FPClassTest ClassVal = fcNone;

    CmpInst::Predicate Pred;
    Register CmpLHS, CmpRHS;
    if (mi_match(Cond, MRI,
                 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) {
      // If the select filters out a value based on the class, it no longer
      // participates in the class of the result.

      // TODO: In some degenerate cases we can infer something if we try again
      // without looking through sign operations.
      bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS;
      std::tie(TestedValue, MaskIfTrue, MaskIfFalse) =
          fcmpImpliesClass(Pred, *MF, CmpLHS, CmpRHS, LookThroughFAbsFNeg);
    } else if (mi_match(
                   Cond, MRI,
                   m_GIsFPClass(m_Reg(TestedValue), m_FPClassTest(ClassVal)))) {
      FPClassTest TestedMask = ClassVal;
      MaskIfTrue = TestedMask;
      MaskIfFalse = ~TestedMask;
    }

    if (TestedValue == LHS) {
      // match !isnan(x) ? x : y
      FilterLHS = MaskIfTrue;
    } else if (TestedValue == RHS) { // && IsExactClass
      // match !isnan(x) ? y : x
      FilterRHS = MaskIfFalse;
    }

    KnownFPClass Known2;
    computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS, Known,
                        Depth + 1);
    Known.KnownFPClasses &= FilterLHS;

    computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS,
                        Known2, Depth + 1);
    Known2.KnownFPClasses &= FilterRHS;

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_FCOPYSIGN: {
    Register Magnitude = MI.getOperand(1).getReg();
    Register Sign = MI.getOperand(2).getReg();

    KnownFPClass KnownSign;

    computeKnownFPClass(Magnitude, DemandedElts, InterestedClasses, Known,
                        Depth + 1);
    computeKnownFPClass(Sign, DemandedElts, InterestedClasses, KnownSign,
                        Depth + 1);
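    // The result keeps the magnitude's value classes; only the sign bit is
    // taken from the sign operand.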
    Known.copysign(KnownSign);
    break;
  }
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_STRICT_FMA:
  case TargetOpcode::G_FMAD: {
    if ((InterestedClasses & fcNegative) == fcNone)
      break;

    Register A = MI.getOperand(1).getReg();
    Register B = MI.getOperand(2).getReg();
    Register C = MI.getOperand(3).getReg();

    if (A != B)
      break;

    // The multiply cannot be -0 and therefore the add can't be -0
    Known.knownNot(fcNegZero);

    // x * x + y is non-negative if y is non-negative.
    KnownFPClass KnownAddend;
    computeKnownFPClass(C, DemandedElts, InterestedClasses, KnownAddend,
                        Depth + 1);

    if (KnownAddend.cannotBeOrderedLessThanZero())
      Known.knownNot(fcNegative);
    break;
  }
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_STRICT_FSQRT: {
    KnownFPClass KnownSrc;
    FPClassTest InterestedSrcs = InterestedClasses;
    if (InterestedClasses & fcNan)
      InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;

    Register Val = MI.getOperand(1).getReg();

    computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1);

    if (KnownSrc.isKnownNeverPosInfinity())
      Known.knownNot(fcPosInf);
    if (KnownSrc.isKnownNever(fcSNan))
      Known.knownNot(fcSNan);

    // Any negative value besides -0 returns a nan.
    if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
      Known.knownNot(fcNan);

    // The only negative value that can be returned is -0 for -0 inputs.
    Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal);
    break;
  }
  case TargetOpcode::G_FABS: {
    if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
      Register Val = MI.getOperand(1).getReg();
      // If we only care about the sign bit we don't need to inspect the
      // operand.
      computeKnownFPClass(Val, DemandedElts, InterestedClasses, Known,
                          Depth + 1);
    }
    Known.fabs();
    break;
  }
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSINCOS: {
    // Return NaN on infinite inputs.
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;

    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);
    Known.knownNot(fcInf);

    if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
      Known.knownNot(fcNan);
    break;
  }
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMAXIMUMNUM:
  case TargetOpcode::G_FMINIMUMNUM: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    KnownFPClass KnownLHS, KnownRHS;

    computeKnownFPClass(LHS, DemandedElts, InterestedClasses, KnownLHS,
                        Depth + 1);
    computeKnownFPClass(RHS, DemandedElts, InterestedClasses, KnownRHS,
                        Depth + 1);

    bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
    Known = KnownLHS | KnownRHS;

    // If either operand is not NaN, the result is not NaN.
    if (NeverNaN && (Opcode == TargetOpcode::G_FMINNUM ||
                     Opcode == TargetOpcode::G_FMAXNUM ||
                     Opcode == TargetOpcode::G_FMINIMUMNUM ||
                     Opcode == TargetOpcode::G_FMAXIMUMNUM))
      Known.knownNot(fcNan);

    if (Opcode == TargetOpcode::G_FMAXNUM ||
        Opcode == TargetOpcode::G_FMAXIMUMNUM ||
        Opcode == TargetOpcode::G_FMAXNUM_IEEE) {
      // If at least one operand is known to be positive, the result must be
      // positive.
      if ((KnownLHS.cannotBeOrderedLessThanZero() &&
           KnownLHS.isKnownNeverNaN()) ||
          (KnownRHS.cannotBeOrderedLessThanZero() &&
           KnownRHS.isKnownNeverNaN()))
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
    } else if (Opcode == TargetOpcode::G_FMAXIMUM) {
      // If at least one operand is known to be positive, the result must be
      // positive.
      if (KnownLHS.cannotBeOrderedLessThanZero() ||
          KnownRHS.cannotBeOrderedLessThanZero())
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
    } else if (Opcode == TargetOpcode::G_FMINNUM ||
               Opcode == TargetOpcode::G_FMINIMUMNUM ||
               Opcode == TargetOpcode::G_FMINNUM_IEEE) {
      // If at least one operand is known to be negative, the result must be
      // negative.
      if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
           KnownLHS.isKnownNeverNaN()) ||
          (KnownRHS.cannotBeOrderedGreaterThanZero() &&
           KnownRHS.isKnownNeverNaN()))
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
    } else if (Opcode == TargetOpcode::G_FMINIMUM) {
      // If at least one operand is known to be negative, the result must be
      // negative.
      if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
          KnownRHS.cannotBeOrderedGreaterThanZero())
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
    } else {
      llvm_unreachable("unhandled intrinsic");
    }

    // Fixup zero handling if denormals could be returned as a zero.
    //
    // As there's no spec for denormal flushing, be conservative with the
    // treatment of denormals that could be flushed to zero. For older
    // subtargets on AMDGPU the min/max instructions would not flush the
    // output and return the original value.
    //
    if ((Known.KnownFPClasses & fcZero) != fcNone &&
        !Known.isKnownNeverSubnormal()) {
      DenormalMode Mode =
          MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType()));
      if (Mode != DenormalMode::getIEEE())
        Known.KnownFPClasses |= fcZero;
    }

    if (Known.isKnownNeverNaN()) {
      if (KnownLHS.SignBit && KnownRHS.SignBit &&
          *KnownLHS.SignBit == *KnownRHS.SignBit) {
        if (*KnownLHS.SignBit)
          Known.signBitMustBeOne();
        else
          Known.signBitMustBeZero();
      } else if ((Opcode == TargetOpcode::G_FMAXIMUM ||
                  Opcode == TargetOpcode::G_FMINIMUM) ||
                 Opcode == TargetOpcode::G_FMAXIMUMNUM ||
                 Opcode == TargetOpcode::G_FMINIMUMNUM ||
                 Opcode == TargetOpcode::G_FMAXNUM_IEEE ||
                 Opcode == TargetOpcode::G_FMINNUM_IEEE ||
                 // FIXME: Should be using logical zero versions
                 ((KnownLHS.isKnownNeverNegZero() ||
                   KnownRHS.isKnownNeverPosZero()) &&
                  (KnownLHS.isKnownNeverPosZero() ||
                   KnownRHS.isKnownNeverNegZero()))) {
        if ((Opcode == TargetOpcode::G_FMAXIMUM ||
             Opcode == TargetOpcode::G_FMAXNUM ||
             Opcode == TargetOpcode::G_FMAXIMUMNUM ||
             Opcode == TargetOpcode::G_FMAXNUM_IEEE) &&
            (KnownLHS.SignBit == false || KnownRHS.SignBit == false))
          Known.signBitMustBeZero();
        else if ((Opcode == TargetOpcode::G_FMINIMUM ||
                  Opcode == TargetOpcode::G_FMINNUM ||
                  Opcode == TargetOpcode::G_FMINIMUMNUM ||
                  Opcode == TargetOpcode::G_FMINNUM_IEEE) &&
                 (KnownLHS.SignBit == true || KnownRHS.SignBit == true))
          Known.signBitMustBeOne();
      }
    }
    break;
  }
  case TargetOpcode::G_FCANONICALIZE: {
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);

    // This is essentially a stronger form of
    // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
    // actually have an IR canonicalization guarantee.

    // Canonicalize may flush denormals to zero, so we have to consider the
    // denormal mode to preserve known-not-0 knowledge.
    Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;

    // Stronger version of propagateNaN
    // Canonicalize is guaranteed to quiet signaling nans.
    if (KnownSrc.isKnownNeverNaN())
      Known.knownNot(fcNan);
    else
      Known.knownNot(fcSNan);

    // If the parent function flushes denormals, the canonical output cannot
    // be a denormal.
    LLT Ty = MRI.getType(Val).getScalarType();
    const fltSemantics &FPType = getFltSemanticForLLT(Ty);
    DenormalMode DenormMode = MF->getDenormalMode(FPType);
    if (DenormMode == DenormalMode::getIEEE()) {
      if (KnownSrc.isKnownNever(fcPosZero))
        Known.knownNot(fcPosZero);
      if (KnownSrc.isKnownNever(fcNegZero))
        Known.knownNot(fcNegZero);
      break;
    }

    if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero())
      Known.knownNot(fcSubnormal);

    if (DenormMode.Input == DenormalMode::PositiveZero ||
        (DenormMode.Output == DenormalMode::PositiveZero &&
         DenormMode.Input == DenormalMode::IEEE))
      Known.knownNot(fcNegZero);

    break;
  }
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
  case TargetOpcode::G_VECREDUCE_FMINIMUM: {
    Register Val = MI.getOperand(1).getReg();
    // reduce min/max will choose an element from one of the vector elements,
    // so we can infer any class information that is common to all elements.
    Known =
        computeKnownFPClass(Val, MI.getFlags(), InterestedClasses, Depth + 1);

    // Can only propagate sign if output is never NaN.
    if (!Known.isKnownNeverNaN())
      Known.SignBit.reset();
    break;
  }
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUND: {
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    FPClassTest InterestedSrcs = InterestedClasses;
    if (InterestedSrcs & fcPosFinite)
      InterestedSrcs |= fcPosFinite;
    if (InterestedSrcs & fcNegFinite)
      InterestedSrcs |= fcNegFinite;
    computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1);

    // Integer results cannot be subnormal.
    Known.knownNot(fcSubnormal);

    Known.propagateNaN(KnownSrc, true);

    // TODO: handle multi unit FPTypes once LLT FPInfo lands

    // Negative round ups to 0 produce -0
    if (KnownSrc.isKnownNever(fcPosFinite))
      Known.knownNot(fcPosFinite);
    if (KnownSrc.isKnownNever(fcNegFinite))
      Known.knownNot(fcNegFinite);

    break;
  }
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10: {
    Known.knownNot(fcNegative);
    if ((InterestedClasses & fcNan) == fcNone)
      break;

    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);
    if (KnownSrc.isKnownNeverNaN()) {
      Known.knownNot(fcNan);
      Known.signBitMustBeZero();
    }

    break;
  }
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FLOG10: {
    // log(+inf) -> +inf
    // log([+-]0.0) -> -inf
    // log(-inf) -> nan
    // log(-x) -> nan
    if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
      break;

    FPClassTest InterestedSrcs = InterestedClasses;
    if ((InterestedClasses & fcNegInf) != fcNone)
      InterestedSrcs |= fcZero | fcSubnormal;
    if ((InterestedClasses & fcNan) != fcNone)
      InterestedSrcs |= fcNan | (fcNegative & ~fcNan);

    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1);

    if (KnownSrc.isKnownNeverPosInfinity())
      Known.knownNot(fcPosInf);

    if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
      Known.knownNot(fcNan);

    LLT Ty = MRI.getType(Val).getScalarType();
    const fltSemantics &FltSem = getFltSemanticForLLT(Ty);
    DenormalMode Mode = MF->getDenormalMode(FltSem);

    if (KnownSrc.isKnownNeverLogicalZero(Mode))
      Known.knownNot(fcNegInf);

    break;
  }
  case TargetOpcode::G_FPOWI: {
    if ((InterestedClasses & fcNegative) == fcNone)
      break;

    Register Exp = MI.getOperand(2).getReg();
    LLT ExpTy = MRI.getType(Exp);
    KnownBits ExponentKnownBits = getKnownBits(
        Exp, ExpTy.isVector() ? DemandedElts : APInt(1, 1), Depth + 1);

    if (ExponentKnownBits.Zero[0]) { // Is even
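      // e.g. powi(x, 2*k) == powi(-x, 2*k), so an even exponent can never
      // produce a negative result.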
      Known.knownNot(fcNegative);
      break;
    }

    // Given that exp is an integer, here are the
    // ways that pow can return a negative value:
    //
    // pow(-x, exp) --> negative if exp is odd and x is negative.
    // pow(-0, exp) --> -inf if exp is negative odd.
    // pow(-0, exp) --> -0 if exp is positive odd.
    // pow(-inf, exp) --> -0 if exp is negative odd.
    // pow(-inf, exp) --> -inf if exp is positive odd.
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, fcNegative, KnownSrc, Depth + 1);
    if (KnownSrc.isKnownNever(fcNegative))
      Known.knownNot(fcNegative);
    break;
  }
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_STRICT_FLDEXP: {
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);
    Known.propagateNaN(KnownSrc, /*PropagateSign=*/true);

    // Sign is preserved, but underflows may produce zeroes.
    if (KnownSrc.isKnownNever(fcNegative))
      Known.knownNot(fcNegative);
    else if (KnownSrc.cannotBeOrderedLessThanZero())
      Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

    if (KnownSrc.isKnownNever(fcPositive))
      Known.knownNot(fcPositive);
    else if (KnownSrc.cannotBeOrderedGreaterThanZero())
      Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);

    // Can refine inf/zero handling based on the exponent operand.
    const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
    if ((InterestedClasses & ExpInfoMask) == fcNone)
      break;
    if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
      break;

    // TODO: Handle constant range of Exp

    break;
  }
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
                                  Depth);
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_STRICT_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_STRICT_FSUB: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    KnownFPClass KnownLHS, KnownRHS;
    bool WantNegative =
        (Opcode == TargetOpcode::G_FADD ||
         Opcode == TargetOpcode::G_STRICT_FADD) &&
        (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone;
    bool WantNaN = (InterestedClasses & fcNan) != fcNone;
    bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone;

    if (!WantNaN && !WantNegative && !WantNegZero)
      break;

    FPClassTest InterestedSrcs = InterestedClasses;
    if (WantNegative)
      InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
    if (InterestedClasses & fcNan)
      InterestedSrcs |= fcInf;
    computeKnownFPClass(RHS, DemandedElts, InterestedSrcs, KnownRHS, Depth + 1);

    if ((WantNaN && KnownRHS.isKnownNeverNaN()) ||
        (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) ||
        WantNegZero ||
        (Opcode == TargetOpcode::G_FSUB ||
         Opcode == TargetOpcode::G_STRICT_FSUB)) {

      // RHS is canonically cheaper to compute. Skip inspecting the LHS if
      // there's no point.
      computeKnownFPClass(LHS, DemandedElts, InterestedSrcs, KnownLHS,
                          Depth + 1);
      // Adding positive and negative infinity produces NaN.
      // TODO: Check sign of infinities.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity()))
        Known.knownNot(fcNan);

      if (Opcode == TargetOpcode::G_FADD ||
          Opcode == TargetOpcode::G_STRICT_FADD) {
        if (KnownLHS.cannotBeOrderedLessThanZero() &&
            KnownRHS.cannotBeOrderedLessThanZero())
          Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

        // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
        if ((KnownLHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType()))) ||
             KnownRHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType())))) &&
            // Make sure output negative denormal can't flush to -0
            outputDenormalIsIEEEOrPosZero(*MF, DstTy))
          Known.knownNot(fcNegZero);
      } else {
        // Only fsub -0, +0 can return -0
        if ((KnownLHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType()))) &&
             KnownRHS.isKnownNeverLogicalPosZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType())))) &&
            // Make sure output negative denormal can't flush to -0
            outputDenormalIsIEEEOrPosZero(*MF, DstTy))
          Known.knownNot(fcNegZero);
      }
    }

    break;
  }
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_STRICT_FMUL: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    // X * X is always non-negative or a NaN.
    if (LHS == RHS)
      Known.knownNot(fcNegative);

    if ((InterestedClasses & fcNan) != fcNan)
      break;

    // fcSubnormal is only needed in case of DAZ.
    const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal;

    KnownFPClass KnownLHS, KnownRHS;
    computeKnownFPClass(RHS, DemandedElts, NeedForNan, KnownRHS, Depth + 1);
    if (!KnownRHS.isKnownNeverNaN())
      break;

    computeKnownFPClass(LHS, DemandedElts, NeedForNan, KnownLHS, Depth + 1);
    if (!KnownLHS.isKnownNeverNaN())
      break;

    if (KnownLHS.SignBit && KnownRHS.SignBit) {
      if (*KnownLHS.SignBit == *KnownRHS.SignBit)
        Known.signBitMustBeZero();
      else
        Known.signBitMustBeOne();
    }

    // 0 * +/-inf produces NaN; if neither operand can be an infinity, the
    // result cannot be NaN.
    if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) {
      Known.knownNot(fcNan);
      break;
    }

    if ((KnownRHS.isKnownNeverInfinity() ||
         KnownLHS.isKnownNeverLogicalZero(MF->getDenormalMode(
             getFltSemanticForLLT(DstTy.getScalarType())))) &&
        (KnownLHS.isKnownNeverInfinity() ||
         KnownRHS.isKnownNeverLogicalZero(
             MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))))
      Known.knownNot(fcNan);

    break;
  }
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();

    if (LHS == RHS) {
      // TODO: Could filter out snan if we inspect the operand
      if (Opcode == TargetOpcode::G_FDIV) {
        // X / X is always exactly 1.0 or a NaN.
        Known.KnownFPClasses = fcNan | fcPosNormal;
      } else {
        // X % X is always exactly [+-]0.0 or a NaN.
        Known.KnownFPClasses = fcNan | fcZero;
      }

      break;
    }

    const bool WantNan = (InterestedClasses & fcNan) != fcNone;
    const bool WantNegative = (InterestedClasses & fcNegative) != fcNone;
    const bool WantPositive = Opcode == TargetOpcode::G_FREM &&
                              (InterestedClasses & fcPositive) != fcNone;
    if (!WantNan && !WantNegative && !WantPositive)
      break;

    KnownFPClass KnownLHS, KnownRHS;

    computeKnownFPClass(RHS, DemandedElts, fcNan | fcInf | fcZero | fcNegative,
                        KnownRHS, Depth + 1);

    bool KnowSomethingUseful =
        KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(fcNegative);

    if (KnowSomethingUseful || WantPositive) {
      const FPClassTest InterestedLHS =
          WantPositive ? fcAllFlags
                       : fcNan | fcInf | fcNegative;

      computeKnownFPClass(LHS, DemandedElts, InterestedClasses & InterestedLHS,
                          KnownLHS, Depth + 1);
    }

    if (Opcode == TargetOpcode::G_FDIV) {
      // Only 0/0, Inf/Inf produce NaN.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          (KnownLHS.isKnownNeverInfinity() ||
           KnownRHS.isKnownNeverInfinity()) &&
          ((KnownLHS.isKnownNeverLogicalZero(MF->getDenormalMode(
               getFltSemanticForLLT(DstTy.getScalarType())))) ||
           (KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode(
               getFltSemanticForLLT(DstTy.getScalarType())))))) {
        Known.knownNot(fcNan);
      }

      // X / -0.0 is -Inf (or NaN).
      // +X / +X is +X
      if (KnownLHS.isKnownNever(fcNegative) &&
          KnownRHS.isKnownNever(fcNegative))
        Known.knownNot(fcNegative);
    } else {
      // Inf REM x and x REM 0 produce NaN.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          KnownLHS.isKnownNeverInfinity() &&
          KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode(
              getFltSemanticForLLT(DstTy.getScalarType()))))
        Known.knownNot(fcNan);

      // The sign for frem is the same as the first operand.
      if (KnownLHS.cannotBeOrderedLessThanZero())
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
      if (KnownLHS.cannotBeOrderedGreaterThanZero())
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);

      // See if we can be more aggressive about the sign of 0.
      if (KnownLHS.isKnownNever(fcNegative))
        Known.knownNot(fcNegative);
      if (KnownLHS.isKnownNever(fcPositive))
        Known.knownNot(fcPositive);
    }

    break;
  }
  case TargetOpcode::G_FPEXT: {
    Register Dst = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    // Infinity, nan and zero propagate from source.
    computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1);

    LLT DstTy = MRI.getType(Dst).getScalarType();
    const fltSemantics &DstSem = getFltSemanticForLLT(DstTy);
    LLT SrcTy = MRI.getType(Src).getScalarType();
    const fltSemantics &SrcSem = getFltSemanticForLLT(SrcTy);

    // All subnormal inputs should be in the normal range in the result type.
    if (APFloat::isRepresentableAsNormalIn(SrcSem, DstSem)) {
      if (Known.KnownFPClasses & fcPosSubnormal)
        Known.KnownFPClasses |= fcPosNormal;
      if (Known.KnownFPClasses & fcNegSubnormal)
        Known.KnownFPClasses |= fcNegNormal;
      Known.knownNot(fcSubnormal);
    }

    // Sign bit of a nan isn't guaranteed.
    if (!Known.isKnownNeverNaN())
      Known.SignBit = std::nullopt;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
                                  Depth);
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // Cannot produce nan
    Known.knownNot(fcNan);

    // Integers cannot be subnormal
    Known.knownNot(fcSubnormal);

    // sitofp and uitofp turn into +0.0 for zero.
    Known.knownNot(fcNegZero);
    if (Opcode == TargetOpcode::G_UITOFP)
      Known.signBitMustBeZero();

    Register Val = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(Val);

    if (InterestedClasses & fcInf) {
      // Get width of largest magnitude integer (remove a bit if signed).
      // This still works for a signed minimum value because the largest FP
      // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
      int IntSize = Ty.getScalarSizeInBits();
      if (Opcode == TargetOpcode::G_SITOFP)
        --IntSize;

      // If the exponent of the largest finite FP value can hold the largest
      // integer, the result of the cast must be finite.
      LLT FPTy = DstTy.getScalarType();
      const fltSemantics &FltSem = getFltSemanticForLLT(FPTy);
      if (ilogb(APFloat::getLargest(FltSem)) >= IntSize)
        Known.knownNot(fcInf);
    }

    break;
  }
  // case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_CONCAT_VECTORS: {
    GMergeLikeInstr &Merge = cast<GMergeLikeInstr>(MI);

    if (!DstTy.isFixedVector())
      break;

    bool First = true;
    for (unsigned Idx = 0; Idx < Merge.getNumSources(); ++Idx) {
      bool NeedsElt = DemandedElts[Idx];

      // Do we demand this source element?
      if (NeedsElt) {
        Register Src = Merge.getSourceReg(Idx);
        if (First) {
          computeKnownFPClass(Src, Known, InterestedClasses, Depth + 1);
          First = false;
        } else {
          KnownFPClass Known2;
          computeKnownFPClass(Src, Known2, InterestedClasses, Depth + 1);
          Known |= Known2;
        }

        // If we don't know any bits, early out.
        if (Known.isUnknown())
          break;
      }
    }

    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    // Look through extract element. If the index is non-constant or
    // out-of-range demand all elements, otherwise just the extracted
    // element.
    GExtractVectorElement &Extract = cast<GExtractVectorElement>(MI);
    Register Vec = Extract.getVectorReg();
    Register Idx = Extract.getIndexReg();

    auto CIdx = getIConstantVRegVal(Idx, MRI);

    LLT VecTy = MRI.getType(Vec);

    if (VecTy.isFixedVector()) {
      unsigned NumElts = VecTy.getNumElements();
      APInt DemandedVecElts = APInt::getAllOnes(NumElts);
      if (CIdx && CIdx->ult(NumElts))
        DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
      return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known,
                                 Depth + 1);
    }

    break;
  }
1684 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1685 GInsertVectorElement &Insert = cast<GInsertVectorElement>(MI);
1686 Register Vec = Insert.getVectorReg();
1687 Register Elt = Insert.getElementReg();
1688 Register Idx = Insert.getIndexReg();
1689
1690 LLT VecTy = MRI.getType(Vec);
1691
1692 if (VecTy.isScalableVector())
1693 return;
1694
1695 auto CIdx = getIConstantVRegVal(Idx, MRI);
1696
1697 unsigned NumElts = DemandedElts.getBitWidth();
1698 APInt DemandedVecElts = DemandedElts;
1699 bool NeedsElt = true;
1700 // If we know the index we are inserting to, clear it from Vec check.
1701 if (CIdx && CIdx->ult(NumElts)) {
1702 DemandedVecElts.clearBit(CIdx->getZExtValue());
1703 NeedsElt = DemandedElts[CIdx->getZExtValue()];
1704 }
1705
1706 // Do we demand the inserted element?
1707 if (NeedsElt) {
1708 computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1);
1709 // If we don't know any bits, early out.
1710 if (Known.isUnknown())
1711 break;
1712 } else {
1713 Known.KnownFPClasses = fcNone;
1714 }
1715
1716 // Do we need any more elements from Vec?
1717 if (!DemandedVecElts.isZero()) {
1718 KnownFPClass Known2;
1719 computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2,
1720 Depth + 1);
1721 Known |= Known2;
1722 }
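// Example of the demanded-element split above: inserting into lane 2 of a
// <4 x s32> vector with DemandedElts = 0b1111 queries the inserted scalar
// (lane 2 is demanded) and the vector with DemandedVecElts = 0b1011; with
// DemandedElts = 0b0100 only the scalar matters and the vector is skipped.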
1723
1724 break;
1725 }
1726 case TargetOpcode::G_SHUFFLE_VECTOR: {
1727 // For undef elements, we don't know anything about the common state of
1728 // the shuffle result.
1729 GShuffleVector &Shuf = cast<GShuffleVector>(MI);
1730 APInt DemandedLHS, DemandedRHS;
1731 if (DstTy.isScalableVector()) {
1732 assert(DemandedElts == APInt(1, 1));
1733 DemandedLHS = DemandedRHS = DemandedElts;
1734 } else {
1735 if (!getShuffleDemandedElts(DstTy.getNumElements(), Shuf.getMask(),
1736 DemandedElts, DemandedLHS,
1737 DemandedRHS)) {
1738 Known.resetAll();
1739 return;
1740 }
1741 }
1742
1743 if (!!DemandedLHS) {
1744 Register LHS = Shuf.getSrc1Reg();
1745 computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known,
1746 Depth + 1);
1747
1748 // If we don't know any bits, early out.
1749 if (Known.isUnknown())
1750 break;
1751 } else {
1752 Known.KnownFPClasses = fcNone;
1753 }
1754
1755 if (!!DemandedRHS) {
1756 KnownFPClass Known2;
1757 Register RHS = Shuf.getSrc2Reg();
1758 computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2,
1759 Depth + 1);
1760 Known |= Known2;
1761 }
1762 break;
1763 }
1764 case TargetOpcode::COPY: {
1765 Register Src = MI.getOperand(1).getReg();
1766
1767 if (!Src.isVirtual())
1768 return;
1769
1770 computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1);
1771 break;
1772 }
1773 }
1774}
1775
1776KnownFPClass
1777GISelValueTracking::computeKnownFPClass(Register R, const APInt &DemandedElts,
1778 FPClassTest InterestedClasses,
1779 unsigned Depth) {
1780 KnownFPClass KnownClasses;
1781 computeKnownFPClass(R, DemandedElts, InterestedClasses, KnownClasses, Depth);
1782 return KnownClasses;
1783}
1784
1785KnownFPClass GISelValueTracking::computeKnownFPClass(
1786 Register R, FPClassTest InterestedClasses, unsigned Depth) {
1787 KnownFPClass Known;
1788 computeKnownFPClass(R, Known, InterestedClasses, Depth);
1789 return Known;
1790}
1791
1792KnownFPClass GISelValueTracking::computeKnownFPClass(
1793 Register R, const APInt &DemandedElts, uint32_t Flags,
1794 FPClassTest InterestedClasses, unsigned Depth) {
1795 if (Flags & MachineInstr::MIFlag::FmNoNans)
1796 InterestedClasses &= ~fcNan;
1797 if (Flags & MachineInstr::MIFlag::FmNoInfs)
1798 InterestedClasses &= ~fcInf;
1799
1800 KnownFPClass Result =
1801 computeKnownFPClass(R, DemandedElts, InterestedClasses, Depth);
1802
1803 if (Flags & MachineInstr::MIFlag::FmNoNans)
1804 Result.KnownFPClasses &= ~fcNan;
1805 if (Flags & MachineInstr::MIFlag::FmNoInfs)
1806 Result.KnownFPClasses &= ~fcInf;
1807 return Result;
1808}
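// Example (assuming Flags carries MachineInstr fast-math MIFlags as in the
// overload above): a G_FADD tagged nnan has fcNan dropped from both the
// query and the result, so it is reported as known never-NaN regardless of
// its operands.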
1809
1810KnownFPClass GISelValueTracking::computeKnownFPClass(
1811 Register R, uint32_t Flags, FPClassTest InterestedClasses, unsigned Depth) {
1812 LLT Ty = MRI.getType(R);
1813 APInt DemandedElts =
1814 Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
1815 return computeKnownFPClass(R, DemandedElts, Flags, InterestedClasses, Depth);
1816}
1817
1818/// Compute number of sign bits for the intersection of \p Src0 and \p Src1
1819unsigned GISelValueTracking::computeNumSignBitsMin(Register Src0, Register Src1,
1820 const APInt &DemandedElts,
1821 unsigned Depth) {
1822 // Test src1 first, since we canonicalize simpler expressions to the RHS.
1823 unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
1824 if (Src1SignBits == 1)
1825 return 1;
1826 return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
1827}
1828
1829/// Compute the known number of sign bits with attached range metadata in the
1830/// memory operand. If this is an extending load, accounts for the behavior of
1831/// the high bits.
1832static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
1833 unsigned TyBits) {
1834 const MDNode *Ranges = Ld->getRanges();
1835 if (!Ranges)
1836 return 1;
1837
1838 ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
1839 if (TyBits > CR.getBitWidth()) {
1840 switch (Ld->getOpcode()) {
1841 case TargetOpcode::G_SEXTLOAD:
1842 CR = CR.signExtend(TyBits);
1843 break;
1844 case TargetOpcode::G_ZEXTLOAD:
1845 CR = CR.zeroExtend(TyBits);
1846 break;
1847 default:
1848 break;
1849 }
1850 }
1851
1852 return std::min(CR.getSignedMin().getNumSignBits(),
1853 CR.getSignedMax().getNumSignBits());
1854}
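// Worked example: a G_SEXTLOAD of an s8 value annotated with !range [-32, 32)
// into s32 sign-extends the range; getSignedMin() = -32 and
// getSignedMax() = 31 each have 27 leading bits equal to their sign bit, so
// 27 sign bits are known.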
1855
1856unsigned GISelValueTracking::computeNumSignBits(Register R,
1857 const APInt &DemandedElts,
1858 unsigned Depth) {
1859 MachineInstr &MI = *MRI.getVRegDef(R);
1860 unsigned Opcode = MI.getOpcode();
1861
1862 if (Opcode == TargetOpcode::G_CONSTANT)
1863 return MI.getOperand(1).getCImm()->getValue().getNumSignBits();
1864
1865 if (Depth == getMaxDepth())
1866 return 1;
1867
1868 if (!DemandedElts)
1869 return 1; // No demanded elts, better to assume we don't know anything.
1870
1871 LLT DstTy = MRI.getType(R);
1872 const unsigned TyBits = DstTy.getScalarSizeInBits();
1873
1874 // Handle the case where this is called on a register that does not have a
1875 // type constraint. This is unlikely to occur except when looking through copies,
1876 // but it is possible for the initial register being queried to be in this
1877 // state.
1878 if (!DstTy.isValid())
1879 return 1;
1880
1881 unsigned FirstAnswer = 1;
1882 switch (Opcode) {
1883 case TargetOpcode::COPY: {
1884 MachineOperand &Src = MI.getOperand(1);
1885 if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
1886 MRI.getType(Src.getReg()).isValid()) {
1887 // Don't increment Depth for this one since we didn't do any work.
1888 return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
1889 }
1890
1891 return 1;
1892 }
1893 case TargetOpcode::G_SEXT: {
1894 Register Src = MI.getOperand(1).getReg();
1895 LLT SrcTy = MRI.getType(Src);
1896 unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
1897 return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
1898 }
1899 case TargetOpcode::G_ASSERT_SEXT:
1900 case TargetOpcode::G_SEXT_INREG: {
1901 // Max of the input and what this extends.
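// Example: %r:s32 = G_SEXT_INREG %x, 8 guarantees at least
// 32 - 8 + 1 = 25 sign bits; if %x is known to have more, the larger
// count wins.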
1902 Register Src = MI.getOperand(1).getReg();
1903 unsigned SrcBits = MI.getOperand(2).getImm();
1904 unsigned InRegBits = TyBits - SrcBits + 1;
1905 return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1),
1906 InRegBits);
1907 }
1908 case TargetOpcode::G_LOAD: {
1909 GLoad *Ld = cast<GLoad>(&MI);
1910 if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
1911 break;
1912
1913 return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
1914 }
1915 case TargetOpcode::G_SEXTLOAD: {
1916 GSExtLoad *Ld = cast<GSExtLoad>(&MI);
1917
1918 // FIXME: We need an in-memory type representation.
1919 if (DstTy.isVector())
1920 return 1;
1921
1922 unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
1923 if (NumBits != 1)
1924 return NumBits;
1925
1926 // e.g. i16->i32 = '17' bits known.
1927 const MachineMemOperand *MMO = *MI.memoperands_begin();
1928 return TyBits - MMO->getSizeInBits().getValue() + 1;
1929 }
1930 case TargetOpcode::G_ZEXTLOAD: {
1931 GZExtLoad *Ld = cast<GZExtLoad>(&MI);
1932
1933 // FIXME: We need an in-memory type representation.
1934 if (DstTy.isVector())
1935 return 1;
1936
1937 unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
1938 if (NumBits != 1)
1939 return NumBits;
1940
1941 // e.g. i16->i32 = '16' bits known.
1942 const MachineMemOperand *MMO = *MI.memoperands_begin();
1943 return TyBits - MMO->getSizeInBits().getValue();
1944 }
1945 case TargetOpcode::G_AND:
1946 case TargetOpcode::G_OR:
1947 case TargetOpcode::G_XOR: {
1948 Register Src1 = MI.getOperand(1).getReg();
1949 unsigned Src1NumSignBits =
1950 computeNumSignBits(Src1, DemandedElts, Depth + 1);
1951 if (Src1NumSignBits != 1) {
1952 Register Src2 = MI.getOperand(2).getReg();
1953 unsigned Src2NumSignBits =
1954 computeNumSignBits(Src2, DemandedElts, Depth + 1);
1955 FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits);
1956 }
1957 break;
1958 }
1959 case TargetOpcode::G_ASHR: {
1960 Register Src1 = MI.getOperand(1).getReg();
1961 Register Src2 = MI.getOperand(2).getReg();
1962 FirstAnswer = computeNumSignBits(Src1, DemandedElts, Depth + 1);
1963 if (auto C = getValidMinimumShiftAmount(Src2, DemandedElts, Depth + 1))
1964 FirstAnswer = std::min<uint64_t>(FirstAnswer + *C, TyBits);
1965 break;
1966 }
1967 case TargetOpcode::G_SHL: {
1968 Register Src1 = MI.getOperand(1).getReg();
1969 Register Src2 = MI.getOperand(2).getReg();
1970 if (std::optional<ConstantRange> ShAmtRange =
1971 getValidShiftAmountRange(Src2, DemandedElts, Depth + 1)) {
1972 uint64_t MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue();
1973 uint64_t MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue();
1974
1975 MachineInstr &ExtMI = *MRI.getVRegDef(Src1);
1976 unsigned ExtOpc = ExtMI.getOpcode();
1977
1978 // Try to look through ZERO/SIGN/ANY_EXTEND. If all extended bits are
1979 // shifted out, then we can compute the number of sign bits for the
1980 // operand being extended. A future improvement could be to pass along the
1981 // "shifted left by" information in the recursive calls to
1982 // ComputeKnownSignBits, allowing us to handle this more generically.
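// Example: for %e:s64 = G_ANYEXT %x:s32 and %r = G_SHL %e, 40, nothing is
// known about the high bits of %e, but all 32 extension bits are shifted
// out (SizeDiff = 32 <= MinShAmt = 40), so with 12 sign bits in %x,
// Tmp = 32 + 12 = 44 and the result keeps 44 - 40 = 4 sign bits.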
1983 if (ExtOpc == TargetOpcode::G_SEXT || ExtOpc == TargetOpcode::G_ZEXT ||
1984 ExtOpc == TargetOpcode::G_ANYEXT) {
1985 LLT ExtTy = MRI.getType(Src1);
1986 Register Extendee = ExtMI.getOperand(1).getReg();
1987 LLT ExtendeeTy = MRI.getType(Extendee);
1988 uint64_t SizeDiff =
1989 ExtTy.getScalarSizeInBits() - ExtendeeTy.getScalarSizeInBits();
1990
1991 if (SizeDiff <= MinShAmt) {
1992 unsigned Tmp =
1993 SizeDiff + computeNumSignBits(Extendee, DemandedElts, Depth + 1);
1994 if (MaxShAmt < Tmp)
1995 return Tmp - MaxShAmt;
1996 }
1997 }
1998 // shl destroys sign bits, ensure it doesn't shift out all sign bits.
1999 unsigned Tmp = computeNumSignBits(Src1, DemandedElts, Depth + 1);
2000 if (MaxShAmt < Tmp)
2001 return Tmp - MaxShAmt;
2002 }
2003 break;
2004 }
2005 case TargetOpcode::G_TRUNC: {
2006 Register Src = MI.getOperand(1).getReg();
2007 LLT SrcTy = MRI.getType(Src);
2008
2009 // Check if the sign bits of the source go down as far as the truncated value.
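// Example: truncating an s64 value with 40 known sign bits to s32 drops 32
// high bits, leaving 40 - (64 - 32) = 8 known sign bits.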
2010 unsigned DstTyBits = DstTy.getScalarSizeInBits();
2011 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
2012 unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
2013 if (NumSrcSignBits > (NumSrcBits - DstTyBits))
2014 return NumSrcSignBits - (NumSrcBits - DstTyBits);
2015 break;
2016 }
2017 case TargetOpcode::G_SELECT: {
2018 return computeNumSignBitsMin(MI.getOperand(2).getReg(),
2019 MI.getOperand(3).getReg(), DemandedElts,
2020 Depth + 1);
2021 }
2022 case TargetOpcode::G_SMIN:
2023 case TargetOpcode::G_SMAX:
2024 case TargetOpcode::G_UMIN:
2025 case TargetOpcode::G_UMAX:
2026 // TODO: Handle clamp pattern with number of sign bits for SMIN/SMAX.
2027 return computeNumSignBitsMin(MI.getOperand(1).getReg(),
2028 MI.getOperand(2).getReg(), DemandedElts,
2029 Depth + 1);
2030 case TargetOpcode::G_SADDO:
2031 case TargetOpcode::G_SADDE:
2032 case TargetOpcode::G_UADDO:
2033 case TargetOpcode::G_UADDE:
2034 case TargetOpcode::G_SSUBO:
2035 case TargetOpcode::G_SSUBE:
2036 case TargetOpcode::G_USUBO:
2037 case TargetOpcode::G_USUBE:
2038 case TargetOpcode::G_SMULO:
2039 case TargetOpcode::G_UMULO: {
2040 // If compares return 0/-1, all bits are sign bits.
2041 // We know that we have an integer-based boolean since these operations
2042 // are only available for integers.
2043 if (MI.getOperand(1).getReg() == R) {
2044 if (TL.getBooleanContents(DstTy.isVector(), false) ==
2045 TargetLowering::ZeroOrNegativeOneBooleanContent)
2046 return TyBits;
2047 }
2048
2049 break;
2050 }
2051 case TargetOpcode::G_SUB: {
2052 Register Src2 = MI.getOperand(2).getReg();
2053 unsigned Src2NumSignBits =
2054 computeNumSignBits(Src2, DemandedElts, Depth + 1);
2055 if (Src2NumSignBits == 1)
2056 return 1; // Early out.
2057
2058 // Handle NEG.
2059 Register Src1 = MI.getOperand(1).getReg();
2060 KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth);
2061 if (Known1.isZero()) {
2062 KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth);
2063 // If the input is known to be 0 or 1, the output is 0/-1, which is all
2064 // sign bits set.
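// Example: for %r:s32 = G_SUB 0, %x where %x is known to be 0 or 1, the
// result is 0 or -1, i.e. all 32 bits are sign bits.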
2065 if ((Known2.Zero | 1).isAllOnes())
2066 return TyBits;
2067
2068 // If the input is known to be positive (the sign bit is known clear),
2069 // the output of the NEG has, at worst, the same number of sign bits as
2070 // the input.
2071 if (Known2.isNonNegative()) {
2072 FirstAnswer = Src2NumSignBits;
2073 break;
2074 }
2075
2076 // Otherwise, we treat this like a SUB.
2077 }
2078
2079 unsigned Src1NumSignBits =
2080 computeNumSignBits(Src1, DemandedElts, Depth + 1);
2081 if (Src1NumSignBits == 1)
2082 return 1; // Early Out.
2083
2084 // Sub can have at most one carry bit. Thus we know that the output
2085 // is, at worst, one more bit than the inputs.
2086 FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
2087 break;
2088 }
2089 case TargetOpcode::G_ADD: {
2090 Register Src2 = MI.getOperand(2).getReg();
2091 unsigned Src2NumSignBits =
2092 computeNumSignBits(Src2, DemandedElts, Depth + 1);
2093 if (Src2NumSignBits <= 2)
2094 return 1; // Early out.
2095
2096 Register Src1 = MI.getOperand(1).getReg();
2097 unsigned Src1NumSignBits =
2098 computeNumSignBits(Src1, DemandedElts, Depth + 1);
2099 if (Src1NumSignBits == 1)
2100 return 1; // Early Out.
2101
2102 // Special case decrementing a value (ADD X, -1):
2103 KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth);
2104 if (Known2.isAllOnes()) {
2105 KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth);
2106 // If the input is known to be 0 or 1, the output is 0/-1, which is all
2107 // sign bits set.
2108 if ((Known1.Zero | 1).isAllOnes())
2109 return TyBits;
2110
2111 // If we are subtracting one from a positive number, there is no carry
2112 // out of the result.
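// Example: for %r = G_ADD %x, -1 with %x known non-negative and having N
// sign bits, the result keeps all N sign bits rather than the conservative
// N - 1 used on the generic path below.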
2113 if (Known1.isNonNegative()) {
2114 FirstAnswer = Src1NumSignBits;
2115 break;
2116 }
2117
2118 // Otherwise, we treat this like an ADD.
2119 }
2120
2121 // Add can have at most one carry bit. Thus we know that the output
2122 // is, at worst, one more bit than the inputs.
2123 FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
2124 break;
2125 }
2126 case TargetOpcode::G_FCMP:
2127 case TargetOpcode::G_ICMP: {
2128 bool IsFP = Opcode == TargetOpcode::G_FCMP;
2129 if (TyBits == 1)
2130 break;
2131 auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP);
2132 if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent)
2133 return TyBits; // All bits are sign bits.
2134 if (BC == TargetLowering::ZeroOrOneBooleanContent)
2135 return TyBits - 1; // Every always-zero bit is a sign bit.
2136 break;
2137 }
2138 case TargetOpcode::G_BUILD_VECTOR: {
2139 // Collect the known bits that are shared by every demanded vector element.
2140 FirstAnswer = TyBits;
2141 APInt SingleDemandedElt(1, 1);
2142 for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
2143 if (!DemandedElts[I])
2144 continue;
2145
2146 unsigned Tmp2 =
2147 computeNumSignBits(MO.getReg(), SingleDemandedElt, Depth + 1);
2148 FirstAnswer = std::min(FirstAnswer, Tmp2);
2149
2150 // If we don't know any bits, early out.
2151 if (FirstAnswer == 1)
2152 break;
2153 }
2154 break;
2155 }
2156 case TargetOpcode::G_CONCAT_VECTORS: {
2157 if (MRI.getType(MI.getOperand(0).getReg()).isScalableVector())
2158 break;
2159 FirstAnswer = TyBits;
2160 // Determine the minimum number of sign bits across all demanded
2161 // elts of the input vectors. Early out if the result is already 1.
2162 unsigned NumSubVectorElts =
2163 MRI.getType(MI.getOperand(1).getReg()).getNumElements();
2164 for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
2165 APInt DemandedSub =
2166 DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts);
2167 if (!DemandedSub)
2168 continue;
2169 unsigned Tmp2 = computeNumSignBits(MO.getReg(), DemandedSub, Depth + 1);
2170
2171 FirstAnswer = std::min(FirstAnswer, Tmp2);
2172
2173 // If we don't know any bits, early out.
2174 if (FirstAnswer == 1)
2175 break;
2176 }
2177 break;
2178 }
2179 case TargetOpcode::G_SHUFFLE_VECTOR: {
2180 // Collect the minimum number of sign bits that are shared by every vector
2181 // element referenced by the shuffle.
2182 APInt DemandedLHS, DemandedRHS;
2183 Register Src1 = MI.getOperand(1).getReg();
2184 unsigned NumElts = MRI.getType(Src1).getNumElements();
2185 if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
2186 DemandedElts, DemandedLHS, DemandedRHS))
2187 return 1;
2188
2189 if (!!DemandedLHS)
2190 FirstAnswer = computeNumSignBits(Src1, DemandedLHS, Depth + 1);
2191 // If we don't know anything, early out and try computeKnownBits fall-back.
2192 if (FirstAnswer == 1)
2193 break;
2194 if (!!DemandedRHS) {
2195 unsigned Tmp2 =
2196 computeNumSignBits(MI.getOperand(2).getReg(), DemandedRHS, Depth + 1);
2197 FirstAnswer = std::min(FirstAnswer, Tmp2);
2198 }
2199 break;
2200 }
2201 case TargetOpcode::G_SPLAT_VECTOR: {
2202 // Check if the sign bits of the source go down as far as the truncated value.
2203 Register Src = MI.getOperand(1).getReg();
2204 unsigned NumSrcSignBits = computeNumSignBits(Src, APInt(1, 1), Depth + 1);
2205 unsigned NumSrcBits = MRI.getType(Src).getSizeInBits();
2206 if (NumSrcSignBits > (NumSrcBits - TyBits))
2207 return NumSrcSignBits - (NumSrcBits - TyBits);
2208 break;
2209 }
2210 case TargetOpcode::G_INTRINSIC:
2211 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2212 case TargetOpcode::G_INTRINSIC_CONVERGENT:
2213 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
2214 default: {
2215 unsigned NumBits =
2216 TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
2217 if (NumBits > 1)
2218 FirstAnswer = std::max(FirstAnswer, NumBits);
2219 break;
2220 }
2221 }
2222
2223 // Finally, if we can prove that the top bits of the result are 0's or 1's,
2224 // use this information.
2225 KnownBits Known = getKnownBits(R, DemandedElts, Depth);
2226 APInt Mask;
2227 if (Known.isNonNegative()) { // sign bit is 0
2228 Mask = Known.Zero;
2229 } else if (Known.isNegative()) { // sign bit is 1;
2230 Mask = Known.One;
2231 } else {
2232 // Nothing known.
2233 return FirstAnswer;
2234 }
2235
2236 // Okay, we know that the sign bit in Mask is set. Use CLO to determine
2237 // the number of identical bits in the top of the input value.
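// Example: with TyBits = 32, FirstAnswer = 1, and the top 12 bits of
// Known.Zero set, Mask has 12 leading ones after the shift, so 12 sign bits
// are reported.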
2238 Mask <<= Mask.getBitWidth() - TyBits;
2239 return std::max(FirstAnswer, Mask.countl_one());
2240}
2241
2242unsigned GISelValueTracking::computeNumSignBits(Register R, unsigned Depth) {
2243 LLT Ty = MRI.getType(R);
2244 APInt DemandedElts =
2245 Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
2246 return computeNumSignBits(R, DemandedElts, Depth);
2247}
2248
2249std::optional<ConstantRange> GISelValueTracking::getValidShiftAmountRange(
2250 Register R, const APInt &DemandedElts, unsigned Depth) {
2251 // Shifting more than the bitwidth is not valid.
2252 MachineInstr &MI = *MRI.getVRegDef(R);
2253 unsigned Opcode = MI.getOpcode();
2254
2255 LLT Ty = MRI.getType(R);
2256 unsigned BitWidth = Ty.getScalarSizeInBits();
2257
2258 if (Opcode == TargetOpcode::G_CONSTANT) {
2259 const APInt &ShAmt = MI.getOperand(1).getCImm()->getValue();
2260 if (ShAmt.uge(BitWidth))
2261 return std::nullopt;
2262 return ConstantRange(ShAmt);
2263 }
2264
2265 if (Opcode == TargetOpcode::G_BUILD_VECTOR) {
2266 const APInt *MinAmt = nullptr, *MaxAmt = nullptr;
2267 for (unsigned I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
2268 if (!DemandedElts[I])
2269 continue;
2270 MachineInstr *Op = MRI.getVRegDef(MI.getOperand(I + 1).getReg());
2271 if (Op->getOpcode() != TargetOpcode::G_CONSTANT) {
2272 MinAmt = MaxAmt = nullptr;
2273 break;
2274 }
2275
2276 const APInt &ShAmt = Op->getOperand(1).getCImm()->getValue();
2277 if (ShAmt.uge(BitWidth))
2278 return std::nullopt;
2279 if (!MinAmt || MinAmt->ugt(ShAmt))
2280 MinAmt = &ShAmt;
2281 if (!MaxAmt || MaxAmt->ult(ShAmt))
2282 MaxAmt = &ShAmt;
2283 }
2284 assert(((!MinAmt && !MaxAmt) || (MinAmt && MaxAmt)) &&
2285 "Failed to find matching min/max shift amounts");
2286 if (MinAmt && MaxAmt)
2287 return ConstantRange(*MinAmt, *MaxAmt + 1);
2288 }
2289
2290 // Use computeKnownBits to find a hidden constant/knownbits (usually type
2291 // legalized). e.g. Hidden behind multiple bitcasts/build_vector/casts etc.
2292 KnownBits KnownAmt = getKnownBits(R, DemandedElts, Depth);
2293 if (KnownAmt.getMaxValue().ult(BitWidth))
2294 return ConstantRange::fromKnownBits(KnownAmt, /*IsSigned=*/false);
2295
2296 return std::nullopt;
2297}
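// Usage sketch for getValidShiftAmountRange (hypothetical client code; VT is
// a GISelValueTracking instance and Amt the shift-amount operand of a G_LSHR):
//   if (std::optional<ConstantRange> R =
//           VT.getValidShiftAmountRange(Amt, DemandedElts, /*Depth=*/0)) {
//     // Every demanded lane shifts right by at least the range minimum, so
//     // the result has at least that many known leading zero bits.
//     uint64_t MinAmt = R->getUnsignedMin().getZExtValue();
//   }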
2298
2299std::optional<uint64_t> GISelValueTracking::getValidMinimumShiftAmount(
2300 Register R, const APInt &DemandedElts, unsigned Depth) {
2301 if (std::optional<ConstantRange> AmtRange =
2302 getValidShiftAmountRange(R, DemandedElts, Depth))
2303 return AmtRange->getUnsignedMin().getZExtValue();
2304 return std::nullopt;
2305}
2306
2307void GISelValueTrackingAnalysisLegacy::getAnalysisUsage(
2308 AnalysisUsage &AU) const {
2309 AU.setPreservesAll();
2310 MachineFunctionPass::getAnalysisUsage(AU);
2311}
2312
2313bool GISelValueTrackingAnalysisLegacy::runOnMachineFunction(
2314 MachineFunction &MF) {
2315 return false;
2316}
2317
2318GISelValueTracking &GISelValueTrackingAnalysisLegacy::get(MachineFunction &MF) {
2319 if (!Info) {
2320 unsigned MaxDepth =
2321 MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
2322 Info = std::make_unique<GISelValueTracking>(MF, MaxDepth);
2323 }
2324 return *Info;
2325}
2326
2327AnalysisKey GISelValueTrackingAnalysis::Key;
2328
2329GISelValueTracking
2330GISelValueTrackingAnalysis::run(MachineFunction &MF,
2331 MachineFunctionAnalysisManager &MFAM) {
2332 return Result(MF);
2333}
2334
2335PreservedAnalyses
2336GISelValueTrackingPrinterPass::run(MachineFunction &MF,
2337 MachineFunctionAnalysisManager &MFAM) {
2338 auto &VTA = MFAM.getResult<GISelValueTrackingAnalysis>(MF);
2339 const auto &MRI = MF.getRegInfo();
2340 OS << "name: ";
2341 MF.getFunction().printAsOperand(OS, /*PrintType=*/false);
2342 OS << '\n';
2343
2344 for (MachineBasicBlock &BB : MF) {
2345 for (MachineInstr &MI : BB) {
2346 for (MachineOperand &MO : MI.defs()) {
2347 if (!MO.isReg() || MO.getReg().isPhysical())
2348 continue;
2349 Register Reg = MO.getReg();
2350 if (!MRI.getType(Reg).isValid())
2351 continue;
2352 KnownBits Known = VTA.getKnownBits(Reg);
2353 unsigned SignedBits = VTA.computeNumSignBits(Reg);
2354 OS << " " << MO << " KnownBits:" << Known << " SignBits:" << SignedBits
2355 << '\n';
2356 }
2357 }
2358 }
2359 return PreservedAnalyses::all();
2360}
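// Usage sketch (hypothetical, not part of this file): any pass running under
// the new MachineFunction pass manager can query the analysis the same way
// the printer above does:
//   GISelValueTracking &VT = MFAM.getResult<GISelValueTrackingAnalysis>(MF);
//   KnownBits Known = VT.getKnownBits(Reg);
//   unsigned SignBits = VT.computeNumSignBits(Reg);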