LLVM 23.0.0git
InstructionCombining.cpp
Go to the documentation of this file.
1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
32//
33//===----------------------------------------------------------------------===//
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APFloat.h"
37#include "llvm/ADT/APInt.h"
38#include "llvm/ADT/ArrayRef.h"
39#include "llvm/ADT/DenseMap.h"
42#include "llvm/ADT/Statistic.h"
47#include "llvm/Analysis/CFG.h"
62#include "llvm/IR/BasicBlock.h"
63#include "llvm/IR/CFG.h"
64#include "llvm/IR/Constant.h"
65#include "llvm/IR/Constants.h"
66#include "llvm/IR/DIBuilder.h"
67#include "llvm/IR/DataLayout.h"
68#include "llvm/IR/DebugInfo.h"
70#include "llvm/IR/Dominators.h"
72#include "llvm/IR/Function.h"
74#include "llvm/IR/IRBuilder.h"
75#include "llvm/IR/InstrTypes.h"
76#include "llvm/IR/Instruction.h"
79#include "llvm/IR/Intrinsics.h"
80#include "llvm/IR/Metadata.h"
81#include "llvm/IR/Operator.h"
82#include "llvm/IR/PassManager.h"
84#include "llvm/IR/Type.h"
85#include "llvm/IR/Use.h"
86#include "llvm/IR/User.h"
87#include "llvm/IR/Value.h"
88#include "llvm/IR/ValueHandle.h"
93#include "llvm/Support/Debug.h"
102#include <algorithm>
103#include <cassert>
104#include <cstdint>
105#include <memory>
106#include <optional>
107#include <string>
108#include <utility>
109
110#define DEBUG_TYPE "instcombine"
112#include <optional>
113
114using namespace llvm;
115using namespace llvm::PatternMatch;
116
// Pass-level counters, a debug counter, and the code-sinking switch.

// Iteration counters: the total number of combining iterations, plus one
// counter per bucket (one, two, three, four-or-more) of functions.
117STATISTIC(NumWorklistIterations,
118 "Number of instruction combining iterations performed");
119STATISTIC(NumOneIteration, "Number of functions with one iteration");
120STATISTIC(NumTwoIterations, "Number of functions with two iterations");
121STATISTIC(NumThreeIterations, "Number of functions with three iterations");
122STATISTIC(NumFourOrMoreIterations,
123 "Number of functions with four or more iterations");
124
// Per-transform counters; each description string states what is counted.
125STATISTIC(NumCombined , "Number of insts combined");
126STATISTIC(NumConstProp, "Number of constant folds");
127STATISTIC(NumDeadInst , "Number of dead inst eliminated");
128STATISTIC(NumSunkInst , "Number of instructions sunk");
129STATISTIC(NumExpand, "Number of expansions");
130STATISTIC(NumFactor , "Number of factorizations");
131STATISTIC(NumReassoc , "Number of reassociations");
// Debug counter registered under the name "instcombine-visit".
132DEBUG_COUNTER(VisitCounter, "instcombine-visit",
133 "Controls which instructions are visited");
134
// Command-line switch "instcombine-code-sinking"; defaults to true.
135static cl::opt<bool> EnableCodeSinking("instcombine-code-sinking",
136 cl::desc("Enable code sinking"),
137 cl::init(true));
138
140 "instcombine-max-sink-users", cl::init(32),
141 cl::desc("Maximum number of undroppable users for instruction sinking"));
142
144MaxArraySize("instcombine-maxarray-size", cl::init(1024),
145 cl::desc("Maximum array size considered when doing a combine"));
146
148 "instcombine-max-allocsite-removable-users", cl::Hidden, cl::init(2048),
149 cl::desc("Maximum number of users to visit in alloc-site "
150 "removability analysis"));
151
152namespace llvm {
154} // end namespace llvm
155
// Hidden command-line option "instcombine-lower-dbg-declare", on by default.
// NOTE(review): the option is declared as cl::opt<unsigned> but initialized
// with the boolean literal `true` (which converts to 1). Confirm whether
// cl::opt<bool> was intended before changing it, since the option type
// determines which command-line values are accepted.
156// FIXME: Remove this flag when it is no longer necessary to convert
157// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
158// increases variable availability at the cost of accuracy. Variables that
159// cannot be promoted by mem2reg or SROA will be described as living in memory
160// for their entire lifetime. However, passes like DSE and instcombine can
161// delete stores to the alloca, leading to misleading and inaccurate debug
162// information. This flag can be removed when those passes are fixed.
163static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
164 cl::Hidden, cl::init(true));
165
// Out-of-line, compiler-generated destructor for the IRBuilder inserter.
// NOTE(review): presumably defined here rather than in the header to give the
// class a single home translation unit - confirm against the declaration.
166InstCombiner::IRBuilderInstCombineInserter::~IRBuilderInstCombineInserter() =
167 default;
168
// Bookkeeping performed for an instruction I handed to this inserter.
// The visible body:
//  - queues I on the combiner's worklist,
//  - registers I with the assumption cache when it is an AssumeInst,
//  - copies !annotation metadata from IC.AnnotationMetadataSource when that
//    source is set.
// NOTE(review): original source line 171 is absent from this copy; Name and
// InsertPt are unused in what remains, so the missing statement is presumably
// the one that performs the actual insertion - confirm against upstream.
169void InstCombiner::IRBuilderInstCombineInserter::InsertHelper(
170 Instruction *I, const Twine &Name, BasicBlock::iterator InsertPt) const {
// Make sure the new instruction gets visited by the combiner.
172 IC.Worklist.add(I);
173 if (auto *Assume = dyn_cast<AssumeInst>(I))
174 IC.AC.registerAssumption(Assume);
175 if (IC.AnnotationMetadataSource)
176 I->copyMetadata(*IC.AnnotationMetadataSource, LLVMContext::MD_annotation);
177}
178
179std::optional<Instruction *>
181 // Handle target specific intrinsics
182 if (II.getCalledFunction()->isTargetIntrinsic()) {
183 return TTIForTargetIntrinsicsOnly.instCombineIntrinsic(*this, II);
184 }
185 return std::nullopt;
186}
187
189 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
190 bool &KnownBitsComputed) {
191 // Handle target specific intrinsics
192 if (II.getCalledFunction()->isTargetIntrinsic()) {
193 return TTIForTargetIntrinsicsOnly.simplifyDemandedUseBitsIntrinsic(
194 *this, II, DemandedMask, Known, KnownBitsComputed);
195 }
196 return std::nullopt;
197}
198
200 IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
201 APInt &PoisonElts2, APInt &PoisonElts3,
202 std::function<void(Instruction *, unsigned, APInt, APInt &)>
203 SimplifyAndSetOp) {
204 // Handle target specific intrinsics
205 if (II.getCalledFunction()->isTargetIntrinsic()) {
206 return TTIForTargetIntrinsicsOnly.simplifyDemandedVectorEltsIntrinsic(
207 *this, II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3,
208 SimplifyAndSetOp);
209 }
210 return std::nullopt;
211}
212
213bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
214 // Approved exception for TTI use: This queries a legality property of the
215 // target, not an profitability heuristic. Ideally this should be part of
216 // DataLayout instead.
217 return TTIForTargetIntrinsicsOnly.isValidAddrSpaceCast(FromAS, ToAS);
218}
219
// Emits IR computing the offset of GEP and returns it.
//  - RewriteGEP == false: forwards directly to llvm::emitGEPOffset.
//  - RewriteGEP == true: the offset is expanded at the GEP itself (when the
//    GEP is an instruction), and a GEP with non-constant indices and a
//    non-i8 source element type is re-expressed as an i8 GEP over that offset.
// The builder's insertion point is restored on exit by the guard.
// NOTE(review): original source lines 233 and 236 are absent from this copy,
// so the call that consumes the `*Inst, Builder.CreateGEP(...)` arguments and
// whatever followed it are not visible here - confirm against upstream.
220Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) {
221 if (!RewriteGEP)
222 return llvm::emitGEPOffset(&Builder, DL, GEP);
223
224 IRBuilderBase::InsertPointGuard Guard(Builder);
225 auto *Inst = dyn_cast<Instruction>(GEP);
226 if (Inst)
227 Builder.SetInsertPoint(Inst);
228
// Single-argument call: presumably relies on RewriteGEP defaulting to false in
// the declaration, which makes this the non-rewriting path - TODO confirm.
229 Value *Offset = EmitGEPOffset(GEP);
230 // Rewrite non-trivial GEPs to avoid duplicating the offset arithmetic.
231 if (Inst && !GEP->hasAllConstantIndices() &&
232 !GEP->getSourceElementType()->isIntegerTy(8)) {
234 *Inst, Builder.CreateGEP(Builder.getInt8Ty(), GEP->getPointerOperand(),
235 Offset, "", GEP->getNoWrapFlags()));
237 }
238 return Offset;
239}
240
// Emits the sum of the offsets of the given GEPs and returns it.
//  - GEPs are visited in reverse order of the GEPs array.
//  - Offsets of one-use GEPs are collected in OneUseSum and folded into the
//    offset of the next multi-use GEP that is visited.
//  - NW.hasNoUnsignedWrap() and NW.isInBounds() are passed as the trailing
//    flag arguments of every add that is emitted.
//  - With RewriteGEPs set, each offset is expanded at its own GEP instruction
//    and the GEP may be rewritten as a ptradd over the computed offset.
// Returns the null value of IdxTy when no offset was accumulated.
// NOTE(review): original source lines 266, 288 and 293 are absent from this
// copy. As shown, Offset is read without a visible assignment and the call
// that consumes `*Inst, Builder.CreatePtrAdd(...)` is not visible - confirm
// against upstream before relying on this listing.
241Value *InstCombinerImpl::EmitGEPOffsets(ArrayRef<GEPOperator *> GEPs,
242 GEPNoWrapFlags NW, Type *IdxTy,
243 bool RewriteGEPs) {
// Add(Sum, Offset): yields Offset when Sum is still null, otherwise emits
// Sum + Offset with the flags taken from NW.
244 auto Add = [&](Value *Sum, Value *Offset) -> Value * {
245 if (Sum)
246 return Builder.CreateAdd(Sum, Offset, "", NW.hasNoUnsignedWrap(),
247 NW.isInBounds());
248 else
249 return Offset;
250 };
251
252 Value *Sum = nullptr;
253 Value *OneUseSum = nullptr;
254 Value *OneUseBase = nullptr;
255 GEPNoWrapFlags OneUseFlags = GEPNoWrapFlags::all();
256 for (GEPOperator *GEP : reverse(GEPs)) {
257 Value *Offset;
258 {
259 // Expand the offset at the point of the previous GEP to enable rewriting.
260 // However, use the original insertion point for calculating Sum.
261 IRBuilderBase::InsertPointGuard Guard(Builder);
262 auto *Inst = dyn_cast<Instruction>(GEP);
263 if (RewriteGEPs && Inst)
264 Builder.SetInsertPoint(Inst);
265
// An offset whose type differs from IdxTy is splatted to IdxTy's element
// count; IdxTy is cast to VectorType on this path.
267 if (Offset->getType() != IdxTy)
268 Offset = Builder.CreateVectorSplat(
269 cast<VectorType>(IdxTy)->getElementCount(), Offset);
270 if (GEP->hasOneUse()) {
271 // Offsets of one-use GEPs will be merged into the next multi-use GEP.
272 OneUseSum = Add(OneUseSum, Offset);
273 OneUseFlags = OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags());
// Remember the pointer operand of the first one-use GEP seen in this run.
274 if (!OneUseBase)
275 OneUseBase = GEP->getPointerOperand();
276 continue;
277 }
278
279 if (OneUseSum)
280 Offset = Add(OneUseSum, Offset);
281
282 // Rewrite the GEP to reuse the computed offset. This also includes
283 // offsets from preceding one-use GEPs of matched type.
284 if (RewriteGEPs && Inst &&
285 Offset->getType()->isVectorTy() == GEP->getType()->isVectorTy() &&
286 !(GEP->getSourceElementType()->isIntegerTy(8) &&
287 GEP->getOperand(1) == Offset)) {
289 *Inst,
290 Builder.CreatePtrAdd(
291 OneUseBase ? OneUseBase : GEP->getPointerOperand(), Offset, "",
292 OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags())));
294 }
295 }
296
// A multi-use GEP has been consumed: add its offset to the running total and
// reset the one-use accumulators.
297 Sum = Add(Sum, Offset);
298 OneUseSum = OneUseBase = nullptr;
299 OneUseFlags = GEPNoWrapFlags::all();
300 }
// Fold in one-use offsets that were never merged into a multi-use GEP.
301 if (OneUseSum)
302 Sum = Add(Sum, OneUseSum);
303 if (!Sum)
304 return Constant::getNullValue(IdxTy);
305 return Sum;
306}
307
308/// Legal integers and common types are considered desirable. This is used to
309/// avoid creating instructions with types that may not be supported well by the
310/// the backend.
311/// NOTE: This treats i8, i16 and i32 specially because they are common
312/// types in frontend languages.
313bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
314 switch (BitWidth) {
315 case 8:
316 case 16:
317 case 32:
318 return true;
319 default:
320 return DL.isLegalInteger(BitWidth);
321 }
322}
323
324/// Return true if it is desirable to convert an integer computation from a
325/// given bit width to a new bit width.
326/// We don't want to convert from a legal or desirable type (like i8) to an
327/// illegal type or from a smaller to a larger illegal type. A width of '1'
328/// is always treated as a desirable type because i1 is a fundamental type in
329/// IR, and there are many specialized optimizations for i1 types.
330/// Common/desirable widths are equally treated as legal to convert to, in
331/// order to open up more combining opportunities.
332bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
333 unsigned ToWidth) const {
334 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
335 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
336
337 // Convert to desirable widths even if they are not legal types.
338 // Only shrink types, to prevent infinite loops.
339 if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
340 return true;
341
342 // If this is a legal or desiable integer from type, and the result would be
343 // an illegal type, don't do the transformation.
344 if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal)
345 return false;
346
347 // Otherwise, if both are illegal, do not increase the size of the result. We
348 // do allow things like i160 -> i64, but not i64 -> i160.
349 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
350 return false;
351
352 return true;
353}
354
355/// Return true if it is desirable to convert a computation from 'From' to 'To'.
356/// We don't want to convert from a legal to an illegal type or from a smaller
357/// to a larger illegal type. i1 is always treated as a legal type because it is
358/// a fundamental type in IR, and there are many specialized optimizations for
359/// i1 types.
360bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
361 // TODO: This could be extended to allow vectors. Datalayout changes might be
362 // needed to properly support that.
363 if (!From->isIntegerTy() || !To->isIntegerTy())
364 return false;
365
366 unsigned FromWidth = From->getPrimitiveSizeInBits();
367 unsigned ToWidth = To->getPrimitiveSizeInBits();
368 return shouldChangeType(FromWidth, ToWidth);
369}
370
371// Return true, if No Signed Wrap should be maintained for I.
372// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
373// where both B and C should be ConstantInts, results in a constant that does
374// not overflow. This function only handles the Add/Sub/Mul opcodes. For
375// all other opcodes, the function conservatively returns false.
378 if (!OBO || !OBO->hasNoSignedWrap())
379 return false;
380
381 const APInt *BVal, *CVal;
382 if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
383 return false;
384
385 // We reason about Add/Sub/Mul Only.
386 bool Overflow = false;
387 switch (I.getOpcode()) {
388 case Instruction::Add:
389 (void)BVal->sadd_ov(*CVal, Overflow);
390 break;
391 case Instruction::Sub:
392 (void)BVal->ssub_ov(*CVal, Overflow);
393 break;
394 case Instruction::Mul:
395 (void)BVal->smul_ov(*CVal, Overflow);
396 break;
397 default:
398 // Conservatively return false for other opcodes.
399 return false;
400 }
401 return !Overflow;
402}
403
406 return OBO && OBO->hasNoUnsignedWrap();
407}
408
411 return OBO && OBO->hasNoSignedWrap();
412}
413
414/// Combine constant operands of associative operations either before or after a
415/// cast to eliminate one of the associative operations:
416/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
417/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
419 InstCombinerImpl &IC) {
420 auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
421 if (!Cast || !Cast->hasOneUse())
422 return false;
423
424 // TODO: Enhance logic for other casts and remove this check.
425 auto CastOpcode = Cast->getOpcode();
426 if (CastOpcode != Instruction::ZExt)
427 return false;
428
429 // TODO: Enhance logic for other BinOps and remove this check.
430 if (!BinOp1->isBitwiseLogicOp())
431 return false;
432
433 auto AssocOpcode = BinOp1->getOpcode();
434 auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
435 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
436 return false;
437
438 Constant *C1, *C2;
439 if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
440 !match(BinOp2->getOperand(1), m_Constant(C2)))
441 return false;
442
443 // TODO: This assumes a zext cast.
444 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
445 // to the destination type might lose bits.
446
447 // Fold the constants together in the destination type:
448 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
449 const DataLayout &DL = IC.getDataLayout();
450 Type *DestTy = C1->getType();
451 Constant *CastC2 = ConstantFoldCastOperand(CastOpcode, C2, DestTy, DL);
452 if (!CastC2)
453 return false;
454 Constant *FoldedC = ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, DL);
455 if (!FoldedC)
456 return false;
457
458 IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
459 IC.replaceOperand(*BinOp1, 1, FoldedC);
461 Cast->dropPoisonGeneratingFlags();
462 return true;
463}
464
465// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
466// inttoptr ( ptrtoint (x) ) --> x
467Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
468 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
469 if (IntToPtr && DL.getTypeSizeInBits(IntToPtr->getDestTy()) ==
470 DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
471 auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
472 Type *CastTy = IntToPtr->getDestTy();
473 if (PtrToInt &&
474 CastTy->getPointerAddressSpace() ==
475 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
476 DL.getTypeSizeInBits(PtrToInt->getSrcTy()) ==
477 DL.getTypeSizeInBits(PtrToInt->getDestTy()))
478 return PtrToInt->getOperand(0);
479 }
480 return nullptr;
481}
482
483/// This performs a few simplifications for operators that are associative or
484/// commutative:
485///
486/// Commutative operators:
487///
488/// 1. Order operands such that they are listed from right (least complex) to
489/// left (most complex). This puts constants before unary operators before
490/// binary operators.
491///
492/// Associative operators:
493///
494/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
495/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
496///
497/// Associative and commutative operators:
498///
499/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
500/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
501/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
502/// if C1 and C2 are constants.
504 Instruction::BinaryOps Opcode = I.getOpcode();
505 bool Changed = false;
506
507 do {
508 // Order operands such that they are listed from right (least complex) to
509 // left (most complex). This puts constants before unary operators before
510 // binary operators.
511 if (I.isCommutative() && getComplexity(I.getOperand(0)) <
512 getComplexity(I.getOperand(1)))
513 Changed = !I.swapOperands();
514
515 if (I.isCommutative()) {
516 if (auto Pair = matchSymmetricPair(I.getOperand(0), I.getOperand(1))) {
517 replaceOperand(I, 0, Pair->first);
518 replaceOperand(I, 1, Pair->second);
519 Changed = true;
520 }
521 }
522
523 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
524 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
525
526 if (I.isAssociative()) {
527 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
528 if (Op0 && Op0->getOpcode() == Opcode) {
529 Value *A = Op0->getOperand(0);
530 Value *B = Op0->getOperand(1);
531 Value *C = I.getOperand(1);
532
533 // Does "B op C" simplify?
534 if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
535 // It simplifies to V. Form "A op V".
536 replaceOperand(I, 0, A);
537 replaceOperand(I, 1, V);
538 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
539 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
540
541 // Conservatively clear all optional flags since they may not be
542 // preserved by the reassociation. Reset nsw/nuw based on the above
543 // analysis.
544 if (auto *PDI = dyn_cast<PossiblyDisjointInst>(&I))
545 PDI->setIsDisjoint(false);
546
547 // Note: this is only valid because SimplifyBinOp doesn't look at
548 // the operands to Op0.
550 I.setHasNoUnsignedWrap(IsNUW);
551 I.setHasNoSignedWrap(IsNSW);
552 }
553
554 Changed = true;
555 ++NumReassoc;
556 continue;
557 }
558 }
559
560 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
561 if (Op1 && Op1->getOpcode() == Opcode) {
562 Value *A = I.getOperand(0);
563 Value *B = Op1->getOperand(0);
564 Value *C = Op1->getOperand(1);
565
566 // Does "A op B" simplify?
567 if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
568 // It simplifies to V. Form "V op C".
569 replaceOperand(I, 0, V);
570 replaceOperand(I, 1, C);
571 // Conservatively clear the optional flags, since they may not be
572 // preserved by the reassociation.
574 I.dropPoisonGeneratingFlags();
575 Changed = true;
576 ++NumReassoc;
577 continue;
578 }
579 }
580 }
581
582 if (I.isAssociative() && I.isCommutative()) {
583 if (simplifyAssocCastAssoc(&I, *this)) {
584 Changed = true;
585 ++NumReassoc;
586 continue;
587 }
588
589 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
590 if (Op0 && Op0->getOpcode() == Opcode) {
591 Value *A = Op0->getOperand(0);
592 Value *B = Op0->getOperand(1);
593 Value *C = I.getOperand(1);
594
595 // Does "C op A" simplify?
596 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
597 // It simplifies to V. Form "V op B".
598 replaceOperand(I, 0, V);
599 replaceOperand(I, 1, B);
600 // Conservatively clear the optional flags, since they may not be
601 // preserved by the reassociation.
603 I.dropPoisonGeneratingFlags();
604 Changed = true;
605 ++NumReassoc;
606 continue;
607 }
608 }
609
610 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
611 if (Op1 && Op1->getOpcode() == Opcode) {
612 Value *A = I.getOperand(0);
613 Value *B = Op1->getOperand(0);
614 Value *C = Op1->getOperand(1);
615
616 // Does "C op A" simplify?
617 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
618 // It simplifies to V. Form "B op V".
619 replaceOperand(I, 0, B);
620 replaceOperand(I, 1, V);
621 // Conservatively clear the optional flags, since they may not be
622 // preserved by the reassociation.
624 I.dropPoisonGeneratingFlags();
625 Changed = true;
626 ++NumReassoc;
627 continue;
628 }
629 }
630
631 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
632 // if C1 and C2 are constants.
633 Value *A, *B;
634 Constant *C1, *C2, *CRes;
635 if (Op0 && Op1 &&
636 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
637 match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
638 match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
639 (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
640 bool IsNUW = hasNoUnsignedWrap(I) &&
641 hasNoUnsignedWrap(*Op0) &&
642 hasNoUnsignedWrap(*Op1);
643 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
644 BinaryOperator::CreateNUW(Opcode, A, B) :
645 BinaryOperator::Create(Opcode, A, B);
646
647 if (isa<FPMathOperator>(NewBO)) {
648 FastMathFlags Flags = I.getFastMathFlags() &
649 Op0->getFastMathFlags() &
650 Op1->getFastMathFlags();
651 NewBO->setFastMathFlags(Flags);
652 }
653 InsertNewInstWith(NewBO, I.getIterator());
654 NewBO->takeName(Op1);
655 replaceOperand(I, 0, NewBO);
656 replaceOperand(I, 1, CRes);
657 // Conservatively clear the optional flags, since they may not be
658 // preserved by the reassociation.
660 I.dropPoisonGeneratingFlags();
661 if (IsNUW)
662 I.setHasNoUnsignedWrap(true);
663
664 Changed = true;
665 continue;
666 }
667 }
668
669 // No further simplifications.
670 return Changed;
671 } while (true);
672}
673
674/// Return whether "X LOp (Y ROp Z)" is always equal to
675/// "(X LOp Y) ROp (X LOp Z)".
678 // X & (Y | Z) <--> (X & Y) | (X & Z)
679 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
680 if (LOp == Instruction::And)
681 return ROp == Instruction::Or || ROp == Instruction::Xor;
682
683 // X | (Y & Z) <--> (X | Y) & (X | Z)
684 if (LOp == Instruction::Or)
685 return ROp == Instruction::And;
686
687 // X * (Y + Z) <--> (X * Y) + (X * Z)
688 // X * (Y - Z) <--> (X * Y) - (X * Z)
689 if (LOp == Instruction::Mul)
690 return ROp == Instruction::Add || ROp == Instruction::Sub;
691
692 return false;
693}
694
695/// Return whether "(X LOp Y) ROp Z" is always equal to
696/// "(X ROp Z) LOp (Y ROp Z)".
700 return leftDistributesOverRight(ROp, LOp);
701
702 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
704
705 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
706 // but this requires knowing that the addition does not overflow and other
707 // such subtleties.
708}
709
710/// This function returns identity value for given opcode, which can be used to
711/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
713 if (isa<Constant>(V))
714 return nullptr;
715
716 return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
717}
718
719/// This function predicates factorization using distributive laws. By default,
720/// it just returns the 'Op' inputs. But for special-cases like
721/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
722/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
723/// allow more factorization opportunities.
726 Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) {
727 assert(Op && "Expected a binary operator");
728 LHS = Op->getOperand(0);
729 RHS = Op->getOperand(1);
730 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
731 Constant *C;
732 if (match(Op, m_Shl(m_Value(), m_ImmConstant(C)))) {
733 // X << C --> X * (1 << C)
735 Instruction::Shl, ConstantInt::get(Op->getType(), 1), C);
736 assert(RHS && "Constant folding of immediate constants failed");
737 return Instruction::Mul;
738 }
739 // TODO: We can add other conversions e.g. shr => div etc.
740 }
741 if (Instruction::isBitwiseLogicOp(TopOpcode)) {
742 if (OtherOp && OtherOp->getOpcode() == Instruction::AShr &&
744 // lshr nneg C, X --> ashr nneg C, X
745 return Instruction::AShr;
746 }
747 }
748 return Op->getOpcode();
749}
750
751/// This tries to simplify binary operations by factorizing out common terms
752/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
755 Instruction::BinaryOps InnerOpcode, Value *A,
756 Value *B, Value *C, Value *D) {
757 assert(A && B && C && D && "All values must be provided");
758
759 Value *V = nullptr;
760 Value *RetVal = nullptr;
761 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
762 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
763
764 // Does "X op' Y" always equal "Y op' X"?
765 bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
766
767 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
768 if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
769 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
770 // commutative case, "(A op' B) op (C op' A)"?
771 if (A == C || (InnerCommutative && A == D)) {
772 if (A != C)
773 std::swap(C, D);
774 // Consider forming "A op' (B op D)".
775 // If "B op D" simplifies then it can be formed with no cost.
776 V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
777
778 // If "B op D" doesn't simplify then only go on if one of the existing
779 // operations "A op' B" and "C op' D" will be zapped as no longer used.
780 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
781 V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
782 if (V)
783 RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
784 }
785 }
786
787 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
788 if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
789 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
790 // commutative case, "(A op' B) op (B op' D)"?
791 if (B == D || (InnerCommutative && B == C)) {
792 if (B != D)
793 std::swap(C, D);
794 // Consider forming "(A op C) op' B".
795 // If "A op C" simplifies then it can be formed with no cost.
796 V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
797
798 // If "A op C" doesn't simplify then only go on if one of the existing
799 // operations "A op' B" and "C op' D" will be zapped as no longer used.
800 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
801 V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
802 if (V)
803 RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
804 }
805 }
806
807 if (!RetVal)
808 return nullptr;
809
810 ++NumFactor;
811 RetVal->takeName(&I);
812
813 // Try to add no-overflow flags to the final value.
814 if (isa<BinaryOperator>(RetVal)) {
815 bool HasNSW = false;
816 bool HasNUW = false;
818 HasNSW = I.hasNoSignedWrap();
819 HasNUW = I.hasNoUnsignedWrap();
820 }
821 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
822 HasNSW &= LOBO->hasNoSignedWrap();
823 HasNUW &= LOBO->hasNoUnsignedWrap();
824 }
825
826 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
827 HasNSW &= ROBO->hasNoSignedWrap();
828 HasNUW &= ROBO->hasNoUnsignedWrap();
829 }
830
831 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
832 // We can propagate 'nsw' if we know that
833 // %Y = mul nsw i16 %X, C
834 // %Z = add nsw i16 %Y, %X
835 // =>
836 // %Z = mul nsw i16 %X, C+1
837 //
838 // iff C+1 isn't INT_MIN
839 const APInt *CInt;
840 if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
841 cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);
842
843 // nuw can be propagated with any constant or nuw value.
844 cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
845 }
846 }
847 return RetVal;
848}
849
850// If `I` has one Const operand and the other matches `(ctpop (not x))`,
851// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
852// This is only useful if the new subtract can fold so we only handle the
853// following cases:
854// 1) (add/sub/disjoint_or C, (ctpop (not x)))
855// -> (add/sub/disjoint_or C', (ctpop x))
856// 2) (cmp pred C, (ctpop (not x)))
857// -> (cmp pred C', (ctpop x))
859 unsigned Opc = I->getOpcode();
860 unsigned ConstIdx = 1;
861 switch (Opc) {
862 default:
863 return nullptr;
864 // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x))
865 // We can fold the BitWidth(x) with add/sub/icmp as long the other operand
866 // is constant.
867 case Instruction::Sub:
868 ConstIdx = 0;
869 break;
870 case Instruction::ICmp:
871 // Signed predicates aren't correct in some edge cases like for i2 types, as
872 // well since (ctpop x) is known [0, log2(BitWidth(x))] almost all signed
873 // comparisons against it are simplfied to unsigned.
874 if (cast<ICmpInst>(I)->isSigned())
875 return nullptr;
876 break;
877 case Instruction::Or:
878 if (!match(I, m_DisjointOr(m_Value(), m_Value())))
879 return nullptr;
880 [[fallthrough]];
881 case Instruction::Add:
882 break;
883 }
884
885 Value *Op;
886 // Find ctpop.
887 if (!match(I->getOperand(1 - ConstIdx), m_OneUse(m_Ctpop(m_Value(Op)))))
888 return nullptr;
889
890 Constant *C;
891 // Check other operand is ImmConstant.
892 if (!match(I->getOperand(ConstIdx), m_ImmConstant(C)))
893 return nullptr;
894
895 Type *Ty = Op->getType();
896 Constant *BitWidthC = ConstantInt::get(Ty, Ty->getScalarSizeInBits());
897 // Need extra check for icmp. Note if this check is true, it generally means
898 // the icmp will simplify to true/false.
899 if (Opc == Instruction::ICmp && !cast<ICmpInst>(I)->isEquality()) {
900 Constant *Cmp =
902 if (!Cmp || !Cmp->isNullValue())
903 return nullptr;
904 }
905
906 // Check we can invert `(not x)` for free.
907 bool Consumes = false;
908 if (!isFreeToInvert(Op, Op->hasOneUse(), Consumes) || !Consumes)
909 return nullptr;
910 Value *NotOp = getFreelyInverted(Op, Op->hasOneUse(), &Builder);
911 assert(NotOp != nullptr &&
912 "Desync between isFreeToInvert and getFreelyInverted");
913
914 Value *CtpopOfNotOp = Builder.CreateIntrinsic(Ty, Intrinsic::ctpop, NotOp);
915
916 Value *R = nullptr;
917
918 // Do the transformation here to avoid potentially introducing an infinite
919 // loop.
920 switch (Opc) {
921 case Instruction::Sub:
922 R = Builder.CreateAdd(CtpopOfNotOp, ConstantExpr::getSub(C, BitWidthC));
923 break;
924 case Instruction::Or:
925 case Instruction::Add:
926 R = Builder.CreateSub(ConstantExpr::getAdd(C, BitWidthC), CtpopOfNotOp);
927 break;
928 case Instruction::ICmp:
929 R = Builder.CreateICmp(cast<ICmpInst>(I)->getSwappedPredicate(),
930 CtpopOfNotOp, ConstantExpr::getSub(BitWidthC, C));
931 break;
932 default:
933 llvm_unreachable("Unhandled Opcode");
934 }
935 assert(R != nullptr);
936 return replaceInstUsesWith(*I, R);
937}
938
939// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
940// IFF
941// 1) the logic_shifts match
942// 2) either both binops are binops and one is `and` or
943// BinOp1 is `and`
944// (logic_shift (inv_logic_shift C1, C), C) == C1 or
945//
946// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
947//
948// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
949// IFF
950// 1) the logic_shifts match
951// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
952//
953// -> (BinOp (logic_shift (BinOp X, Y)), Mask)
954//
955// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
956// IFF
957// 1) Binop1 is bitwise logical operator `and`, `or` or `xor`
958// 2) Binop2 is `not`
959//
960// -> (arithmetic_shift Binop1((not X), Y), Amt)
961
// NOTE(review): the signature line of this function is missing from this
// extract (presumably foldBinOpShiftWithShift -- confirm against the full
// file). `I` is the outer binop; the header comment above lists the folds.
963 const DataLayout &DL = I.getDataLayout();
// Accepts only `and`, `or`, `xor` and `add` as outer/inner binop opcodes.
964 auto IsValidBinOpc = [](unsigned Opc) {
965 switch (Opc) {
966 default:
967 return false;
968 case Instruction::And:
969 case Instruction::Or:
970 case Instruction::Xor:
971 case Instruction::Add:
972 // Skip Sub as we only match constant masks which will canonicalize to use
973 // add.
974 return true;
975 }
976 };
977
978 // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
979 // constraints.
980 auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
981 unsigned ShOpc) {
982 assert(ShOpc != Instruction::AShr);
983 return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
984 ShOpc == Instruction::Shl;
985 };
986
// Maps `lshr` to `shl` and anything else (only `shl` is expected, since `ashr`
// is asserted away) to `lshr`.
987 auto GetInvShift = [](unsigned ShOpc) {
988 assert(ShOpc != Instruction::AShr);
989 return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
990 };
991
// Decides whether the constant mask can be moved across the shift for the
// given outer binop (BinOpc1), inner binop (BinOpc2) and shift opcode.
992 auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
993 unsigned ShOpc, Constant *CMask,
994 Constant *CShift) {
995 // If the BinOp1 is `and` we don't need to check the mask.
996 if (BinOpc1 == Instruction::And)
997 return true;
998
999 // For all other possible transfers we need complete distributable
1000 // binop/shift (anything but `add` + `lshr`).
1001 if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
1002 return false;
1003
1004 // If BinOp2 is `and`, any mask works (this only really helps for non-splat
1005 // vecs, otherwise the mask will be simplified and the following check will
1006 // handle it).
1007 if (BinOpc2 == Instruction::And)
1008 return true;
1009
1010 // Otherwise, need mask that meets the below requirement.
1011 // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
1012 Constant *MaskInvShift =
1013 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
1014 return ConstantFoldBinaryOpOperands(ShOpc, MaskInvShift, CShift, DL) ==
1015 CMask;
1016 };
1017
// Attempts the fold treating operand `ShOpnum` of I as the bare shift of Y and
// the other operand as the binop that wraps the shift of X. Returns the new
// instruction, or nullptr if the pattern or its preconditions do not hold.
1018 auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
1019 Constant *CMask, *CShift;
1020 Value *X, *Y, *ShiftedX, *Mask, *Shift;
1021 if (!match(I.getOperand(ShOpnum),
1022 m_OneUse(m_Shift(m_Value(Y), m_Value(Shift)))))
1023 return nullptr;
1024 if (!match(
1025 I.getOperand(1 - ShOpnum),
// NOTE(review): two lines of this pattern (the part that binds X) are missing
// from this extract.
1028 m_Value(ShiftedX)),
1029 m_Value(Mask)))))
1030 return nullptr;
1031 // Make sure we are matching instruction shifts and not ConstantExpr
1032 auto *IY = dyn_cast<Instruction>(I.getOperand(ShOpnum));
1033 auto *IX = dyn_cast<Instruction>(ShiftedX);
1034 if (!IY || !IX)
1035 return nullptr;
1036
1037 // LHS and RHS need same shift opcode
1038 unsigned ShOpc = IY->getOpcode();
1039 if (ShOpc != IX->getOpcode())
1040 return nullptr;
1041
1042 // Make sure binop is real instruction and not ConstantExpr
1043 auto *BO2 = dyn_cast<Instruction>(I.getOperand(1 - ShOpnum));
1044 if (!BO2)
1045 return nullptr;
1046
1047 unsigned BinOpc = BO2->getOpcode();
1048 // Make sure we have valid binops.
1049 if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
1050 return nullptr;
1051
// Arithmetic shifts are only handled when the inner binop is a `not`
// (xor with all-ones) and the outer binop is a bitwise logic op.
1052 if (ShOpc == Instruction::AShr) {
1053 if (Instruction::isBitwiseLogicOp(I.getOpcode()) &&
1054 BinOpc == Instruction::Xor && match(Mask, m_AllOnes())) {
1055 Value *NotX = Builder.CreateNot(X);
1056 Value *NewBinOp = Builder.CreateBinOp(I.getOpcode(), Y, NotX);
// NOTE(review): the line opening this statement is missing from this extract.
1058 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp, Shift);
1059 }
1060
1061 return nullptr;
1062 }
1063
1064 // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just
1065 // distribute to drop the shift irrelevant of constants.
1066 if (BinOpc == I.getOpcode() &&
1067 IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
1068 Value *NewBinOp2 = Builder.CreateBinOp(I.getOpcode(), X, Y);
1069 Value *NewBinOp1 = Builder.CreateBinOp(
1070 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp2, Shift);
1071 return BinaryOperator::Create(I.getOpcode(), NewBinOp1, Mask);
1072 }
1073
1074 // Otherwise we can only distribute by constant shifting the mask, so
1075 // ensure we have constants.
1076 if (!match(Shift, m_ImmConstant(CShift)))
1077 return nullptr;
1078 if (!match(Mask, m_ImmConstant(CMask)))
1079 return nullptr;
1080
1081 // Check if we can distribute the binops.
1082 if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
1083 return nullptr;
1084
// Build (shift (BinOp1 Y, (BinOp2 X, inv_shift(CMask, CShift))), CShift).
1085 Constant *NewCMask =
1086 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
1087 Value *NewBinOp2 = Builder.CreateBinOp(
1088 static_cast<Instruction::BinaryOps>(BinOpc), X, NewCMask);
1089 Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2);
1090 return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(ShOpc),
1091 NewBinOp1, CShift);
1092 };
1093
// Try both operand orders.
1094 if (Instruction *R = MatchBinOp(0))
1095 return R;
1096 return MatchBinOp(1);
1097}
1098
1099// (Binop (zext C), (select C, T, F))
1100// -> (select C, (binop 1, T), (binop 0, F))
1101//
1102// (Binop (sext C), (select C, T, F))
1103// -> (select C, (binop -1, T), (binop 0, F))
1104//
1105// Attempt to simplify binary operations into a select with folded args, when
1106// one operand of the binop is a select instruction and the other operand is a
1107// zext/sext extension, whose value is the select condition.
// NOTE(review): the signature line of this function is missing from this
// extract; `I` is the binop being folded.
1110 // TODO: this simplification may be extended to any speculatable instruction,
1111 // not just binops, and would possibly be handled better in FoldOpIntoSelect.
1112 Instruction::BinaryOps Opc = I.getOpcode();
1113 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1114 Value *A, *CondVal, *TrueVal, *FalseVal;
1115 Value *CastOp;
1116 Constant *CastTrueVal, *CastFalseVal;
1117
// Matches CastOp against m_SelectLike on condition A with two constant arms
// (CastTrueVal / CastFalseVal), and SelectOp against a plain select
// (CondVal ? TrueVal : FalseVal).
1118 auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
1119 return match(CastOp, m_SelectLike(m_Value(A), m_Constant(CastTrueVal),
1120 m_Constant(CastFalseVal))) &&
1121 match(SelectOp, m_Select(m_Value(CondVal), m_Value(TrueVal),
1122 m_Value(FalseVal)));
1123 };
1124
1125 // Make sure one side of the binop is a select instruction, and the other is a
1126 // zero/sign extension operating on a i1.
1127 if (MatchSelectAndCast(LHS, RHS))
1128 CastOp = LHS;
1129 else if (MatchSelectAndCast(RHS, LHS))
1130 CastOp = RHS;
1131 else
1132 return nullptr;
1133
// NOTE(review): the first line of the following declaration (which introduces
// `SI`, used below) is missing from this extract.
1135 ? nullptr
1136 : cast<SelectInst>(CastOp == LHS ? RHS : LHS);
1137
// Builds `Opc` of V and one of the cast's constant arms, keeping the original
// operand order. NOTE(review): the flag's sense is the opposite of its name --
// IsTrueArm == true picks CastFalseVal -- and the callers below pass it
// accordingly.
1138 auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
1139 bool IsCastOpRHS = (CastOp == RHS);
1140 Value *CastVal = IsTrueArm ? CastFalseVal : CastTrueVal;
1141
1142 return IsCastOpRHS ? Builder.CreateBinOp(Opc, V, CastVal)
1143 : Builder.CreateBinOp(Opc, CastVal, V);
1144 };
1145
1146 // If the value used in the zext/sext is the select condition, or the negated
1147 // of the select condition, the binop can be simplified.
1148 if (CondVal == A) {
// Same condition: the true arm pairs with CastTrueVal, the false arm with
// CastFalseVal.
1149 Value *NewTrueVal = NewFoldedConst(false, TrueVal);
1150 return SelectInst::Create(CondVal, NewTrueVal,
1151 NewFoldedConst(true, FalseVal), "", nullptr, SI);
1152 }
1153 if (match(A, m_Not(m_Specific(CondVal)))) {
// Negated condition: the pairing of arms and cast constants is swapped.
1154 Value *NewTrueVal = NewFoldedConst(true, TrueVal);
1155 return SelectInst::Create(CondVal, NewTrueVal,
1156 NewFoldedConst(false, FalseVal), "", nullptr, SI);
1157 }
1158
1159 return nullptr;
1160}
1161
// Tries to factor a common term out of the operands of binop `I`; returns the
// factored value or nullptr.
// NOTE(review): the signature line and the declarations of `Op0` / `Op1` are
// missing from this extract.
1163 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1166 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1167 Value *A, *B, *C, *D;
1168 Instruction::BinaryOps LHSOpcode, RHSOpcode;
1169
// LHSOpcode / RHSOpcode are only assigned when Op0 / Op1 is non-null; every
// read below is guarded by the same null check.
1170 if (Op0)
1171 LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B, Op1);
1172 if (Op1)
1173 RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D, Op0);
1174
1175 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
1176 // a common term.
1177 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
1178 if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D))
1179 return V;
1180
1181 // The instruction has the form "(A op' B) op (C)". Try to factorize common
1182 // term.
// RHS is treated as "RHS op' Ident", where Ident comes from getIdentityValue.
1183 if (Op0)
1184 if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
1185 if (Value *V =
1186 tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident))
1187 return V;
1188
1189 // The instruction has the form "(B) op (C op' D)". Try to factorize common
1190 // term.
// Symmetric case: LHS is treated as "LHS op' Ident".
1191 if (Op1)
1192 if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
1193 if (Value *V =
1194 tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D))
1195 return V;
1196
1197 return nullptr;
1198}
1199
1200/// This tries to simplify binary operations which some other binary operation
1201/// distributes over either by factorizing out common terms
1202/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
1203/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
1204/// Returns the simplified value, or null if it didn't simplify.
// NOTE(review): the signature line and the declarations of `Op0` / `Op1` are
// missing from this extract.
1206 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1209 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1210
1211 // Factorization.
1212 if (Value *R = tryFactorizationFolds(I))
1213 return R;
1214
1215 // Expansion.
1216 if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
1217 // The instruction has the form "(A op' B) op C". See if expanding it out
1218 // to "(A op C) op' (B op C)" results in simplifications.
1219 Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
1220 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
1221
1222 // Disable the use of undef because it's not safe to distribute undef.
1223 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1224 Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1225 Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
1226
1227 // Do "A op C" and "B op C" both simplify?
1228 if (L && R) {
1229 // They do! Return "L op' R".
1230 ++NumExpand;
// C is reused as the result variable from here on.
1231 C = Builder.CreateBinOp(InnerOpcode, L, R);
1232 C->takeName(&I);
1233 return C;
1234 }
1235
1236 // Does "A op C" simplify to the identity value for the inner opcode?
1237 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1238 // They do! Return "B op C".
1239 ++NumExpand;
1240 C = Builder.CreateBinOp(TopLevelOpcode, B, C);
1241 C->takeName(&I);
1242 return C;
1243 }
1244
1245 // Does "B op C" simplify to the identity value for the inner opcode?
1246 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1247 // They do! Return "A op C".
1248 ++NumExpand;
1249 C = Builder.CreateBinOp(TopLevelOpcode, A, C);
1250 C->takeName(&I);
1251 return C;
1252 }
1253 }
1254
1255 if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
1256 // The instruction has the form "A op (B op' C)". See if expanding it out
1257 // to "(A op B) op' (A op C)" results in simplifications.
1258 Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
1259 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
1260
1261 // Disable the use of undef because it's not safe to distribute undef.
1262 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1263 Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
1264 Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1265
1266 // Do "A op B" and "A op C" both simplify?
1267 if (L && R) {
1268 // They do! Return "L op' R".
1269 ++NumExpand;
// A is reused as the result variable from here on.
1270 A = Builder.CreateBinOp(InnerOpcode, L, R);
1271 A->takeName(&I);
1272 return A;
1273 }
1274
1275 // Does "A op B" simplify to the identity value for the inner opcode?
1276 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1277 // They do! Return "A op C".
1278 ++NumExpand;
1279 A = Builder.CreateBinOp(TopLevelOpcode, A, C);
1280 A->takeName(&I);
1281 return A;
1282 }
1283
1284 // Does "A op C" simplify to the identity value for the inner opcode?
1285 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1286 // They do! Return "A op B".
1287 ++NumExpand;
1288 A = Builder.CreateBinOp(TopLevelOpcode, A, B);
1289 A->takeName(&I);
1290 return A;
1291 }
1292 }
1293
// Neither factorization nor expansion applied; fall back to folding selects
// that feed the binop.
1294 return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
1295}
1296
// Checks whether two PHI-like nodes in the same block, with identical
// incoming-block sequences, carry the same pair of values on every incoming
// edge (in either order). Returns the pair taken from incoming edge 0 on
// success and std::nullopt otherwise.
// NOTE(review): the line naming this function and its parameters is missing
// from this extract.
1297static std::optional<std::pair<Value *, Value *>>
1299 if (LHS->getParent() != RHS->getParent())
1300 return std::nullopt;
1301
1302 if (LHS->getNumIncomingValues() < 2)
1303 return std::nullopt;
1304
1305 if (!equal(LHS->blocks(), RHS->blocks()))
1306 return std::nullopt;
1307
1308 Value *L0 = LHS->getIncomingValue(0);
1309 Value *R0 = RHS->getIncomingValue(0);
1310
// Every other edge must carry (L0, R0) or the swapped (R0, L0).
1311 for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) {
1312 Value *L1 = LHS->getIncomingValue(I);
1313 Value *R1 = RHS->getIncomingValue(I);
1314
1315 if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1))
1316 continue;
1317
1318 return std::nullopt;
1319 }
1320
1321 return std::optional(std::pair(L0, R0));
1322}
1323
// Tries to recognise LHS and RHS as a symmetric pair built from the same two
// values (PHIs, selects with swapped arms, or min/max intrinsics on the same
// operands). Returns the two underlying values, or std::nullopt.
1324std::optional<std::pair<Value *, Value *>>
1325InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) {
// NOTE(review): the declarations of `LHSInst` / `RHSInst` are missing from
// this extract.
1328 if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode())
1329 return std::nullopt;
1330 switch (LHSInst->getOpcode()) {
1331 case Instruction::PHI:
// NOTE(review): the body of the PHI case is missing from this extract; as
// shown it would fall through into the Select case.
1333 case Instruction::Select: {
1334 Value *Cond = LHSInst->getOperand(0);
1335 Value *TrueVal = LHSInst->getOperand(1);
1336 Value *FalseVal = LHSInst->getOperand(2);
// Same condition, arms swapped between the two selects.
1337 if (Cond == RHSInst->getOperand(0) && TrueVal == RHSInst->getOperand(2) &&
1338 FalseVal == RHSInst->getOperand(1))
1339 return std::pair(TrueVal, FalseVal);
1340 return std::nullopt;
1341 }
1342 case Instruction::Call: {
1343 // Match min(a, b) and max(a, b)
1344 MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(LHSInst);
1345 MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(RHSInst);
1346 if (LHSMinMax && RHSMinMax &&
1347 LHSMinMax->getPredicate() ==
// NOTE(review): the right-hand side of this predicate comparison is missing
// from this extract.
1349 ((LHSMinMax->getLHS() == RHSMinMax->getLHS() &&
1350 LHSMinMax->getRHS() == RHSMinMax->getRHS()) ||
1351 (LHSMinMax->getLHS() == RHSMinMax->getRHS() &&
1352 LHSMinMax->getRHS() == RHSMinMax->getLHS())))
1353 return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS());
1354 return std::nullopt;
1355 }
1356 default:
1357 return std::nullopt;
1358 }
1359}
1360
1362 Value *LHS,
1363 Value *RHS) {
// Folds binop `I` into a select when at least one operand is a select and the
// per-arm binops simplify. Returns the new select value or nullptr.
// NOTE(review): the first signature line is missing from this extract.
1364 Value *A, *B, *C, *D, *E, *F;
1365 bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
1366 bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
1367 if (!LHSIsSelect && !RHSIsSelect)
1368 return nullptr;
1369
// NOTE(review): the first line of the following declaration (which introduces
// `SI`, used below) is missing from this extract.
1371 ? nullptr
1372 : cast<SelectInst>(LHSIsSelect ? LHS : RHS);
1373
1374 FastMathFlags FMF;
// NOTE(review): one line is missing from this extract at this point.
1376 if (const auto *FPOp = dyn_cast<FPMathOperator>(&I)) {
1377 FMF = FPOp->getFastMathFlags();
1378 Builder.setFastMathFlags(FMF);
1379 }
1380
1381 Instruction::BinaryOps Opcode = I.getOpcode();
1382 SimplifyQuery Q = SQ.getWithInstruction(&I);
1383
1384 Value *Cond, *True = nullptr, *False = nullptr;
1385
1386 // Special-case for add/negate combination. Replace the zero in the negation
1387 // with the trailing add operand:
1388 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
1389 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
// Reads Cond / True / False from the enclosing scope, so the caller must set
// them before invoking this lambda.
1390 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
1391 // We need an 'add' and exactly 1 arm of the select to have been simplified.
1392 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
1393 return nullptr;
1394 Value *N;
1395 if (True && match(FVal, m_Neg(m_Value(N)))) {
1396 Value *Sub = Builder.CreateSub(Z, N);
1397 return Builder.CreateSelect(Cond, True, Sub, I.getName(), SI);
1398 }
1399 if (False && match(TVal, m_Neg(m_Value(N)))) {
1400 Value *Sub = Builder.CreateSub(Z, N);
1401 return Builder.CreateSelect(Cond, Sub, False, I.getName(), SI);
1402 }
1403 return nullptr;
1404 };
1405
1406 if (LHSIsSelect && RHSIsSelect && A == D) {
1407 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
1408 Cond = A;
1409 True = simplifyBinOp(Opcode, B, E, FMF, Q);
1410 False = simplifyBinOp(Opcode, C, F, FMF, Q);
1411
// If only one arm simplified and both selects have a single use, materialise
// the other arm as a real binop.
1412 if (LHS->hasOneUse() && RHS->hasOneUse()) {
1413 if (False && !True)
1414 True = Builder.CreateBinOp(Opcode, B, E);
1415 else if (True && !False)
1416 False = Builder.CreateBinOp(Opcode, C, F);
1417 }
1418 } else if (LHSIsSelect && LHS->hasOneUse()) {
1419 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
1420 Cond = A;
1421 True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
1422 False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
1423 if (Value *NewSel = foldAddNegate(B, C, RHS))
1424 return NewSel;
1425 } else if (RHSIsSelect && RHS->hasOneUse()) {
1426 // X op (D ? E : F) -> D ? (X op E) : (X op F)
1427 Cond = D;
1428 True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
1429 False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
1430 if (Value *NewSel = foldAddNegate(E, F, LHS))
1431 return NewSel;
1432 }
1433
// Both arms must be available; this also covers the case where none of the
// branches above ran (True and False are still nullptr).
1434 if (!True || !False)
1435 return nullptr;
1436
1437 Value *NewSI = Builder.CreateSelect(Cond, True, False, I.getName(), SI);
1438 NewSI->takeName(&I);
1439 return NewSI;
1440}
1441
1442/// Freely adapt every user of V as-if V was changed to !V.
1443/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
// NOTE(review): the signature line of this function is missing from this
// extract; `I` is the value being inverted and `IgnoredUser` is skipped.
1445 assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
// Early-inc iteration: the loop body may modify the use list being walked.
1446 for (User *U : make_early_inc_range(I->users())) {
1447 if (U == IgnoredUser)
1448 continue; // Don't consider this user.
1449 switch (cast<Instruction>(U)->getOpcode()) {
1450 case Instruction::Select: {
// Inverting the condition is absorbed by swapping the select's arms (and its
// profile metadata).
1451 auto *SI = cast<SelectInst>(U);
1452 SI->swapValues();
1453 SI->swapProfMetadata();
1454 break;
1455 }
1456 case Instruction::CondBr: {
// NOTE(review): the declaration of `BI` is missing from this extract.
1458 BI->swapSuccessors(); // swaps prof metadata too
1459 if (BPI)
1460 BPI->swapSuccEdgesProbabilities(BI->getParent());
1461 break;
1462 }
1463 case Instruction::Xor:
// NOTE(review): the statements of the Xor case are missing from this extract;
// only the comment and `break` remain.
1465 // Add to worklist for DCE.
1467 break;
1468 default:
1469 llvm_unreachable("Got unexpected user - out of sync with "
1470 "canFreelyInvertAllUsersOf() ?");
1471 }
1472 }
1473
1474 // Update pre-existing debug value uses.
1475 SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
1476 llvm::findDbgValues(I, DbgVariableRecords);
1477
// Append DW_OP_not to every location operand that refers to I.
1478 for (DbgVariableRecord *DbgVal : DbgVariableRecords) {
1479 SmallVector<uint64_t, 1> Ops = {dwarf::DW_OP_not};
1480 for (unsigned Idx = 0, End = DbgVal->getNumVariableLocationOps();
1481 Idx != End; ++Idx)
1482 if (DbgVal->getVariableLocationOp(Idx) == I)
1483 DbgVal->setExpression(
1484 DIExpression::appendOpsToArg(DbgVal->getExpression(), Ops, Idx));
1485 }
1486}
1487
1488/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
1489/// constant zero (which is the 'negate' form).
1490Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
// Returns the operand being negated if V matches m_Neg, a folded negation for
// the constant forms handled below, and nullptr otherwise.
1491 Value *NegV;
1492 if (match(V, m_Neg(m_Value(NegV))))
1493 return NegV;
1494
1495 // Constants can be considered to be negated values if they can be folded.
// NOTE(review): the guard that introduces `C` for the next statement is
// missing from this extract.
1497 return ConstantExpr::getNeg(C);
1498
// NOTE(review): the guard that introduces `C` for the next check is missing
// from this extract.
1500 if (C->getType()->getElementType()->isIntegerTy())
1501 return ConstantExpr::getNeg(C);
1502
// NOTE(review): the line that opens this block and introduces `CV` is missing
// from this extract. The loop accepts only ConstantInt or undef elements.
1504 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1505 Constant *Elt = CV->getAggregateElement(i);
1506 if (!Elt)
1507 return nullptr;
1508
1509 if (isa<UndefValue>(Elt))
1510 continue;
1511
1512 if (!isa<ConstantInt>(Elt))
1513 return nullptr;
1514 }
1515 return ConstantExpr::getNeg(CV);
1516 }
1517
1518 // Negate integer vector splats.
1519 if (auto *CV = dyn_cast<Constant>(V))
1520 if (CV->getType()->isVectorTy() &&
1521 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1522 return ConstantExpr::getNeg(CV);
1523
1524 return nullptr;
1525}
1526
1527// Try to fold:
1528// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1529// -> ({s|u}itofp (int_binop x, y))
1530// 2) (fp_binop ({s|u}itofp x), FpC)
1531// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1532//
1533// Assuming the sign of the cast for x/y is `OpsFromSigned`.
1534Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
1535 BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
// NOTE(review): the last line of the parameter list (which declares `Op1FpC`
// and `OpsKnown`, both used below) is missing from this extract.
1537
1538 Type *FPTy = BO.getType();
1539 Type *IntTy = IntOps[0]->getType();
1540
1541 unsigned IntSz = IntTy->getScalarSizeInBits();
1542 // This is the maximum number of inuse bits by the integer where the int -> fp
1543 // casts are exact.
1544 unsigned MaxRepresentableBits =
// NOTE(review): the initializer of MaxRepresentableBits is missing from this
// extract.
1546
1547 // Preserve known number of leading bits. This can allow us to trivially
1548 // pass the nsw/nuw checks later on.
1549 unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
1550
1551 // NB: This only comes up if OpsFromSigned is true, so there is no need to
1552 // cache it between calls to `foldFBinOpOfIntCastsFromSign`.
1553 auto IsNonZero = [&](unsigned OpNo) -> bool {
1554 if (OpsKnown[OpNo].hasKnownBits() &&
1555 OpsKnown[OpNo].getKnownBits(SQ).isNonZero())
1556 return true;
1557 return isKnownNonZero(IntOps[OpNo], SQ);
1558 };
1559
1560 auto IsNonNeg = [&](unsigned OpNo) -> bool {
1561 // NB: This matches the impl in ValueTracking, we just try to use cached
1562 // knownbits here. If we ever start supporting WithCache for
1563 // `isKnownNonNegative`, change this to an explicit call.
1564 return OpsKnown[OpNo].getKnownBits(SQ).isNonNegative();
1565 };
1566
1567 // Check if we know for certain that ({s|u}itofp op) is exact.
// Side effect: may tighten NumUsedLeadingBits[OpNo], which the overflow
// reasoning further down relies on.
1568 auto IsValidPromotion = [&](unsigned OpNo) -> bool {
1569 // Can we treat this operand as the desired sign?
1570 if (OpsFromSigned != isa<SIToFPInst>(BO.getOperand(OpNo)) &&
1571 !IsNonNeg(OpNo))
1572 return false;
1573
1574 // If fp precision >= bitwidth(op) then it's exact.
1575 // NB: This is slightly conservative for `sitofp`. For signed conversion, we
1576 // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
1577 // handled specially. We can't, however, increase the bound arbitrarily for
1578 // `sitofp` as for larger sizes, it won't sign extend.
1579 if (MaxRepresentableBits < IntSz) {
1580 // Otherwise if it's a signed cast check that fp precision >= bitwidth(op) -
1581 // numSignBits(op).
1582 // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
1583 // `IntOps[OpNo]` arguments to `OpsKnown[OpNo]`.
1584 if (OpsFromSigned)
1585 NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(IntOps[OpNo]);
1586 // Finally for unsigned check that fp precision >= bitwidth(op) -
1587 // numLeadingZeros(op).
1588 else {
1589 NumUsedLeadingBits[OpNo] =
1590 IntSz - OpsKnown[OpNo].getKnownBits(SQ).countMinLeadingZeros();
1591 }
1592 }
1593 // NB: We could also check if op is known to be a power of 2 or zero (which
1594 // will always be representable). It's unlikely, however, that if we are
1595 // unable to bound op in any way we will be able to pass the overflow checks
1596 // later on.
1597
1598 if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
1599 return false;
1600 // Signed + Mul also requires that op is non-zero to avoid -0 cases.
1601 return !OpsFromSigned || BO.getOpcode() != Instruction::FMul ||
1602 IsNonZero(OpNo);
1603 };
1604
1605 // If we have a constant rhs, see if we can losslessly convert it to an int.
1606 if (Op1FpC != nullptr) {
1607 // Signed + Mul req non-zero
1608 if (OpsFromSigned && BO.getOpcode() == Instruction::FMul &&
1609 !match(Op1FpC, m_NonZeroFP()))
1610 return nullptr;
1611
// NOTE(review): the line that declares `Op1IntC` and opens this call is
// missing from this extract.
1613 OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, Op1FpC,
1614 IntTy, DL);
1615 if (Op1IntC == nullptr)
1616 return nullptr;
// Round-trip check: converting the integer back must reproduce the original
// FP constant.
1617 if (ConstantFoldCastOperand(OpsFromSigned ? Instruction::SIToFP
1618 : Instruction::UIToFP,
1619 Op1IntC, FPTy, DL) != Op1FpC)
1620 return nullptr;
1621
1622 // First try to keep sign of cast the same.
1623 IntOps[1] = Op1IntC;
1624 }
1625
1626 // Ensure lhs/rhs integer types match.
1627 if (IntTy != IntOps[1]->getType())
1628 return nullptr;
1629
// The constant RHS was already validated by the round-trip above, so only a
// non-constant RHS needs the promotion check.
1630 if (Op1FpC == nullptr) {
1631 if (!IsValidPromotion(1))
1632 return nullptr;
1633 }
1634 if (!IsValidPromotion(0))
1635 return nullptr;
1636
1637 // Finally, we check if the integer version of the binop will not overflow.
// NOTE(review): the declaration of `IntOpc` is missing from this extract.
1639 // Because of the precision check, we can often rule out overflows.
1640 bool NeedsOverflowCheck = true;
1641 // Try to conservatively rule out overflow based on the already done precision
1642 // checks.
1643 unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
1644 unsigned OverflowMaxCurBits =
1645 std::max(NumUsedLeadingBits[0], NumUsedLeadingBits[1]);
1646 bool OutputSigned = OpsFromSigned;
1647 switch (BO.getOpcode()) {
1648 case Instruction::FAdd:
1649 IntOpc = Instruction::Add;
1650 OverflowMaxOutputBits += OverflowMaxCurBits;
1651 break;
1652 case Instruction::FSub:
1653 IntOpc = Instruction::Sub;
1654 OverflowMaxOutputBits += OverflowMaxCurBits;
1655 break;
1656 case Instruction::FMul:
1657 IntOpc = Instruction::Mul;
1658 OverflowMaxOutputBits += OverflowMaxCurBits * 2;
1659 break;
1660 default:
1661 llvm_unreachable("Unsupported binop");
1662 }
1663 // The precision check may have already ruled out overflow.
1664 if (OverflowMaxOutputBits < IntSz) {
1665 NeedsOverflowCheck = false;
1666 // We can bound unsigned overflow from sub to in range signed value (this is
1667 // what allows us to avoid the overflow check for sub).
1668 if (IntOpc == Instruction::Sub)
1669 OutputSigned = true;
1670 }
1671
1672 // Precision check did not rule out overflow, so need to check.
1673 // TODO: If we add support for `WithCache` in `willNotOverflow`, change
1674 // `IntOps[...]` arguments to `OpsKnown[...]`.
1675 if (NeedsOverflowCheck &&
1676 !willNotOverflow(IntOpc, IntOps[0], IntOps[1], BO, OutputSigned))
1677 return nullptr;
1678
1679 Value *IntBinOp = Builder.CreateBinOp(IntOpc, IntOps[0], IntOps[1]);
// The builder may return a non-BinaryOperator value, hence the dyn_cast
// before setting wrap flags.
1680 if (auto *IntBO = dyn_cast<BinaryOperator>(IntBinOp)) {
1681 IntBO->setHasNoSignedWrap(OutputSigned);
1682 IntBO->setHasNoUnsignedWrap(!OutputSigned);
1683 }
1684 if (OutputSigned)
1685 return new SIToFPInst(IntBinOp, FPTy);
1686 return new UIToFPInst(IntBinOp, FPTy);
1687}
1688
1689// Try to fold:
1690// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1691// -> ({s|u}itofp (int_binop x, y))
1692// 2) (fp_binop ({s|u}itofp x), FpC)
1693// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1694Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
1695 // Don't perform the fold on vectors, as the integer operation may be much
1696 // more expensive than the float operation in that case.
1697 if (BO.getType()->isVectorTy())
1698 return nullptr;
1699
1700 std::array<Value *, 2> IntOps = {nullptr, nullptr};
1701 Constant *Op1FpC = nullptr;
1702 // Check for:
1703 // 1) (binop ({s|u}itofp x), ({s|u}itofp y))
1704 // 2) (binop ({s|u}itofp x), FpC)
1705 if (!match(BO.getOperand(0), m_IToFP(m_Value(IntOps[0]))))
1706 return nullptr;
1707
1708 if (!match(BO.getOperand(1), m_Constant(Op1FpC)) &&
1709 !match(BO.getOperand(1), m_IToFP(m_Value(IntOps[1]))))
1710 return nullptr;
1711
1712 // Cache KnownBits a bit to potentially save some analysis.
1713 SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};
1714
1715 // Try treating x/y as coming from both `uitofp` and `sitofp`. There are
1716 // different constraints depending on the sign of the cast.
1717 // NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
1718 if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
1719 IntOps, Op1FpC, OpsKnown))
1720 return R;
1721 return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
1722 Op1FpC, OpsKnown);
1723}
1724
1725/// A binop with a constant operand and a sign-extended boolean operand may be
1726/// converted into a select of constants by applying the binary operation to
1727/// the constant with the two possible values of the extended boolean (0 or -1).
1728Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1729 // TODO: Handle non-commutative binop (constant is operand 0).
1730 // TODO: Handle zext.
1731 // TODO: Peek through 'not' of cast.
1732 Value *BO0 = BO.getOperand(0);
1733 Value *BO1 = BO.getOperand(1);
1734 Value *X;
1735 Constant *C;
// Requires operand 0 to be a sext of an i1 (or vector of i1) value and
// operand 1 to be an immediate constant.
1736 if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
1737 !X->getType()->isIntOrIntVectorTy(1))
1738 return nullptr;
1739
1740 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
// NOTE(review): the declarations of `Zero` and `Ones` are missing from this
// extract.
1743 Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
1744 Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
1745 return createSelectInstWithUnknownProfile(X, TVal, FVal);
1746}
1747
1749 bool IsTrueArm) {
// Rebuilds the operand list of `I` as it would look on one arm of select `SI`
// and asks simplifyInstructionWithOperands for a simplified value.
// NOTE(review): the first signature line and the declaration of `Ops` are
// missing from this extract.
1751 for (Value *Op : I.operands()) {
1752 Value *V = nullptr;
1753 if (Op == SI) {
// The select itself is replaced by the value of the chosen arm.
1754 V = IsTrueArm ? SI->getTrueValue() : SI->getFalseValue();
1755 } else if (match(SI->getCondition(),
// NOTE(review): parts of this condition are missing from this extract.
1758 m_Specific(Op), m_Value(V))) &&
1760 // Pass
1761 } else if (match(Op, m_ZExt(m_Specific(SI->getCondition())))) {
// A zext of the select condition is 1 on the true arm and 0 on the false arm.
1762 V = IsTrueArm ? ConstantInt::get(Op->getType(), 1)
1763 : ConstantInt::getNullValue(Op->getType());
1764 } else {
1765 V = Op;
1766 }
1767 Ops.push_back(V);
1768 }
1769
1770 return simplifyInstructionWithOperands(&I, Ops, I.getDataLayout());
1771}
1772
1774 Value *NewOp, InstCombiner &IC) {
// Clones `I`, substitutes NewOp for every use of `SI` in the clone, and
// inserts the clone immediately before `I`.
// NOTE(review): the first signature line and one statement between the
// replace and the insert are missing from this extract.
1775 Instruction *Clone = I.clone();
1776 Clone->replaceUsesOfWith(SI, NewOp);
1778 IC.InsertNewInstBefore(Clone, I.getIterator());
1779 return Clone;
1780}
1781
1783 bool FoldWithMultiUse,
1784 bool SimplifyBothArms) {
// Pushes operation `Op` into the arms of select `SI` when at least one arm
// simplifies (both arms if SimplifyBothArms is set). Returns the new select,
// not yet inserted, or nullptr.
// NOTE(review): the first signature line is missing from this extract.
1785 // Don't modify shared select instructions unless set FoldWithMultiUse
1786 if (!SI->hasOneUser() && !FoldWithMultiUse)
1787 return nullptr;
1788
1789 Value *TV = SI->getTrueValue();
1790 Value *FV = SI->getFalseValue();
1791
1792 // Bool selects with constant operands can be folded to logical ops.
1793 if (SI->getType()->isIntOrIntVectorTy(1))
1794 return nullptr;
1795
1796 // Avoid breaking min/max reduction pattern,
1797 // which is necessary for vectorization later.
// NOTE(review): the guard line preceding this loop is missing from this
// extract. The loop bails out if any PHI operand of Op takes Op as an
// incoming value.
1799 for (Value *IntrinOp : Op.operands())
1800 if (auto *PN = dyn_cast<PHINode>(IntrinOp))
1801 for (Value *PhiOp : PN->operands())
1802 if (PhiOp == &Op)
1803 return nullptr;
1804
1805 // Test if a FCmpInst instruction is used exclusively by a select as
1806 // part of a minimum or maximum operation. If so, refrain from doing
1807 // any other folding. This helps out other analyses which understand
1808 // non-obfuscated minimum and maximum idioms. And in this case, at
1809 // least one of the comparison operands has at least one user besides
1810 // the compare (the select), which would often largely negate the
1811 // benefit of folding anyway.
1812 if (auto *CI = dyn_cast<FCmpInst>(SI->getCondition())) {
1813 if (CI->hasOneUse()) {
1814 Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1815 if (((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1)) &&
1816 !CI->isCommutative())
1817 return nullptr;
1818 }
1819 }
1820
1821 // Make sure that one of the select arms folds successfully.
1822 Value *NewTV = simplifyOperationIntoSelectOperand(Op, SI, /*IsTrueArm=*/true);
1823 Value *NewFV =
1824 simplifyOperationIntoSelectOperand(Op, SI, /*IsTrueArm=*/false);
1825 if (!NewTV && !NewFV)
1826 return nullptr;
1827
1828 if (SimplifyBothArms && !(NewTV && NewFV))
1829 return nullptr;
1830
1831 // Create an instruction for the arm that did not fold.
1832 if (!NewTV)
1833 NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);
1834 if (!NewFV)
1835 NewFV = foldOperationIntoSelectOperand(Op, SI, FV, *this);
1836
1837 SelectInst *NewSel = SelectInst::Create(SI->getCondition(), NewTV, NewFV);
1838
1839 // Preserve metadata that remains valid for the transformed select.
1840 NewSel->copyMetadata(*SI,
1841 {LLVMContext::MD_prof, LLVMContext::MD_unpredictable});
1842
1843 // Preserve source location information.
1844 NewSel->setDebugLoc(SI->getDebugLoc());
1845
1846 return NewSel;
1847}
1848
1850 Value *InValue, BasicBlock *InBB,
1851 const DataLayout &DL,
1852 const SimplifyQuery SQ) {
// Tries to simplify `I` as if PHI `PN` were replaced by its incoming value
// InValue from block InBB. Returns the simplified value or nullptr.
// NOTE(review): the first signature line is missing from this extract.
1853 // NB: It is a precondition of this transform that the operands be
1854 // phi translatable!
// NOTE(review): the declaration of `Ops` is missing from this extract.
1856 for (Value *Op : I.operands()) {
1857 if (Op == PN)
1858 Ops.push_back(InValue);
1859 else
1860 Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
1861 }
1862
1863 // Don't consider the simplification successful if we get back a constant
1864 // expression. That's just an instruction in hiding.
1865 // Also reject the case where we simplify back to the phi node. We wouldn't
1866 // be able to remove it in that case.
// NOTE(review): the line that declares `NewVal` and opens this call is
// missing from this extract. The query is anchored at InBB's terminator.
1868 &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
1869 if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr()))
1870 return NewVal;
1871
1872 // Check if incoming PHI value can be replaced with constant
1873 // based on implied condition.
1874 CondBrInst *TerminatorBI = dyn_cast<CondBrInst>(InBB->getTerminator());
1875 const ICmpInst *ICmp = dyn_cast<ICmpInst>(&I);
// Only applies when `I` is an icmp and InBB ends in a conditional branch with
// two distinct successors.
1876 if (TerminatorBI &&
1877 TerminatorBI->getSuccessor(0) != TerminatorBI->getSuccessor(1) && ICmp) {
// True when the PHI's block is the branch's first successor.
1878 bool LHSIsTrue = TerminatorBI->getSuccessor(0) == PN->getParent();
1879 std::optional<bool> ImpliedCond = isImpliedCondition(
1880 TerminatorBI->getCondition(), ICmp->getCmpPredicate(), Ops[0], Ops[1],
1881 DL, LHSIsTrue);
1882 if (ImpliedCond)
1883 return ConstantInt::getBool(I.getType(), ImpliedCond.value());
1884 }
1885
1886 return nullptr;
1887}
1888
// Requirements checked below: operand 0 of Op is a one-use select, operand 1 is
// an immediate constant, Op itself has one use, and one select arm is a one-use
// binop with Op's opcode applied to the other arm and an immediate constant.
// Returns the replacement binary operator, or nullptr if the pattern does not
// match or the constants fail to fold.
// NOTE(review): the signature line (listing line 1897) is absent from this
// extract.
1889/// In some cases it is beneficial to fold a select into a binary operator.
1890/// For example:
1891/// %1 = or %in, 4
1892/// %2 = select %cond, %1, %in
1893/// %3 = or %2, 1
1894/// =>
1895/// %1 = select i1 %cond, 5, 1
1896/// %2 = or %1, %in
1898 assert(Op.isAssociative() && "The operation must be associative!");
1899
1900 SelectInst *SI = dyn_cast<SelectInst>(Op.getOperand(0));
1901
1902 Constant *Const;
1903 if (!SI || !match(Op.getOperand(1), m_ImmConstant(Const)) ||
1904 !Op.hasOneUse() || !SI->hasOneUse())
1905 return nullptr;
1906
1907 Value *TV = SI->getTrueValue();
1908 Value *FV = SI->getFalseValue();
1909 Value *Input, *NewTV, *NewFV;
1910 Constant *Const2;
1911
// Case 1: the true arm is `FV op Const2`, so the shared input is FV.
// Case 2: the false arm is `TV op Const2`, so the shared input is TV.
// The arm that wraps the input gets the folded constant; the other arm gets
// Op's own constant.
1912 if (TV->hasOneUse() && match(TV, m_BinOp(Op.getOpcode(), m_Specific(FV),
1913 m_ImmConstant(Const2)))) {
1914 NewTV = ConstantFoldBinaryInstruction(Op.getOpcode(), Const, Const2);
1915 NewFV = Const;
1916 Input = FV;
1917 } else if (FV->hasOneUse() &&
1918 match(FV, m_BinOp(Op.getOpcode(), m_Specific(TV),
1919 m_ImmConstant(Const2)))) {
1920 NewTV = Const;
1921 NewFV = ConstantFoldBinaryInstruction(Op.getOpcode(), Const, Const2);
1922 Input = TV;
1923 } else
1924 return nullptr;
1925
// Constant folding may fail and yield null; give up in that case.
1926 if (!NewTV || !NewFV)
1927 return nullptr;
1928
1929 Value *NewSI =
1930 Builder.CreateSelect(SI->getCondition(), NewTV, NewFV, "",
1931 ProfcheckDisableMetadataFixes ? nullptr : SI);
1932 return BinaryOperator::Create(Op.getOpcode(), NewSI, Input);
1933}
1934
// Folds instruction I, which uses phi PN, into PN's incoming edges. Each
// incoming value is either simplified together with I, or a clone of I is
// placed in the corresponding predecessor; a new phi over the results then
// replaces I. Returns nullptr when an operand is not phi-translatable, when
// more than one incoming value would need a non-simplified copy, or when such a
// copy would land on a conditional, unreachable, or back edge.
// NOTE(review): several listing lines are absent from this extract (1935, 1949,
// 2010, 2034, 2050, 2052, 2067, 2084, 2086, 2093). UI, BI, Clones, Op, User and
// ToReplace are used below without a visible declaration.
1936 bool AllowMultipleUses) {
1937 unsigned NumPHIValues = PN->getNumIncomingValues();
1938 if (NumPHIValues == 0)
1939 return nullptr;
1940
1941 // We normally only transform phis with a single use. However, if a PHI has
1942 // multiple uses and they are all the same operation, we can fold *all* of the
1943 // uses into the PHI.
1944 bool OneUse = PN->hasOneUse();
1945 bool IdenticalUsers = false;
1946 if (!AllowMultipleUses && !OneUse) {
1947 // Walk the use list for the instruction, comparing them to I.
1948 for (User *U : PN->users()) {
1950 if (UI != &I && !I.isIdenticalTo(UI))
1951 return nullptr;
1952 }
1953 // Otherwise, we can replace *all* users with the new PHI we form.
1954 IdenticalUsers = true;
1955 }
1956
1957 // Check that all operands are phi-translatable.
1958 for (Value *Op : I.operands()) {
1959 if (Op == PN)
1960 continue;
1961
1962 // Non-instructions never require phi-translation.
// Note: this local `I` shadows the outer instruction `I` inside the loop body.
1963 auto *I = dyn_cast<Instruction>(Op);
1964 if (!I)
1965 continue;
1966
1967 // Phi-translate can handle phi nodes in the same block.
1968 if (isa<PHINode>(I))
1969 if (I->getParent() == PN->getParent())
1970 continue;
1971
1972 // Operand dominates the block, no phi-translation necessary.
1973 if (DT.dominates(I, PN->getParent()))
1974 continue;
1975
1976 // Not phi-translatable, bail out.
1977 return nullptr;
1978 }
1979
1980 // Check to see whether the instruction can be folded into each phi operand.
1981 // If there is one operand that does not fold, remember the BB it is in.
// NewPhiValues gets one entry per incoming value; a nullptr entry marks an
// index (also recorded in OpsToMoveUseToIncomingBB) to be filled with a clone.
1982 SmallVector<Value *> NewPhiValues;
1983 SmallVector<unsigned int> OpsToMoveUseToIncomingBB;
1984 bool SeenNonSimplifiedInVal = false;
1985 for (unsigned i = 0; i != NumPHIValues; ++i) {
1986 Value *InVal = PN->getIncomingValue(i);
1987 BasicBlock *InBB = PN->getIncomingBlock(i);
1988
1989 if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InVal, InBB, DL, SQ)) {
1990 NewPhiValues.push_back(NewVal);
1991 continue;
1992 }
1993
1994 // Handle some cases that can't be fully simplified, but where we know that
1995 // the two instructions will fold into one.
1996 auto WillFold = [&]() {
1997 if (!InVal->hasUseList() || !InVal->hasOneUser())
1998 return false;
1999
2000 // icmp of ucmp/scmp with constant will fold to icmp.
2001 const APInt *Ignored;
2002 if (isa<CmpIntrinsic>(InVal) &&
2003 match(&I, m_ICmp(m_Specific(PN), m_APInt(Ignored))))
2004 return true;
2005
2006 // icmp eq zext(bool), 0 will fold to !bool.
2007 if (isa<ZExtInst>(InVal) &&
2008 cast<ZExtInst>(InVal)->getSrcTy()->isIntOrIntVectorTy(1) &&
2009 match(&I,
2011 return true;
2012
2013 return false;
2014 };
2015
2016 if (WillFold()) {
2017 OpsToMoveUseToIncomingBB.push_back(i);
2018 NewPhiValues.push_back(nullptr);
2019 continue;
2020 }
2021
2022 if (!OneUse && !IdenticalUsers)
2023 return nullptr;
2024
2025 if (SeenNonSimplifiedInVal)
2026 return nullptr; // More than one non-simplified value.
2027 SeenNonSimplifiedInVal = true;
2028
2029 // If there is exactly one non-simplified value, we can insert a copy of the
2030 // operation in that block. However, if this is a critical edge, we would
2031 // be inserting the computation on some other paths (e.g. inside a loop).
2032 // Only do this if the pred block is unconditionally branching into the phi
2033 // block. Also, make sure that the pred block is not dead code.
2035 if (!BI || !DT.isReachableFromEntry(InBB))
2036 return nullptr;
2037
2038 NewPhiValues.push_back(nullptr);
2039 OpsToMoveUseToIncomingBB.push_back(i);
2040
2041 // Do not push the operation across a loop backedge. This could result in
2042 // an infinite combine loop, and is generally non-profitable (especially
2043 // if the operation was originally outside the loop).
2044 if (isBackEdge(InBB, PN->getParent()))
2045 return nullptr;
2046 }
2047
2048 // Clone the instruction that uses the phi node and move it into the incoming
2049 // BB because we know that the next iteration of InstCombine will simplify it.
2051 for (auto OpIndex : OpsToMoveUseToIncomingBB) {
2053 BasicBlock *OpBB = PN->getIncomingBlock(OpIndex);
2054
// Reuse an existing clone when the same predecessor block appears for more
// than one incoming index.
2055 Instruction *Clone = Clones.lookup(OpBB);
2056 if (!Clone) {
2057 Clone = I.clone();
2058 for (Use &U : Clone->operands()) {
2059 if (U == PN)
2060 U = Op;
2061 else
2062 U = U->DoPHITranslation(PN->getParent(), OpBB);
2063 }
2064 Clone = InsertNewInstBefore(Clone, OpBB->getTerminator()->getIterator());
2065 Clones.insert({OpBB, Clone});
2066 // We may have speculated the instruction.
2068 }
2069
2070 NewPhiValues[OpIndex] = Clone;
2071 }
2072
2073 // Okay, we can do the transformation: create the new PHI node.
2074 PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
2075 InsertNewInstBefore(NewPN, PN->getIterator());
2076 NewPN->takeName(PN);
2077 NewPN->setDebugLoc(PN->getDebugLoc());
2078
2079 for (unsigned i = 0; i != NumPHIValues; ++i)
2080 NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
2081
2082 if (IdenticalUsers) {
2083 // Collect and deduplicate users up-front to avoid iterator invalidation.
2085 for (User *U : PN->users()) {
2087 if (User == &I)
2088 continue;
2089 ToReplace.insert(User);
2090 }
// Note: the loop variable `I` shadows the outer instruction `I` here.
2091 for (Instruction *I : ToReplace) {
2092 replaceInstUsesWith(*I, NewPN);
2094 }
2095 OneUse = true;
2096 }
2097
2098 if (OneUse) {
2099 replaceAllDbgUsesWith(*PN, *NewPN, *PN, DT);
2100 }
2101 return replaceInstUsesWith(I, NewPN);
2102}
2103
// Combines `BO = op(BO0, BO1)`, where BO0 and BO1 are same-opcode simple
// recurrences in one block with constant start and step values, into a single
// recurrence whose start and step are the constant-folded combinations of the
// originals. Returns the replacement for BO, or nullptr if the pattern does not
// match or the constants fail to fold.
// NOTE(review): the signature line (2104) and listing lines 2196-2202 are
// absent from this extract; the removal of the old recurrences mentioned in the
// final comment is therefore not visible here.
2105 if (!BO.isAssociative())
2106 return nullptr;
2107
2108 // Find the interleaved binary ops.
2109 auto Opc = BO.getOpcode();
2110 auto *BO0 = dyn_cast<BinaryOperator>(BO.getOperand(0));
2111 auto *BO1 = dyn_cast<BinaryOperator>(BO.getOperand(1));
2112 if (!BO0 || !BO1 || !BO0->hasNUses(2) || !BO1->hasNUses(2) ||
2113 BO0->getOpcode() != Opc || BO1->getOpcode() != Opc ||
2114 !BO0->isAssociative() || !BO1->isAssociative() ||
2115 BO0->getParent() != BO1->getParent())
2116 return nullptr;
2117
2118 assert(BO.isCommutative() && BO0->isCommutative() && BO1->isCommutative() &&
2119 "Expected commutative instructions!");
2120
2121 // Find the matching phis, forming the recurrences.
2122 PHINode *PN0, *PN1;
2123 Value *Start0, *Step0, *Start1, *Step1;
2124 if (!matchSimpleRecurrence(BO0, PN0, Start0, Step0) || !PN0->hasOneUse() ||
2125 !matchSimpleRecurrence(BO1, PN1, Start1, Step1) || !PN1->hasOneUse() ||
2126 PN0->getParent() != PN1->getParent())
2127 return nullptr;
2128
2129 assert(PN0->getNumIncomingValues() == 2 && PN1->getNumIncomingValues() == 2 &&
2130 "Expected PHIs with two incoming values!");
2131
2132 // Convert the start and step values to constants.
2133 auto *Init0 = dyn_cast<Constant>(Start0);
2134 auto *Init1 = dyn_cast<Constant>(Start1);
2135 auto *C0 = dyn_cast<Constant>(Step0);
2136 auto *C1 = dyn_cast<Constant>(Step1);
2137 if (!Init0 || !Init1 || !C0 || !C1)
2138 return nullptr;
2139
2140 // Fold the recurrence constants.
2141 auto *Init = ConstantFoldBinaryInstruction(Opc, Init0, Init1);
2142 auto *C = ConstantFoldBinaryInstruction(Opc, C0, C1);
2143 if (!Init || !C)
2144 return nullptr;
2145
2146 // Create the reduced PHI.
2147 auto *NewPN = PHINode::Create(PN0->getType(), PN0->getNumIncomingValues(),
2148 "reduced.phi");
2149
2150 // Create the new binary op.
2151 auto *NewBO = BinaryOperator::Create(Opc, NewPN, C);
2152 if (Opc == Instruction::FAdd || Opc == Instruction::FMul) {
2153 // Intersect FMF flags for FADD and FMUL.
2154 FastMathFlags Intersect = BO0->getFastMathFlags() &
2155 BO1->getFastMathFlags() & BO.getFastMathFlags();
2156 NewBO->setFastMathFlags(Intersect);
2157 } else {
// Merge the flags of all three original binops and apply the result to the
// new op.
2158 OverflowTracking Flags;
2159 Flags.AllKnownNonNegative = false;
2160 Flags.AllKnownNonZero = false;
2161 Flags.mergeFlags(*BO0);
2162 Flags.mergeFlags(*BO1);
2163 Flags.mergeFlags(BO);
2164 Flags.applyFlags(*NewBO);
2165 }
2166 NewBO->takeName(&BO);
2167
// Populate the new phi edge by edge: the start edge receives the folded start
// constant and the recurrence edge receives the new binop. The asserts check
// that PN1 carries the corresponding value on the same incoming block.
2168 for (unsigned I = 0, E = PN0->getNumIncomingValues(); I != E; ++I) {
2169 auto *V = PN0->getIncomingValue(I);
2170 auto *BB = PN0->getIncomingBlock(I);
2171 if (V == Init0) {
2172 assert(((PN1->getIncomingValue(0) == Init1 &&
2173 PN1->getIncomingBlock(0) == BB) ||
2174 (PN1->getIncomingValue(1) == Init1 &&
2175 PN1->getIncomingBlock(1) == BB)) &&
2176 "Invalid incoming block!");
2177 NewPN->addIncoming(Init, BB);
2178 } else if (V == BO0) {
2179 assert(((PN1->getIncomingValue(0) == BO1 &&
2180 PN1->getIncomingBlock(0) == BB) ||
2181 (PN1->getIncomingValue(1) == BO1 &&
2182 PN1->getIncomingBlock(1) == BB)) &&
2183 "Invalid incoming block!");
2184 NewPN->addIncoming(NewBO, BB);
2185 } else
2186 llvm_unreachable("Unexpected incoming value!");
2187 }
2188
2189 LLVM_DEBUG(dbgs() << " Combined " << *PN0 << "\n " << *BO0
2190 << "\n with " << *PN1 << "\n " << *BO1
2191 << '\n');
2192
2193 // Insert the new recurrence and remove the old (dead) ones.
2194 InsertNewInstWith(NewPN, PN0->getIterator());
2195 InsertNewInstWith(NewBO, BO0->getIterator());
2196
2203
2204 return replaceInstUsesWith(BO, NewBO);
2205}
2206
// Folds a binop whose two operands are single-use phis located in the binop's
// own block. Three strategies are tried in order:
//  1. the recurrence fold (foldBinopWithRecurrence);
//  2. every incoming pair contains the constant C, so the binop collapses to a
//     phi of the non-C values;
//  3. both phis have exactly two incoming values and one predecessor supplies
//     immediate constants to both: the constants are folded and the binop is
//     re-created at the end of the other predecessor.
// Returns the new instruction, or nullptr when no strategy applies.
// NOTE(review): listing lines 2207 (signature), 2235 (start of the declaration
// of C) and 2297 (the condition guarding the `return nullptr` inside the final
// scan loop) are absent from this extract.
2208 // Attempt to fold binary operators whose operands are simple recurrences.
2209 if (auto *NewBO = foldBinopWithRecurrence(BO))
2210 return NewBO;
2211
2212 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
2213 // we are guarding against replicating the binop in >1 predecessor.
2214 // This could miss matching a phi with 2 constant incoming values.
2215 auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
2216 auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
2217 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
2218 Phi0->getNumOperands() != Phi1->getNumOperands())
2219 return nullptr;
2220
2221 // TODO: Remove the restriction for binop being in the same block as the phis.
2222 if (BO.getParent() != Phi0->getParent() ||
2223 BO.getParent() != Phi1->getParent())
2224 return nullptr;
2225
2226 // Fold if there is at least one specific constant value in phi0 or phi1's
2227 // incoming values that comes from the same block and this specific constant
2228 // value can be used to do optimization for specific binary operator.
2229 // For example:
2230 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
2231 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
2232 // %add = add i32 %phi0, %phi1
2233 // ==>
2234 // %add = phi i32 [%j, %bb0], [%i, %bb1]
2236 /*AllowRHSConstant*/ false);
2237 if (C) {
2238 SmallVector<Value *, 4> NewIncomingValues;
// Predicate with a side effect: for each operand pair from the same incoming
// block where one side equals C, it records the other side in
// NewIncomingValues.
2239 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
2240 auto &Phi0Use = std::get<0>(T);
2241 auto &Phi1Use = std::get<1>(T);
2242 if (Phi0->getIncomingBlock(Phi0Use) != Phi1->getIncomingBlock(Phi1Use))
2243 return false;
2244 Value *Phi0UseV = Phi0Use.get();
2245 Value *Phi1UseV = Phi1Use.get();
2246 if (Phi0UseV == C)
2247 NewIncomingValues.push_back(Phi1UseV);
2248 else if (Phi1UseV == C)
2249 NewIncomingValues.push_back(Phi0UseV);
2250 else
2251 return false;
2252 return true;
2253 };
2254
2255 if (all_of(zip(Phi0->operands(), Phi1->operands()),
2256 CanFoldIncomingValuePair)) {
2257 PHINode *NewPhi =
2258 PHINode::Create(Phi0->getType(), Phi0->getNumOperands());
2259 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
2260 "The number of collected incoming values should equal the number "
2261 "of the original PHINode operands!");
2262 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
2263 NewPhi->addIncoming(NewIncomingValues[I], Phi0->getIncomingBlock(I));
2264 return NewPhi;
2265 }
2266 }
2267
2268 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
2269 return nullptr;
2270
2271 // Match a pair of incoming constants for one of the predecessor blocks.
2272 BasicBlock *ConstBB, *OtherBB;
2273 Constant *C0, *C1;
2274 if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
2275 ConstBB = Phi0->getIncomingBlock(0);
2276 OtherBB = Phi0->getIncomingBlock(1);
2277 } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
2278 ConstBB = Phi0->getIncomingBlock(1);
2279 OtherBB = Phi0->getIncomingBlock(0);
2280 } else {
2281 return nullptr;
2282 }
2283 if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
2284 return nullptr;
2285
2286 // The block that we are hoisting to must reach here unconditionally.
2287 // Otherwise, we could be speculatively executing an expensive or
2288 // non-speculative op.
2289 auto *PredBlockBranch = dyn_cast<UncondBrInst>(OtherBB->getTerminator());
2290 if (!PredBlockBranch || !DT.isReachableFromEntry(OtherBB))
2291 return nullptr;
2292
2293 // TODO: This check could be tightened to only apply to binops (div/rem) that
2294 // are not safe to speculatively execute. But that could allow hoisting
2295 // potentially expensive instructions (fdiv for example).
2296 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
2298 return nullptr;
2299
2300 // Fold constants for the predecessor block with constant incoming values.
2301 Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
2302 if (!NewC)
2303 return nullptr;
2304
2305 // Make a new binop in the predecessor block with the non-constant incoming
2306 // values.
2307 Builder.SetInsertPoint(PredBlockBranch);
2308 Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
2309 Phi0->getIncomingValueForBlock(OtherBB),
2310 Phi1->getIncomingValueForBlock(OtherBB));
// The builder may return something other than a BinaryOperator, so flags are
// copied only when an actual binop was created.
2311 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
2312 NotFoldedNewBO->copyIRFlags(&BO);
2313
2314 // Replace the binop with a phi of the new values. The old phis are dead.
2315 PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
2316 NewPhi->addIncoming(NewBO, OtherBB);
2317 NewPhi->addIncoming(NewC, ConstBB);
2318 return NewPhi;
2319}
2320
// Tries to fold instruction I into one of its first two operands when that
// operand is a select (via FoldOpIntoSelect) or a phi (via foldOpIntoPhi).
// Operand 0 is tried first; operand 1 is tried only if that yields nothing.
// Returns the folded instruction or nullptr.
// NOTE(review): the signature line (listing line 2321) is absent from this
// extract.
2322 auto TryFoldOperand = [&](unsigned OpIdx,
2323 bool IsOtherParamConst) -> Instruction * {
// The last FoldOpIntoSelect argument is true exactly when the other operand is
// not a constant.
2324 if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(OpIdx)))
2325 return FoldOpIntoSelect(I, Sel, false, !IsOtherParamConst);
2326 if (auto *PN = dyn_cast<PHINode>(I.getOperand(OpIdx)))
2327 return foldOpIntoPhi(I, PN);
2328 return nullptr;
2329 };
2330
2331 if (Instruction *NewI =
2332 TryFoldOperand(/*OpIdx=*/0, isa<Constant>(I.getOperand(1))))
2333 return NewI;
2334 return TryFoldOperand(/*OpIdx=*/1, isa<Constant>(I.getOperand(0)));
2335}
2336
// Returns false only when GEP has all-zero indices, Src does not, and Src has
// more than one use; returns true in every other case.
// NOTE(review): the signature line (listing line 2337) is absent from this
// extract.
2338 // If this GEP has only 0 indices, it is the same pointer as
2339 // Src. If Src is not a trivial GEP too, don't combine
2340 // the indices.
2341 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
2342 !Src.hasOneUse())
2343 return false;
2344 return true;
2345}
2346
// For scalable NewCTy only splat constants are handled. For fixed vectors the
// result starts as all-poison and each lane selected by ShMask is filled from
// the matching element of C; conflicting assignments make the function fail.
// NOTE(review): listing lines 2353 (start of the signature) and 2359 (the
// statement executed for a scalable splat) are absent from this extract.
2347/// Find a constant NewC that has property:
2348/// shuffle(NewC, ShMask) = C
2349/// Returns nullptr if such a constant does not exist e.g. ShMask=<0,0> C=<1,2>
2350///
2351/// A 1-to-1 mapping is not required. Example:
2352/// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison>
2354 VectorType *NewCTy) {
2355 if (isa<ScalableVectorType>(NewCTy)) {
2356 Constant *Splat = C->getSplatValue();
2357 if (!Splat)
2358 return nullptr;
2360 }
2361
// A result type with more elements than C is rejected.
2362 if (cast<FixedVectorType>(NewCTy)->getNumElements() >
2363 cast<FixedVectorType>(C->getType())->getNumElements())
2364 return nullptr;
2365
2366 unsigned NewCNumElts = cast<FixedVectorType>(NewCTy)->getNumElements();
2367 PoisonValue *PoisonScalar = PoisonValue::get(C->getType()->getScalarType());
2368 SmallVector<Constant *, 16> NewVecC(NewCNumElts, PoisonScalar);
2369 unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
2370 for (unsigned I = 0; I < NumElts; ++I) {
2371 Constant *CElt = C->getAggregateElement(I);
// Negative mask entries are skipped: those output lanes impose no constraint.
2372 if (ShMask[I] >= 0) {
2373 assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
2374 Constant *NewCElt = NewVecC[ShMask[I]];
2375 // Bail out if:
2376 // 1. The constant vector contains a constant expression.
2377 // 2. The shuffle needs an element of the constant vector that can't
2378 // be mapped to a new constant vector.
2379 // 3. This is a widening shuffle that copies elements of V1 into the
2380 // extended elements (extending with poison is allowed).
2381 if (!CElt || (!isa<PoisonValue>(NewCElt) && NewCElt != CElt) ||
2382 I >= NewCNumElts)
2383 return nullptr;
2384 NewVecC[ShMask[I]] = CElt;
2385 }
2386 }
2387 return ConstantVector::get(NewVecC);
2388}
2389
// Constant-folds Opcode applied to Vector and a splat of Splat. The operands
// are swapped when SplatLHS is false, which places Vector on the left. The
// result is whatever ConstantFoldBinaryOpOperands returns.
// NOTE(review): listing lines 2391 (start of the signature) and 2395
// (presumably the declaration of LHS as the splat built from EC) are absent
// from this extract.
2390// Get the result of `Vector Op Splat` (or Splat Op Vector if \p SplatLHS).
2392 Constant *Splat, bool SplatLHS,
2393 const DataLayout &DL) {
2394 ElementCount EC = cast<VectorType>(Vector->getType())->getElementCount();
2396 Constant *RHS = Vector;
2397 if (!SplatLHS)
2398 std::swap(LHS, RHS);
2399 return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
2400}
2401
// Sinks binary operator Inst below splice intrinsic calls (intrinsic chosen by
// SpliceID) on its operands, per the patterns in the comments below. On success
// it returns a new, not yet inserted call to the splice intrinsic whose first
// argument is the new binop; otherwise it returns nullptr.
// NOTE(review): listing lines 2403 (start of the signature), 2416, 2419 and
// 2433 (the splice matchers) are absent from this extract.
2402template <Intrinsic::ID SpliceID>
2404 InstCombiner::BuilderTy &Builder) {
2405 Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
// Helper: builds Op(X, Y) with Inst's name and IR flags, then wraps it in
// splice(result, poison, Offset).
2406 auto CreateBinOpSplice = [&](Value *X, Value *Y, Value *Offset) {
2407 Value *V = Builder.CreateBinOp(Inst.getOpcode(), X, Y, Inst.getName());
2408 if (auto *BO = dyn_cast<BinaryOperator>(V))
2409 BO->copyIRFlags(&Inst);
2410 Module *M = Inst.getModule();
2411 Function *F = Intrinsic::getOrInsertDeclaration(M, SpliceID, V->getType());
2412 return CallInst::Create(F, {V, PoisonValue::get(V->getType()), Offset});
2413 };
2414 Value *V1, *V2, *Offset;
2415 if (match(LHS,
2417 // Op(splice(V1, poison, offset), splice(V2, poison, offset))
2418 // -> splice(Op(V1, V2), poison, offset)
2420 m_Specific(Offset))) &&
2421 (LHS->hasOneUse() || RHS->hasOneUse() ||
2422 (LHS == RHS && LHS->hasNUses(2))))
2423 return CreateBinOpSplice(V1, V2, Offset);
2424
2425 // Op(splice(V1, poison, offset), RHSSplat)
2426 // -> splice(Op(V1, RHSSplat), poison, offset)
2427 if (LHS->hasOneUse() && isSplatValue(RHS))
2428 return CreateBinOpSplice(V1, RHS, Offset);
2429 }
2430 // Op(LHSSplat, splice(V2, poison, offset))
2431 // -> splice(Op(LHSSplat, V2), poison, offset)
2432 else if (isSplatValue(LHS) &&
2434 m_Value(Offset)))))
2435 return CreateBinOpSplice(LHS, V2, Offset);
2436
2437 // TODO: Fold binops of the form
2438 // Op(splice(poison, V1, offset), splice(poison, V2, offset))
2439 // -> splice(poison, Op(V1, V2), offset)
2440
2441 return nullptr;
2442}
2443
// Vector-specific folds for binary operator Inst. Returns nullptr immediately
// for non-vector types, and at the end when no fold applies. The folds, in
// order: constant splat with constant vector.insert, concatenation shuffles,
// vector.reverse, vp.reverse, two folds whose calls are missing from this
// listing, then (after a speculation-safety bail-out) same-mask shuffles,
// commuted select-shuffles, shuffle-with-constant, and splat reassociation.
// NOTE(review): listing lines 2444 (signature), 2515, 2547, 2553, 2556, 2568,
// 2573, 2576, 2582, 2607 and 2634 are absent from this extract; the statements
// they belong to are incomplete below.
2445 if (!isa<VectorType>(Inst.getType()))
2446 return nullptr;
2447
2448 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
2449 Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
2450 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
2451 cast<VectorType>(Inst.getType())->getElementCount());
2452 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
2453 cast<VectorType>(Inst.getType())->getElementCount());
2454
2455 auto foldConstantsThroughSubVectorInsertSplat =
2456 [&](Value *MaybeSubVector, Value *MaybeSplat,
2457 bool SplatLHS) -> Instruction * {
2458 Value *Idx;
2459 Constant *Splat, *SubVector, *Dest;
2460 if (!match(MaybeSplat, m_ConstantSplat(m_Constant(Splat))) ||
2461 !match(MaybeSubVector,
2462 m_VectorInsert(m_Constant(Dest), m_Constant(SubVector),
2463 m_Value(Idx))))
2464 return nullptr;
2465 SubVector =
2466 constantFoldBinOpWithSplat(Opcode, SubVector, Splat, SplatLHS, DL);
2467 Dest = constantFoldBinOpWithSplat(Opcode, Dest, Splat, SplatLHS, DL);
2468 if (!SubVector || !Dest)
2469 return nullptr;
2470 auto *InsertVector =
2471 Builder.CreateInsertVector(Dest->getType(), Dest, SubVector, Idx);
2472 return replaceInstUsesWith(Inst, InsertVector);
2473 };
2474
2475 // If one operand is a constant splat and the other operand is a
2476 // `vector.insert` where both the destination and subvector are constant,
2477 // apply the operation to both the destination and subvector, returning a new
2478 // constant `vector.insert`. This helps constant folding for scalable vectors.
2479 if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
2480 /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false))
2481 return Folded;
2482 if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
2483 /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true))
2484 return Folded;
2485
2486 // If both operands of the binop are vector concatenations, then perform the
2487 // narrow binop on each pair of the source operands followed by concatenation
2488 // of the results.
2489 Value *L0, *L1, *R0, *R1;
2490 ArrayRef<int> Mask;
2491 if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
2492 match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
2493 LHS->hasOneUse() && RHS->hasOneUse() &&
2494 cast<ShuffleVectorInst>(LHS)->isConcat() &&
2495 cast<ShuffleVectorInst>(RHS)->isConcat()) {
2496 // This transform does not have the speculative execution constraint as
2497 // below because the shuffle is a concatenation. The new binops are
2498 // operating on exactly the same elements as the existing binop.
2499 // TODO: We could ease the mask requirement to allow different undef lanes,
2500 // but that requires an analysis of the binop-with-undef output value.
2501 Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
2502 if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
2503 BO->copyIRFlags(&Inst);
2504 Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
2505 if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
2506 BO->copyIRFlags(&Inst);
2507 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
2508 }
2509
2510 auto createBinOpReverse = [&](Value *X, Value *Y) {
2511 Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
2512 if (auto *BO = dyn_cast<BinaryOperator>(V))
2513 BO->copyIRFlags(&Inst);
2514 Module *M = Inst.getModule();
2516 M, Intrinsic::vector_reverse, V->getType());
2517 return CallInst::Create(F, V);
2518 };
2519
2520 // NOTE: Reverse shuffles don't require the speculative execution protection
2521 // below because they don't affect which lanes take part in the computation.
2522
2523 Value *V1, *V2;
2524 if (match(LHS, m_VecReverse(m_Value(V1)))) {
2525 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2526 if (match(RHS, m_VecReverse(m_Value(V2))) &&
2527 (LHS->hasOneUse() || RHS->hasOneUse() ||
2528 (LHS == RHS && LHS->hasNUses(2))))
2529 return createBinOpReverse(V1, V2);
2530
2531 // Op(rev(V1), RHSSplat) -> rev(Op(V1, RHSSplat))
2532 if (LHS->hasOneUse() && isSplatValue(RHS))
2533 return createBinOpReverse(V1, RHS);
2534 }
2535 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2536 else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
2537 return createBinOpReverse(LHS, V2);
2538
2539 auto createBinOpVPReverse = [&](Value *X, Value *Y, Value *EVL) {
2540 Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
2541 if (auto *BO = dyn_cast<BinaryOperator>(V))
2542 BO->copyIRFlags(&Inst);
2543
2544 ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
2545 Value *AllTrueMask = Builder.CreateVectorSplat(EC, Builder.getTrue());
2546 Module *M = Inst.getModule();
2548 M, Intrinsic::experimental_vp_reverse, V->getType());
2549 return CallInst::Create(F, {V, AllTrueMask, EVL});
2550 };
2551
2552 Value *EVL;
2554 m_Value(V1), m_AllOnes(), m_Value(EVL)))) {
2555 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2557 m_Value(V2), m_AllOnes(), m_Specific(EVL))) &&
2558 (LHS->hasOneUse() || RHS->hasOneUse() ||
2559 (LHS == RHS && LHS->hasNUses(2))))
2560 return createBinOpVPReverse(V1, V2, EVL);
2561
2562 // Op(rev(V1), RHSSplat) -> rev(Op(V1, RHSSplat))
2563 if (LHS->hasOneUse() && isSplatValue(RHS))
2564 return createBinOpVPReverse(V1, RHS, EVL);
2565 }
2566 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2567 else if (isSplatValue(LHS) &&
2569 m_Value(V2), m_AllOnes(), m_Value(EVL))))
2570 return createBinOpVPReverse(LHS, V2, EVL);
2571
2572 if (Instruction *Folded =
2574 return Folded;
2575 if (Instruction *Folded =
2577 return Folded;
2578
2579 // It may not be safe to reorder shuffles and things like div, urem, etc.
2580 // because we may trap when executing those ops on unknown vector elements.
2581 // See PR20059.
2583 return nullptr;
2584
2585 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
2586 Value *XY = Builder.CreateBinOp(Opcode, X, Y);
2587 if (auto *BO = dyn_cast<BinaryOperator>(XY))
2588 BO->copyIRFlags(&Inst);
2589 return new ShuffleVectorInst(XY, M);
2590 };
2591
2592 // If both arguments of the binary operation are shuffles that use the same
2593 // mask and shuffle within a single vector, move the shuffle after the binop.
2594 if (match(LHS, m_Shuffle(m_Value(V1), m_Poison(), m_Mask(Mask))) &&
2595 match(RHS, m_Shuffle(m_Value(V2), m_Poison(), m_SpecificMask(Mask))) &&
2596 V1->getType() == V2->getType() &&
2597 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
2598 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
2599 return createBinOpShuffle(V1, V2, Mask);
2600 }
2601
2602 // If both arguments of a commutative binop are select-shuffles that use the
2603 // same mask with commuted operands, the shuffles are unnecessary.
2604 if (Inst.isCommutative() &&
2605 match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
2606 match(RHS,
2608 auto *LShuf = cast<ShuffleVectorInst>(LHS);
2609 auto *RShuf = cast<ShuffleVectorInst>(RHS);
2610 // TODO: Allow shuffles that contain undefs in the mask?
2611 // That is legal, but it reduces undef knowledge.
2612 // TODO: Allow arbitrary shuffles by shuffling after binop?
2613 // That might be legal, but we have to deal with poison.
2614 if (LShuf->isSelect() &&
2615 !is_contained(LShuf->getShuffleMask(), PoisonMaskElem) &&
2616 RShuf->isSelect() &&
2617 !is_contained(RShuf->getShuffleMask(), PoisonMaskElem)) {
2618 // Example:
2619 // LHS = shuffle V1, V2, <0, 5, 6, 3>
2620 // RHS = shuffle V2, V1, <0, 5, 6, 3>
2621 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
2622 Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
2623 NewBO->copyIRFlags(&Inst);
2624 return NewBO;
2625 }
2626 }
2627
2628 // If one argument is a shuffle within one vector and the other is a constant,
2629 // try moving the shuffle after the binary operation. This canonicalization
2630 // intends to move shuffles closer to other shuffles and binops closer to
2631 // other binops, so they can be folded. It may also enable demanded elements
2632 // transforms.
2633 Constant *C;
2635 m_Mask(Mask))),
2636 m_ImmConstant(C)))) {
2637 assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
2638 "Shuffle should not change scalar type");
2639
// ConstOp1 records which side the constant is on so that the operand order is
// preserved when the binop is rebuilt.
2640 bool ConstOp1 = isa<Constant>(RHS);
2641 if (Constant *NewC =
2642 unshuffleConstant(Mask, C, cast<VectorType>(V1->getType()))) {
2643 // For fixed vectors, lanes of NewC not used by the shuffle will be poison
2644 // which will cause UB for div/rem. Mask them with a safe constant.
2645 if (isa<FixedVectorType>(V1->getType()) && Inst.isIntDivRem())
2646 NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
2647
2648 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
2649 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
2650 Value *NewLHS = ConstOp1 ? V1 : NewC;
2651 Value *NewRHS = ConstOp1 ? NewC : V1;
2652 return createBinOpShuffle(NewLHS, NewRHS, Mask);
2653 }
2654 }
2655
2656 // Try to reassociate to sink a splat shuffle after a binary operation.
2657 if (Inst.isAssociative() && Inst.isCommutative()) {
2658 // Canonicalize shuffle operand as LHS.
2659 if (isa<ShuffleVectorInst>(RHS))
2660 std::swap(LHS, RHS);
2661
2662 Value *X;
2663 ArrayRef<int> MaskC;
2664 int SplatIndex;
2665 Value *Y, *OtherOp;
2666 if (!match(LHS,
2667 m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
2668 !match(MaskC, m_SplatOrPoisonMask(SplatIndex)) ||
2669 X->getType() != Inst.getType() ||
2670 !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
2671 return nullptr;
2672
2673 // FIXME: This may not be safe if the analysis allows undef elements. By
2674 // moving 'Y' before the splat shuffle, we are implicitly assuming
2675 // that it is not undef/poison at the splat index.
2676 if (isSplatValue(OtherOp, SplatIndex)) {
2677 std::swap(Y, OtherOp);
2678 } else if (!isSplatValue(Y, SplatIndex)) {
2679 return nullptr;
2680 }
2681
2682 // X and Y are splatted values, so perform the binary operation on those
2683 // values followed by a splat followed by the 2nd binary operation:
2684 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
2685 Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
2686 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
2687 Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
2688 Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
2689
2690 // Intersect FMF on both new binops. Other (poison-generating) flags are
2691 // dropped to be safe.
2692 if (isa<FPMathOperator>(R)) {
2693 R->copyFastMathFlags(&Inst);
2694 R->andIRFlags(RHS);
2695 }
2696 if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
2697 NewInstBO->copyIRFlags(R);
2698 return R;
2699 }
2700
2701 return nullptr;
2702}
2703
2704/// Try to narrow the width of a binop if at least 1 operand is an extend of
2705/// of a value. This requires a potentially expensive known bits check to make
2706/// sure the narrow op does not overflow.
2707Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
2708 // We need at least one extended operand.
2709 Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
2710
2711 // If this is a sub, we swap the operands since we always want an extension
2712 // on the RHS. The LHS can be an extension or a constant.
2713 if (BO.getOpcode() == Instruction::Sub)
2714 std::swap(Op0, Op1);
2715
2716 Value *X;
2717 bool IsSext = match(Op0, m_SExt(m_Value(X)));
2718 if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
2719 return nullptr;
2720
2721 // If both operands are the same extension from the same source type and we
2722 // can eliminate at least one (hasOneUse), this might work.
2723 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
2724 Value *Y;
2725 if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
2726 cast<Operator>(Op1)->getOpcode() == CastOpc &&
2727 (Op0->hasOneUse() || Op1->hasOneUse()))) {
2728 // If that did not match, see if we have a suitable constant operand.
2729 // Truncating and extending must produce the same constant.
2730 Constant *WideC;
2731 if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
2732 return nullptr;
2733 Constant *NarrowC = getLosslessInvCast(WideC, X->getType(), CastOpc, DL);
2734 if (!NarrowC)
2735 return nullptr;
2736 Y = NarrowC;
2737 }
2738
2739 // Swap back now that we found our operands.
2740 if (BO.getOpcode() == Instruction::Sub)
2741 std::swap(X, Y);
2742
2743 // Both operands have narrow versions. Last step: the math must not overflow
2744 // in the narrow width.
2745 if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
2746 return nullptr;
2747
2748 // bo (ext X), (ext Y) --> ext (bo X, Y)
2749 // bo (ext X), C --> ext (bo X, C')
2750 Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
2751 if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
2752 if (IsSext)
2753 NewBinOp->setHasNoSignedWrap();
2754 else
2755 NewBinOp->setHasNoUnsignedWrap();
2756 }
2757 return CastInst::Create(CastOpc, NarrowBO, BO.getType());
2758}
2759
2760/// Determine nowrap flags for (gep (gep p, x), y) to (gep p, (x + y))
2761/// transform.
2766
2767/// Thread a GEP operation with constant indices through the constant true/false
2768/// arms of a select.
2770 InstCombiner::BuilderTy &Builder) {
2771 if (!GEP.hasAllConstantIndices())
2772 return nullptr;
2773
2774 Instruction *Sel;
2775 Value *Cond;
2776 Constant *TrueC, *FalseC;
2777 if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
2778 !match(Sel,
2779 m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
2780 return nullptr;
2781
2782 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
2783 // Propagate 'inbounds' and metadata from existing instructions.
2784 // Note: using IRBuilder to create the constants for efficiency.
2785 SmallVector<Value *, 4> IndexC(GEP.indices());
2786 GEPNoWrapFlags NW = GEP.getNoWrapFlags();
2787 Type *Ty = GEP.getSourceElementType();
2788 Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", NW);
2789 Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", NW);
2790 return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
2791}
2792
2793// Canonicalization:
2794// gep T, (gep i8, base, C1), (Index + C2) into
2795// gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index
2797 GEPOperator *Src,
2798 InstCombinerImpl &IC) {
2799 if (GEP.getNumIndices() != 1)
2800 return nullptr;
2801 auto &DL = IC.getDataLayout();
2802 Value *Base;
2803 const APInt *C1;
2804 if (!match(Src, m_PtrAdd(m_Value(Base), m_APInt(C1))))
2805 return nullptr;
2806 Value *VarIndex;
2807 const APInt *C2;
2808 Type *PtrTy = Src->getType()->getScalarType();
2809 unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(PtrTy);
2810 if (!match(GEP.getOperand(1), m_AddLike(m_Value(VarIndex), m_APInt(C2))))
2811 return nullptr;
2812 if (C1->getBitWidth() != IndexSizeInBits ||
2813 C2->getBitWidth() != IndexSizeInBits)
2814 return nullptr;
2815 Type *BaseType = GEP.getSourceElementType();
2817 return nullptr;
2818 APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(BaseType));
2819 APInt NewOffset = TypeSize * *C2 + *C1;
2820 if (NewOffset.isZero() ||
2821 (Src->hasOneUse() && GEP.getOperand(1)->hasOneUse())) {
2823 if (GEP.hasNoUnsignedWrap() &&
2824 cast<GEPOperator>(Src)->hasNoUnsignedWrap() &&
2825 match(GEP.getOperand(1), m_NUWAddLike(m_Value(), m_Value()))) {
2827 if (GEP.isInBounds() && cast<GEPOperator>(Src)->isInBounds())
2828 Flags |= GEPNoWrapFlags::inBounds();
2829 }
2830
2831 Value *GEPConst =
2832 IC.Builder.CreatePtrAdd(Base, IC.Builder.getInt(NewOffset), "", Flags);
2833 return GetElementPtrInst::Create(BaseType, GEPConst, VarIndex, Flags);
2834 }
2835
2836 return nullptr;
2837}
2838
2839/// Combine constant offsets separated by variable offsets.
2840/// ptradd (ptradd (ptradd p, C1), x), C2 -> ptradd (ptradd p, x), C1+C2
2842 InstCombinerImpl &IC) {
2843 if (!GEP.hasAllConstantIndices())
2844 return nullptr;
2845
2848 auto *InnerGEP = dyn_cast<GetElementPtrInst>(GEP.getPointerOperand());
2849 while (true) {
2850 if (!InnerGEP)
2851 return nullptr;
2852
2853 NW = NW.intersectForReassociate(InnerGEP->getNoWrapFlags());
2854 if (InnerGEP->hasAllConstantIndices())
2855 break;
2856
2857 if (!InnerGEP->hasOneUse())
2858 return nullptr;
2859
2860 Skipped.push_back(InnerGEP);
2861 InnerGEP = dyn_cast<GetElementPtrInst>(InnerGEP->getPointerOperand());
2862 }
2863
2864 // The two constant offset GEPs are directly adjacent: Let normal offset
2865 // merging handle it.
2866 if (Skipped.empty())
2867 return nullptr;
2868
2869 // FIXME: This one-use check is not strictly necessary. Consider relaxing it
2870 // if profitable.
2871 if (!InnerGEP->hasOneUse())
2872 return nullptr;
2873
2874 // Don't bother with vector splats.
2875 Type *Ty = GEP.getType();
2876 if (InnerGEP->getType() != Ty)
2877 return nullptr;
2878
2879 const DataLayout &DL = IC.getDataLayout();
2880 APInt Offset(DL.getIndexTypeSizeInBits(Ty), 0);
2881 if (!GEP.accumulateConstantOffset(DL, Offset) ||
2882 !InnerGEP->accumulateConstantOffset(DL, Offset))
2883 return nullptr;
2884
2885 IC.replaceOperand(*Skipped.back(), 0, InnerGEP->getPointerOperand());
2886 for (GetElementPtrInst *SkippedGEP : Skipped)
2887 SkippedGEP->setNoWrapFlags(NW);
2888
2889 return IC.replaceInstUsesWith(
2890 GEP,
2891 IC.Builder.CreatePtrAdd(Skipped.front(), IC.Builder.getInt(Offset), "",
2892 NW.intersectForOffsetAdd(GEP.getNoWrapFlags())));
2893}
2894
2896 GEPOperator *Src) {
2897 // Combine Indices - If the source pointer to this getelementptr instruction
2898 // is a getelementptr instruction with matching element type, combine the
2899 // indices of the two getelementptr instructions into a single instruction.
2900 if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
2901 return nullptr;
2902
2903 if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, *this))
2904 return I;
2905
2906 if (auto *I = combineConstantOffsets(GEP, *this))
2907 return I;
2908
2909 if (Src->getResultElementType() != GEP.getSourceElementType())
2910 return nullptr;
2911
2912 // Fold chained GEP with constant base into single GEP:
2913 // gep i8, (gep i8, %base, C1), (select Cond, C2, C3)
2914 // -> gep i8, %base, (select Cond, C1+C2, C1+C3)
2915 if (Src->hasOneUse() && GEP.getNumIndices() == 1 &&
2916 Src->getNumIndices() == 1) {
2917 Value *SrcIdx = *Src->idx_begin();
2918 Value *GEPIdx = *GEP.idx_begin();
2919 const APInt *ConstOffset, *TrueVal, *FalseVal;
2920 Value *Cond;
2921
2922 if ((match(SrcIdx, m_APInt(ConstOffset)) &&
2923 match(GEPIdx,
2924 m_Select(m_Value(Cond), m_APInt(TrueVal), m_APInt(FalseVal)))) ||
2925 (match(GEPIdx, m_APInt(ConstOffset)) &&
2926 match(SrcIdx,
2927 m_Select(m_Value(Cond), m_APInt(TrueVal), m_APInt(FalseVal))))) {
2928 auto *Select = isa<SelectInst>(GEPIdx) ? cast<SelectInst>(GEPIdx)
2929 : cast<SelectInst>(SrcIdx);
2930
2931 // Make sure the select has only one use.
2932 if (!Select->hasOneUse())
2933 return nullptr;
2934
2935 if (TrueVal->getBitWidth() != ConstOffset->getBitWidth() ||
2936 FalseVal->getBitWidth() != ConstOffset->getBitWidth())
2937 return nullptr;
2938
2939 APInt NewTrueVal = *ConstOffset + *TrueVal;
2940 APInt NewFalseVal = *ConstOffset + *FalseVal;
2941 Constant *NewTrue = ConstantInt::get(Select->getType(), NewTrueVal);
2942 Constant *NewFalse = ConstantInt::get(Select->getType(), NewFalseVal);
2943 Value *NewSelect = Builder.CreateSelect(
2944 Cond, NewTrue, NewFalse, /*Name=*/"",
2945 /*MDFrom=*/(ProfcheckDisableMetadataFixes ? nullptr : Select));
2946 GEPNoWrapFlags Flags =
2948 return replaceInstUsesWith(GEP,
2949 Builder.CreateGEP(GEP.getResultElementType(),
2950 Src->getPointerOperand(),
2951 NewSelect, "", Flags));
2952 }
2953 }
2954
2955 // Find out whether the last index in the source GEP is a sequential idx.
2956 bool EndsWithSequential = false;
2957 for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
2958 I != E; ++I)
2959 EndsWithSequential = I.isSequential();
2960 if (!EndsWithSequential)
2961 return nullptr;
2962
2963 // Replace: gep (gep %P, long B), long A, ...
2964 // With: T = long A+B; gep %P, T, ...
2965 Value *SO1 = Src->getOperand(Src->getNumOperands() - 1);
2966 Value *GO1 = GEP.getOperand(1);
2967
2968 // If they aren't the same type, then the input hasn't been processed
2969 // by the loop above yet (which canonicalizes sequential index types to
2970 // intptr_t). Just avoid transforming this until the input has been
2971 // normalized.
2972 if (SO1->getType() != GO1->getType())
2973 return nullptr;
2974
2975 Value *Sum =
2976 simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
2977 // Only do the combine when we are sure the cost after the
2978 // merge is never more than that before the merge.
2979 if (Sum == nullptr)
2980 return nullptr;
2981
2983 Indices.append(Src->op_begin() + 1, Src->op_end() - 1);
2984 Indices.push_back(Sum);
2985 Indices.append(GEP.op_begin() + 2, GEP.op_end());
2986
2987 // Don't create GEPs with more than one non-zero index.
2988 unsigned NumNonZeroIndices = count_if(Indices, [](Value *Idx) {
2989 auto *C = dyn_cast<Constant>(Idx);
2990 return !C || !C->isNullValue();
2991 });
2992 if (NumNonZeroIndices > 1)
2993 return nullptr;
2994
2995 return replaceInstUsesWith(
2996 GEP, Builder.CreateGEP(
2997 Src->getSourceElementType(), Src->getOperand(0), Indices, "",
2999}
3000
3003 bool &DoesConsume, unsigned Depth) {
3004 static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
3005 // ~(~(X)) -> X.
3006 Value *A, *B;
3007 if (match(V, m_Not(m_Value(A)))) {
3008 DoesConsume = true;
3009 return A;
3010 }
3011
3012 Constant *C;
3013 // Constants can be considered to be not'ed values.
3014 if (match(V, m_ImmConstant(C)))
3015 return ConstantExpr::getNot(C);
3016
3018 return nullptr;
3019
3020 // The rest of the cases require that we invert all uses so don't bother
3021 // doing the analysis if we know we can't use the result.
3022 if (!WillInvertAllUses)
3023 return nullptr;
3024
3025 // Compares can be inverted if all of their uses are being modified to use
3026 // the ~V.
3027 if (auto *I = dyn_cast<CmpInst>(V)) {
3028 if (Builder != nullptr)
3029 return Builder->CreateCmp(I->getInversePredicate(), I->getOperand(0),
3030 I->getOperand(1));
3031 return NonNull;
3032 }
3033
3034 // If `V` is of the form `A + B` then `-1 - V` can be folded into
3035 // `(-1 - B) - A` if we are willing to invert all of the uses.
3036 if (match(V, m_Add(m_Value(A), m_Value(B)))) {
3037 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
3038 DoesConsume, Depth))
3039 return Builder ? Builder->CreateSub(BV, A) : NonNull;
3040 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3041 DoesConsume, Depth))
3042 return Builder ? Builder->CreateSub(AV, B) : NonNull;
3043 return nullptr;
3044 }
3045
3046 // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
3047 // into `A ^ B` if we are willing to invert all of the uses.
3048 if (match(V, m_Xor(m_Value(A), m_Value(B)))) {
3049 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
3050 DoesConsume, Depth))
3051 return Builder ? Builder->CreateXor(A, BV) : NonNull;
3052 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3053 DoesConsume, Depth))
3054 return Builder ? Builder->CreateXor(AV, B) : NonNull;
3055 return nullptr;
3056 }
3057
3058 // If `V` is of the form `B - A` then `-1 - V` can be folded into
3059 // `A + (-1 - B)` if we are willing to invert all of the uses.
3060 if (match(V, m_Sub(m_Value(A), m_Value(B)))) {
3061 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3062 DoesConsume, Depth))
3063 return Builder ? Builder->CreateAdd(AV, B) : NonNull;
3064 return nullptr;
3065 }
3066
3067 // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
3068 // into `A s>> B` if we are willing to invert all of the uses.
3069 if (match(V, m_AShr(m_Value(A), m_Value(B)))) {
3070 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3071 DoesConsume, Depth))
3072 return Builder ? Builder->CreateAShr(AV, B) : NonNull;
3073 return nullptr;
3074 }
3075
3076 Value *Cond;
3077 // LogicOps are special in that we canonicalize them at the cost of an
3078 // instruction.
3079 bool IsSelect = match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))) &&
3081 // Selects/min/max with invertible operands are freely invertible
3082 if (IsSelect || match(V, m_MaxOrMin(m_Value(A), m_Value(B)))) {
3083 bool LocalDoesConsume = DoesConsume;
3084 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder*/ nullptr,
3085 LocalDoesConsume, Depth))
3086 return nullptr;
3087 if (Value *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3088 LocalDoesConsume, Depth)) {
3089 DoesConsume = LocalDoesConsume;
3090 if (Builder != nullptr) {
3091 Value *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
3092 DoesConsume, Depth);
3093 assert(NotB != nullptr &&
3094 "Unable to build inverted value for known freely invertable op");
3095 if (auto *II = dyn_cast<IntrinsicInst>(V))
3096 return Builder->CreateBinaryIntrinsic(
3097 getInverseMinMaxIntrinsic(II->getIntrinsicID()), NotA, NotB);
3098 return Builder->CreateSelect(
3099 Cond, NotA, NotB, "",
3101 }
3102 return NonNull;
3103 }
3104 }
3105
3106 if (PHINode *PN = dyn_cast<PHINode>(V)) {
3107 bool LocalDoesConsume = DoesConsume;
3109 for (Use &U : PN->operands()) {
3110 BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
3111 Value *NewIncomingVal = getFreelyInvertedImpl(
3112 U.get(), /*WillInvertAllUses=*/false,
3113 /*Builder=*/nullptr, LocalDoesConsume, MaxAnalysisRecursionDepth - 1);
3114 if (NewIncomingVal == nullptr)
3115 return nullptr;
3116 // Make sure that we can safely erase the original PHI node.
3117 if (NewIncomingVal == V)
3118 return nullptr;
3119 if (Builder != nullptr)
3120 IncomingValues.emplace_back(NewIncomingVal, IncomingBlock);
3121 }
3122
3123 DoesConsume = LocalDoesConsume;
3124 if (Builder != nullptr) {
3126 Builder->SetInsertPoint(PN);
3127 PHINode *NewPN =
3128 Builder->CreatePHI(PN->getType(), PN->getNumIncomingValues());
3129 for (auto [Val, Pred] : IncomingValues)
3130 NewPN->addIncoming(Val, Pred);
3131 return NewPN;
3132 }
3133 return NonNull;
3134 }
3135
3136 if (match(V, m_SExtLike(m_Value(A)))) {
3137 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3138 DoesConsume, Depth))
3139 return Builder ? Builder->CreateSExt(AV, V->getType()) : NonNull;
3140 return nullptr;
3141 }
3142
3143 if (match(V, m_Trunc(m_Value(A)))) {
3144 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3145 DoesConsume, Depth))
3146 return Builder ? Builder->CreateTrunc(AV, V->getType()) : NonNull;
3147 return nullptr;
3148 }
3149
3150 // De Morgan's Laws:
3151 // (~(A | B)) -> (~A & ~B)
3152 // (~(A & B)) -> (~A | ~B)
3153 auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode,
3154 bool IsLogical, Value *A,
3155 Value *B) -> Value * {
3156 bool LocalDoesConsume = DoesConsume;
3157 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder=*/nullptr,
3158 LocalDoesConsume, Depth))
3159 return nullptr;
3160 if (auto *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3161 LocalDoesConsume, Depth)) {
3162 auto *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
3163 LocalDoesConsume, Depth);
3164 DoesConsume = LocalDoesConsume;
3165 if (IsLogical)
3166 return Builder ? Builder->CreateLogicalOp(Opcode, NotA, NotB) : NonNull;
3167 return Builder ? Builder->CreateBinOp(Opcode, NotA, NotB) : NonNull;
3168 }
3169
3170 return nullptr;
3171 };
3172
3173 if (match(V, m_Or(m_Value(A), m_Value(B))))
3174 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A,
3175 B);
3176
3177 if (match(V, m_And(m_Value(A), m_Value(B))))
3178 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A,
3179 B);
3180
3181 if (match(V, m_LogicalOr(m_Value(A), m_Value(B))))
3182 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A,
3183 B);
3184
3185 if (match(V, m_LogicalAnd(m_Value(A), m_Value(B))))
3186 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A,
3187 B);
3188
3189 return nullptr;
3190}
3191
3192/// Return true if we should canonicalize the gep to an i8 ptradd.
3194 Value *PtrOp = GEP.getOperand(0);
3195 Type *GEPEltType = GEP.getSourceElementType();
3196 if (GEPEltType->isIntegerTy(8))
3197 return false;
3198
3199 // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
3200 // intrinsic. This has better support in BasicAA.
3201 if (GEPEltType->isScalableTy())
3202 return true;
3203
3204 // gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two multiplies
3205 // together.
3206 if (GEP.getNumIndices() == 1 &&
3207 match(GEP.getOperand(1),
3209 m_Shl(m_Value(), m_ConstantInt())))))
3210 return true;
3211
3212 // gep (gep %p, C1), %x, C2 is expanded so the two constants can
3213 // possibly be merged together.
3214 auto PtrOpGep = dyn_cast<GEPOperator>(PtrOp);
3215 return PtrOpGep && PtrOpGep->hasAllConstantIndices() &&
3216 any_of(GEP.indices(), [](Value *V) {
3217 const APInt *C;
3218 return match(V, m_APInt(C)) && !C->isZero();
3219 });
3220}
3221
3223 IRBuilderBase &Builder) {
3224 auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
3225 if (!Op1)
3226 return nullptr;
3227
3228 // Don't fold a GEP into itself through a PHI node. This can only happen
3229 // through the back-edge of a loop. Folding a GEP into itself means that
3230 // the value of the previous iteration needs to be stored in the meantime,
3231 // thus requiring an additional register variable to be live, but not
3232 // actually achieving anything (the GEP still needs to be executed once per
3233 // loop iteration).
3234 if (Op1 == &GEP)
3235 return nullptr;
3236 GEPNoWrapFlags NW = Op1->getNoWrapFlags();
3237
3238 int DI = -1;
3239
3240 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
3241 auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
3242 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
3243 Op1->getSourceElementType() != Op2->getSourceElementType())
3244 return nullptr;
3245
3246 // As for Op1 above, don't try to fold a GEP into itself.
3247 if (Op2 == &GEP)
3248 return nullptr;
3249
3250 // Keep track of the type as we walk the GEP.
3251 Type *CurTy = nullptr;
3252
3253 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
3254 if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
3255 return nullptr;
3256
3257 if (Op1->getOperand(J) != Op2->getOperand(J)) {
3258 if (DI == -1) {
3259 // We have not seen any differences in the GEPs feeding the
3260 // PHI yet, so we record this one if it is allowed to be a
3261 // variable.
3262
3263 // The first two arguments can vary for any GEP, the rest have to be
3264 // static for struct slots
3265 if (J > 1) {
3266 assert(CurTy && "No current type?");
3267 if (CurTy->isStructTy())
3268 return nullptr;
3269 }
3270
3271 DI = J;
3272 } else {
3273 // The GEP is different by more than one input. While this could be
3274 // extended to support GEPs that vary by more than one variable it
3275 // doesn't make sense since it greatly increases the complexity and
3276 // would result in an R+R+R addressing mode which no backend
3277 // directly supports and would need to be broken into several
3278 // simpler instructions anyway.
3279 return nullptr;
3280 }
3281 }
3282
3283 // Sink down a layer of the type for the next iteration.
3284 if (J > 0) {
3285 if (J == 1) {
3286 CurTy = Op1->getSourceElementType();
3287 } else {
3288 CurTy =
3289 GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
3290 }
3291 }
3292 }
3293
3294 NW &= Op2->getNoWrapFlags();
3295 }
3296
3297 // If not all GEPs are identical we'll have to create a new PHI node.
3298 // Check that the old PHI node has only one use so that it will get
3299 // removed.
3300 if (DI != -1 && !PN->hasOneUse())
3301 return nullptr;
3302
3303 auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
3304 NewGEP->setNoWrapFlags(NW);
3305
3306 if (DI == -1) {
3307 // All the GEPs feeding the PHI are identical. Clone one down into our
3308 // BB so that it can be merged with the current GEP.
3309 } else {
3310 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
3311 // into the current block so it can be merged, and create a new PHI to
3312 // set that index.
3313 PHINode *NewPN;
3314 {
3315 IRBuilderBase::InsertPointGuard Guard(Builder);
3316 Builder.SetInsertPoint(PN);
3317 NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
3318 PN->getNumOperands());
3319 }
3320
3321 for (auto &I : PN->operands())
3322 NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
3323 PN->getIncomingBlock(I));
3324
3325 NewGEP->setOperand(DI, NewPN);
3326 }
3327
3328 NewGEP->insertBefore(*GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
3329 return NewGEP;
3330}
3331
3333 Value *PtrOp = GEP.getOperand(0);
3334 SmallVector<Value *, 8> Indices(GEP.indices());
3335 Type *GEPType = GEP.getType();
3336 Type *GEPEltType = GEP.getSourceElementType();
3337 if (Value *V =
3338 simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.getNoWrapFlags(),
3339 SQ.getWithInstruction(&GEP)))
3340 return replaceInstUsesWith(GEP, V);
3341
3342 // For vector geps, use the generic demanded vector support.
3343 // Skip if GEP return type is scalable. The number of elements is unknown at
3344 // compile-time.
3345 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
3346 auto VWidth = GEPFVTy->getNumElements();
3347 APInt PoisonElts(VWidth, 0);
3348 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
3349 if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
3350 PoisonElts)) {
3351 if (V != &GEP)
3352 return replaceInstUsesWith(GEP, V);
3353 return &GEP;
3354 }
3355 }
3356
3357 // Eliminate unneeded casts for indices, and replace indices which displace
3358 // by multiples of a zero size type with zero.
3359 bool MadeChange = false;
3360
3361 // Index width may not be the same width as pointer width.
3362 // Data layout chooses the right type based on supported integer types.
3363 Type *NewScalarIndexTy =
3364 DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
3365
3367 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
3368 ++I, ++GTI) {
3369 // Skip indices into struct types.
3370 if (GTI.isStruct())
3371 continue;
3372
3373 Type *IndexTy = (*I)->getType();
3374 Type *NewIndexType =
3375 IndexTy->isVectorTy()
3376 ? VectorType::get(NewScalarIndexTy,
3377 cast<VectorType>(IndexTy)->getElementCount())
3378 : NewScalarIndexTy;
3379
3380 // If the element type has zero size then any index over it is equivalent
3381 // to an index of zero, so replace it with zero if it is not zero already.
3382 Type *EltTy = GTI.getIndexedType();
3383 if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
3384 if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
3385 *I = Constant::getNullValue(NewIndexType);
3386 MadeChange = true;
3387 }
3388
3389 if (IndexTy != NewIndexType) {
3390 // If we are using a wider index than needed for this platform, shrink
3391 // it to what we need. If narrower, sign-extend it to what we need.
3392 // This explicit cast can make subsequent optimizations more obvious.
3393 if (IndexTy->getScalarSizeInBits() <
3394 NewIndexType->getScalarSizeInBits()) {
3395 if (GEP.hasNoUnsignedWrap() && GEP.hasNoUnsignedSignedWrap())
3396 *I = Builder.CreateZExt(*I, NewIndexType, "", /*IsNonNeg=*/true);
3397 else
3398 *I = Builder.CreateSExt(*I, NewIndexType);
3399 } else {
3400 *I = Builder.CreateTrunc(*I, NewIndexType, "", GEP.hasNoUnsignedWrap(),
3401 GEP.hasNoUnsignedSignedWrap());
3402 }
3403 MadeChange = true;
3404 }
3405 }
3406 if (MadeChange)
3407 return &GEP;
3408
3409 // Canonicalize constant GEPs to i8 type.
3410 if (!GEPEltType->isIntegerTy(8) && GEP.hasAllConstantIndices()) {
3411 APInt Offset(DL.getIndexTypeSizeInBits(GEPType), 0);
3412 if (GEP.accumulateConstantOffset(DL, Offset))
3413 return replaceInstUsesWith(
3414 GEP, Builder.CreatePtrAdd(PtrOp, Builder.getInt(Offset), "",
3415 GEP.getNoWrapFlags()));
3416 }
3417
3419 Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
3420 Value *NewGEP =
3421 Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.getNoWrapFlags());
3422 return replaceInstUsesWith(GEP, NewGEP);
3423 }
3424
3425 // Strip trailing zero indices.
3426 auto *LastIdx = dyn_cast<Constant>(Indices.back());
3427 if (LastIdx && LastIdx->isNullValue() && !LastIdx->getType()->isVectorTy()) {
3428 return replaceInstUsesWith(
3429 GEP, Builder.CreateGEP(GEP.getSourceElementType(), PtrOp,
3430 drop_end(Indices), "", GEP.getNoWrapFlags()));
3431 }
3432
3433 // Strip leading zero indices.
3434 auto *FirstIdx = dyn_cast<Constant>(Indices.front());
3435 if (FirstIdx && FirstIdx->isNullValue() &&
3436 !FirstIdx->getType()->isVectorTy()) {
3438 ++GTI;
3439 if (!GTI.isStruct() && GTI.getSequentialElementStride(DL) ==
3440 DL.getTypeAllocSize(GTI.getIndexedType()))
3441 return replaceInstUsesWith(GEP, Builder.CreateGEP(GTI.getIndexedType(),
3442 GEP.getPointerOperand(),
3443 drop_begin(Indices), "",
3444 GEP.getNoWrapFlags()));
3445 }
3446
3447 // Scalarize vector operands; prefer splat-of-gep as canonical form.
3448 // Note that this loses information about undef lanes; we run it after
3449 // demanded bits to partially mitigate that loss.
3450 if (GEPType->isVectorTy() && llvm::any_of(GEP.operands(), [](Value *Op) {
3451 return Op->getType()->isVectorTy() && getSplatValue(Op);
3452 })) {
3453 SmallVector<Value *> NewOps;
3454 for (auto &Op : GEP.operands()) {
3455 if (Op->getType()->isVectorTy())
3456 if (Value *Scalar = getSplatValue(Op)) {
3457 NewOps.push_back(Scalar);
3458 continue;
3459 }
3460 NewOps.push_back(Op);
3461 }
3462
3463 Value *Res = Builder.CreateGEP(GEP.getSourceElementType(), NewOps[0],
3464 ArrayRef(NewOps).drop_front(), GEP.getName(),
3465 GEP.getNoWrapFlags());
3466 if (!Res->getType()->isVectorTy()) {
3467 ElementCount EC = cast<VectorType>(GEPType)->getElementCount();
3468 Res = Builder.CreateVectorSplat(EC, Res);
3469 }
3470 return replaceInstUsesWith(GEP, Res);
3471 }
3472
3473 bool SeenNonZeroIndex = false;
3474 for (auto [IdxNum, Idx] : enumerate(Indices)) {
3475 // Ignore one leading zero index.
3476 auto *C = dyn_cast<Constant>(Idx);
3477 if (C && C->isNullValue() && IdxNum == 0)
3478 continue;
3479
3480 if (!SeenNonZeroIndex) {
3481 SeenNonZeroIndex = true;
3482 continue;
3483 }
3484
3485 // GEP has multiple non-zero indices: Split it.
3486 ArrayRef<Value *> FrontIndices = ArrayRef(Indices).take_front(IdxNum);
3487 Value *FrontGEP =
3488 Builder.CreateGEP(GEPEltType, PtrOp, FrontIndices,
3489 GEP.getName() + ".split", GEP.getNoWrapFlags());
3490
3491 SmallVector<Value *> BackIndices;
3492 BackIndices.push_back(Constant::getNullValue(NewScalarIndexTy));
3493 append_range(BackIndices, drop_begin(Indices, IdxNum));
3495 GetElementPtrInst::getIndexedType(GEPEltType, FrontIndices), FrontGEP,
3496 BackIndices, GEP.getNoWrapFlags());
3497 }
3498
3499 // Canonicalize gep %T to gep [sizeof(%T) x i8]:
3500 auto IsCanonicalType = [](Type *Ty) {
3501 if (auto *AT = dyn_cast<ArrayType>(Ty))
3502 Ty = AT->getElementType();
3503 return Ty->isIntegerTy(8);
3504 };
3505 if (Indices.size() == 1 && !IsCanonicalType(GEPEltType)) {
3506 TypeSize Scale = DL.getTypeAllocSize(GEPEltType);
3507 assert(!Scale.isScalable() && "Should have been handled earlier");
3508 Type *NewElemTy = Builder.getInt8Ty();
3509 if (Scale.getFixedValue() != 1)
3510 NewElemTy = ArrayType::get(NewElemTy, Scale.getFixedValue());
3511 GEP.setSourceElementType(NewElemTy);
3512 GEP.setResultElementType(NewElemTy);
3513 // Don't bother revisiting the GEP after this change.
3514 MadeIRChange = true;
3515 }
3516
3517 // Check to see if the inputs to the PHI node are getelementptr instructions.
3518 if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
3519 if (Value *NewPtrOp = foldGEPOfPhi(GEP, PN, Builder))
3520 return replaceOperand(GEP, 0, NewPtrOp);
3521 }
3522
3523 if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
3524 if (Instruction *I = visitGEPOfGEP(GEP, Src))
3525 return I;
3526
3527 if (GEP.getNumIndices() == 1) {
3528 unsigned AS = GEP.getPointerAddressSpace();
3529 if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
3530 DL.getIndexSizeInBits(AS)) {
3531 uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
3532
3533 if (TyAllocSize == 1) {
3534 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
3535 // but only if the result pointer is only used as if it were an integer.
3536 // (The case where the underlying object is the same is handled by
3537 // InstSimplify.)
3538 Value *X = GEP.getPointerOperand();
3539 Value *Y;
3540 if (match(GEP.getOperand(1), m_Sub(m_PtrToIntOrAddr(m_Value(Y)),
3542 GEPType == Y->getType()) {
3543 bool HasNonAddressBits =
3544 DL.getAddressSizeInBits(AS) != DL.getPointerSizeInBits(AS);
3545 bool Changed = GEP.replaceUsesWithIf(Y, [&](Use &U) {
3546 return isa<PtrToAddrInst, ICmpInst>(U.getUser()) ||
3547 (!HasNonAddressBits && isa<PtrToIntInst>(U.getUser()));
3548 });
3549 return Changed ? &GEP : nullptr;
3550 }
3551 } else if (auto *ExactIns =
3552 dyn_cast<PossiblyExactOperator>(GEP.getOperand(1))) {
3553 // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
3554 Value *V;
3555 if (ExactIns->isExact()) {
3556 if ((has_single_bit(TyAllocSize) &&
3557 match(GEP.getOperand(1),
3558 m_Shr(m_Value(V),
3559 m_SpecificInt(countr_zero(TyAllocSize))))) ||
3560 match(GEP.getOperand(1),
3561 m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize)))) {
3562 return GetElementPtrInst::Create(Builder.getInt8Ty(),
3563 GEP.getPointerOperand(), V,
3564 GEP.getNoWrapFlags());
3565 }
3566 }
3567 if (ExactIns->isExact() && ExactIns->hasOneUse()) {
3568 // Try to canonicalize non-i8 element type to i8 if the index is an
3569 // exact instruction. If the index is an exact instruction (div/shr)
3570 // with a constant RHS, we can fold the non-i8 element scale into the
3571 // div/shr (similar to the mul case, just inverted).
3572 const APInt *C;
3573 std::optional<APInt> NewC;
3574 if (has_single_bit(TyAllocSize) &&
3575 match(ExactIns, m_Shr(m_Value(V), m_APInt(C))) &&
3576 C->uge(countr_zero(TyAllocSize)))
3577 NewC = *C - countr_zero(TyAllocSize);
3578 else if (match(ExactIns, m_UDiv(m_Value(V), m_APInt(C)))) {
3579 APInt Quot;
3580 uint64_t Rem;
3581 APInt::udivrem(*C, TyAllocSize, Quot, Rem);
3582 if (Rem == 0)
3583 NewC = Quot;
3584 } else if (match(ExactIns, m_SDiv(m_Value(V), m_APInt(C)))) {
3585 APInt Quot;
3586 int64_t Rem;
3587 APInt::sdivrem(*C, TyAllocSize, Quot, Rem);
3588 // For sdiv we need to make sure we aren't creating INT_MIN / -1.
3589 if (!Quot.isAllOnes() && Rem == 0)
3590 NewC = Quot;
3591 }
3592
3593 if (NewC.has_value()) {
3594 Value *NewOp = Builder.CreateExactBinOp(
3595 static_cast<Instruction::BinaryOps>(ExactIns->getOpcode()), V,
3596 ConstantInt::get(V->getType(), *NewC), /*IsExact=*/true);
3597 return GetElementPtrInst::Create(Builder.getInt8Ty(),
3598 GEP.getPointerOperand(), NewOp,
3599 GEP.getNoWrapFlags());
3600 }
3601 }
3602 }
3603 }
3604 }
3605 // We do not handle pointer-vector geps here.
3606 if (GEPType->isVectorTy())
3607 return nullptr;
3608
3609 if (!GEP.isInBounds()) {
3610 unsigned IdxWidth =
3611 DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
3612 APInt BasePtrOffset(IdxWidth, 0);
3613 Value *UnderlyingPtrOp =
3614 PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, BasePtrOffset);
3615 bool CanBeNull, CanBeFreed;
3616 uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
3617 DL, CanBeNull, CanBeFreed);
3618 // We can ignore CanBeFreed here, because inbounds is explicitly allowed to
3619 // refer to a deallocated object.
3620 if (!CanBeNull && DerefBytes != 0) {
3621 if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
3622 BasePtrOffset.isNonNegative()) {
3623 APInt AllocSize(IdxWidth, DerefBytes);
3624 if (BasePtrOffset.ule(AllocSize)) {
3626 GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
3627 }
3628 }
3629 }
3630 }
3631
3632 // nusw + nneg -> nuw
3633 if (GEP.hasNoUnsignedSignedWrap() && !GEP.hasNoUnsignedWrap() &&
3634 all_of(GEP.indices(), [&](Value *Idx) {
3635 return isKnownNonNegative(Idx, SQ.getWithInstruction(&GEP));
3636 })) {
3637 GEP.setNoWrapFlags(GEP.getNoWrapFlags() | GEPNoWrapFlags::noUnsignedWrap());
3638 return &GEP;
3639 }
3640
3641 // These rewrites are trying to preserve inbounds/nuw attributes. So we want
3642 // to do this after having tried to derive "nuw" above.
3643 if (GEP.getNumIndices() == 1) {
3644 // Given (gep p, x+y) we want to determine the common nowrap flags for both
3645 // geps if transforming into (gep (gep p, x), y).
3646 auto GetPreservedNoWrapFlags = [&](bool AddIsNUW) {
3647 // We can preserve any of "inbounds nuw", "nusw nuw" and "nuw" if we know
3648 // that x + y does not have unsigned wrap.
3649 if (GEP.hasNoUnsignedWrap() && AddIsNUW)
3650 return GEP.getNoWrapFlags();
3651 return GEPNoWrapFlags::none();
3652 };
3653
3654 // Try to replace ADD + GEP with GEP + GEP.
3655 Value *Idx1, *Idx2;
3656 if (match(GEP.getOperand(1),
3657 m_OneUse(m_AddLike(m_Value(Idx1), m_Value(Idx2))))) {
3658 // %idx = add i64 %idx1, %idx2
3659 // %gep = getelementptr i32, ptr %ptr, i64 %idx
3660 // as:
3661 // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
3662 // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
3663 bool NUW = match(GEP.getOperand(1), m_NUWAddLike(m_Value(), m_Value()));
3664 GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
3665 auto *NewPtr =
3666 Builder.CreateGEP(GEP.getSourceElementType(), GEP.getPointerOperand(),
3667 Idx1, "", NWFlags);
3668 return replaceInstUsesWith(GEP,
3669 Builder.CreateGEP(GEP.getSourceElementType(),
3670 NewPtr, Idx2, "", NWFlags));
3671 }
3672 ConstantInt *C;
3673 if (match(GEP.getOperand(1), m_OneUse(m_SExtLike(m_OneUse(m_NSWAddLike(
3674 m_Value(Idx1), m_ConstantInt(C))))))) {
3675 // %add = add nsw i32 %idx1, idx2
3676 // %sidx = sext i32 %add to i64
3677 // %gep = getelementptr i32, ptr %ptr, i64 %sidx
3678 // as:
3679 // %newptr = getelementptr i32, ptr %ptr, i32 %idx1
3680 // %newgep = getelementptr i32, ptr %newptr, i32 idx2
3681 bool NUW = match(GEP.getOperand(1),
3683 GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
3684 auto *NewPtr = Builder.CreateGEP(
3685 GEP.getSourceElementType(), GEP.getPointerOperand(),
3686 Builder.CreateSExt(Idx1, GEP.getOperand(1)->getType()), "", NWFlags);
3687 return replaceInstUsesWith(
3688 GEP,
3689 Builder.CreateGEP(GEP.getSourceElementType(), NewPtr,
3690 Builder.CreateSExt(C, GEP.getOperand(1)->getType()),
3691 "", NWFlags));
3692 }
3693 }
3694
3696 return R;
3697
3698 // srem -> (and/urem) for inbounds+nuw GEP
3699 if (Indices.size() == 1 && GEP.isInBounds() && GEP.hasNoUnsignedWrap()) {
3700 Value *X, *Y;
3701
3702 // Match: idx = srem X, Y -- where Y is a power-of-two value.
3703 if (match(Indices[0], m_OneUse(m_SRem(m_Value(X), m_Value(Y)))) &&
3704 isKnownToBeAPowerOfTwo(Y, /*OrZero=*/true, &GEP)) {
3705 // If GEP is inbounds+nuw, the offset cannot be negative
3706 // -> srem by power-of-two can be treated as urem,
3707 // and urem by power-of-two folds to 'and' later.
3708 // OrZero=true is fine here because division by zero is UB.
3709 Instruction *OldIdxI = cast<Instruction>(Indices[0]);
3710 Value *NewIdx = Builder.CreateURem(X, Y, OldIdxI->getName());
3711
3712 return GetElementPtrInst::Create(GEPEltType, PtrOp, {NewIdx},
3713 GEP.getNoWrapFlags());
3714 }
3715 }
3716
3717 return nullptr;
3718}
3719
3721 Instruction *AI) {
     // Decides whether V may be assumed never to compare equal to the address
     // of the allocation AI. The allocation-site removability scan uses this
     // when it folds equality comparisons against an unescaped allocation.
3723 return true;
     // A value loaded directly from a global variable is accepted.
     // NOTE(review): presumably because the address of an unescaped allocation
     // cannot have been stored into a global -- confirm.
3724 if (auto *LI = dyn_cast<LoadInst>(V))
3725 return isa<GlobalVariable>(LI->getPointerOperand());
3726 // Two distinct allocations will never be equal.
3727 return isAllocLikeFn(V, &TLI) && V != AI;
3728}
3729
3730/// Given a call CB which uses an address UsedV, return true if we can prove the
3731/// call's only possible effect is storing to V.
3732static bool isRemovableWrite(CallBase &CB, Value *UsedV,
3733 const TargetLibraryInfo &TLI) {
3734 if (!CB.use_empty())
3735 // TODO: add recursion if returned attribute is present
3736 return false;
3737
3738 if (CB.isTerminator())
3739 // TODO: remove implementation restriction
3740 return false;
3741
3742 if (!CB.willReturn() || !CB.doesNotThrow())
3743 return false;
3744
3745 // If the only possible side effect of the call is writing to the alloca,
3746 // and the result isn't used, we can safely remove any reads implied by the
3747 // call including those which might read the alloca itself.
3748 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
3749 return Dest && Dest->Ptr == UsedV;
3750}
3751
/// Scans the transitive users of the allocation AI and decides whether the
/// whole allocation site can be removed.
///
/// A worklist seeded with AI follows every user that yields another pointer
/// into the allocation: address-space casts, bitcasts, GEPs,
/// launder/strip.invariant.group, and reallocating calls of the same
/// allocation family. Each accepted user is appended to Users.
///
/// Access accumulates how the memory is touched. Reads and writes are never
/// mixed: a load is rejected once Mod is set, a store or removable write is
/// rejected once Ref is set.
///
/// \returns std::nullopt as soon as an unsupported user is found or Users has
/// reached MaxAllocSiteRemovableUsers entries; otherwise the accumulated
/// Access value.
/// NOTE(review): the lines that declare Worklist/Access and that use KnowInit
/// are not visible here -- confirm how KnowInit seeds Access.
3752static std::optional<ModRefInfo>
3754 const TargetLibraryInfo &TLI, bool KnowInit) {
3756 const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
3757 Worklist.push_back(AI);
3759
3760 do {
3761 Instruction *PI = Worklist.pop_back_val();
3762 for (User *U : PI->users()) {
     // Give up once the number of collected users reaches the cap.
3764 if (Users.size() >= MaxAllocSiteRemovableUsers)
3765 return std::nullopt;
3766 switch (I->getOpcode()) {
3767 default:
3768 // Give up the moment we see something we can't handle.
3769 return std::nullopt;
3770
     // Pointer-to-pointer operations: record them and also visit their users.
3771 case Instruction::AddrSpaceCast:
3772 case Instruction::BitCast:
3773 case Instruction::GetElementPtr:
3774 Users.emplace_back(I);
3775 Worklist.push_back(I);
3776 continue;
3777
3778 case Instruction::ICmp: {
3779 ICmpInst *ICI = cast<ICmpInst>(I);
3780 // We can fold eq/ne comparisons with null to false/true, respectively.
3781 // We also fold comparisons in some conditions provided the alloc has
3782 // not escaped (see isNeverEqualToUnescapedAlloc).
3783 if (!ICI->isEquality())
3784 return std::nullopt;
     // Select the operand that is not the pointer currently being visited.
3785 unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
3786 if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
3787 return std::nullopt;
3788
3789 // Do not fold compares to aligned_alloc calls, as they may have to
3790 // return null in case the required alignment cannot be satisfied,
3791 // unless we can prove that both alignment and size are valid.
3792 auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
3793 // Check if alignment and size of a call to aligned_alloc is valid,
3794 // that is alignment is a power-of-2 and the size is a multiple of the
3795 // alignment.
3796 const APInt *Alignment;
3797 const APInt *Size;
3798 return match(CB->getArgOperand(0), m_APInt(Alignment)) &&
3799 match(CB->getArgOperand(1), m_APInt(Size)) &&
3800 Alignment->isPowerOf2() && Size->urem(*Alignment).isZero();
3801 };
3802 auto *CB = dyn_cast<CallBase>(AI);
3803 LibFunc TheLibFunc;
3804 if (CB && TLI.getLibFunc(*CB->getCalledFunction(), TheLibFunc) &&
3805 TLI.has(TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
3806 !AlignmentAndSizeKnownValid(CB))
3807 return std::nullopt;
3808 Users.emplace_back(I);
3809 continue;
3810 }
3811
3812 case Instruction::Call:
3813 // Ignore no-op and store intrinsics.
3815 switch (II->getIntrinsicID()) {
3816 default:
3817 return std::nullopt;
3818
3819 case Intrinsic::memmove:
3820 case Intrinsic::memcpy:
3821 case Intrinsic::memset: {
3823 if (MI->isVolatile())
3824 return std::nullopt;
3825 // Note: this could also be ModRef, but we can still interpret that
3826 // as just Mod in that case.
3827 ModRefInfo NewAccess =
3828 MI->getRawDest() == PI ? ModRefInfo::Mod : ModRefInfo::Ref;
     // Reject when Access already holds a bit that NewAccess lacks, so the
     // allocation is never both read and written.
3829 if ((Access & ~NewAccess) != ModRefInfo::NoModRef)
3830 return std::nullopt;
3831 Access |= NewAccess;
3832 [[fallthrough]];
3833 }
3834 case Intrinsic::assume:
3835 case Intrinsic::invariant_start:
3836 case Intrinsic::invariant_end:
3837 case Intrinsic::lifetime_start:
3838 case Intrinsic::lifetime_end:
3839 case Intrinsic::objectsize:
3840 Users.emplace_back(I);
3841 continue;
     // These intrinsics return a pointer, so their users are visited as well.
3842 case Intrinsic::launder_invariant_group:
3843 case Intrinsic::strip_invariant_group:
3844 Users.emplace_back(I);
3845 Worklist.push_back(I);
3846 continue;
3847 }
3848 }
3849
     // A call that frees PI and belongs to the same allocation family.
3850 if (Family && getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
3851 getAllocationFamily(I, &TLI) == Family) {
3852 Users.emplace_back(I);
3853 continue;
3854 }
3855
     // A call that reallocates PI within the same family: its result is
     // another pointer to follow.
3856 if (Family && getReallocatedOperand(cast<CallBase>(I)) == PI &&
3857 getAllocationFamily(I, &TLI) == Family) {
3858 Users.emplace_back(I);
3859 Worklist.push_back(I);
3860 continue;
3861 }
3862
     // A call whose only effect is writing to PI, allowed only while no read
     // of the allocation has been recorded.
3863 if (!isRefSet(Access) &&
3864 isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
3866 Users.emplace_back(I);
3867 continue;
3868 }
3869
3870 return std::nullopt;
3871
3872 case Instruction::Store: {
     // Only non-volatile stores whose address operand is PI are accepted; a
     // store with a different address operand is rejected.
3874 if (SI->isVolatile() || SI->getPointerOperand() != PI)
3875 return std::nullopt;
3876 if (isRefSet(Access))
3877 return std::nullopt;
3879 Users.emplace_back(I);
3880 continue;
3881 }
3882
3883 case Instruction::Load: {
3884 LoadInst *LI = cast<LoadInst>(I);
     // Only non-volatile loads directly from PI are accepted, and only while
     // no write to the allocation has been recorded.
3885 if (LI->isVolatile() || LI->getPointerOperand() != PI)
3886 return std::nullopt;
3887 if (isModSet(Access))
3888 return std::nullopt;
3890 Users.emplace_back(I);
3891 continue;
3892 }
3893 }
3894 llvm_unreachable("missing a return?");
3895 }
3896 } while (!Worklist.empty());
3897
3899 return Access;
3900}
3901
3904
3905 // If we have a malloc call which is only used in any amount of comparisons to
3906 // null and free calls, delete the calls and replace the comparisons with true
3907 // or false as appropriate.
3908
3909 // This is based on the principle that we can substitute our own allocation
3910 // function (which will never return null) rather than knowledge of the
3911 // specific function being called. In some sense this can change the permitted
3912 // outputs of a program (when we convert a malloc to an alloca, the fact that
3913 // the allocation is now on the stack is potentially visible, for example),
3914 // but we believe in a permissible manner.
3915 //
3916 // Collect into Instruction* first to avoid expensive WeakTrackingVH
3917 // register/unregister overhead; convert to WeakTrackingVH only when the
3918 // site is actually removable.
3920
3921 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
3922 // before each store.
3924 std::unique_ptr<DIBuilder> DIB;
3925 if (isa<AllocaInst>(MI)) {
3926 findDbgUsers(&MI, DVRs);
3927 DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
3928 }
3929
3930 // Determine what getInitialValueOfAllocation would return without actually
3931 // allocating the result.
3932 bool KnowInitUndef = false;
3933 bool KnowInitZero = false;
3934 Constant *Init =
3936 if (Init) {
3937 if (isa<UndefValue>(Init))
3938 KnowInitUndef = true;
3939 else if (Init->isNullValue())
3940 KnowInitZero = true;
3941 }
3942 // The various sanitizers don't actually return undef memory, but rather
3943 // memory initialized with special forms of runtime poison
3944 auto &F = *MI.getFunction();
3945 if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
3946 F.hasFnAttribute(Attribute::SanitizeAddress))
3947 KnowInitUndef = false;
3948
     // The scan fills RawUsers and yields no value when the site must be kept.
3949 auto Removable =
3950 isAllocSiteRemovable(&MI, RawUsers, TLI, KnowInitZero | KnowInitUndef);
3951 if (Removable) {
     // Tracking handles become null if an entry is deleted while the loops
     // below are rewriting the users.
3952 SmallVector<WeakTrackingVH, 64> Users(RawUsers.begin(), RawUsers.end());
3953 for (WeakTrackingVH &User : Users) {
3954 // Lowering all @llvm.objectsize and MTI calls first because they may use
3955 // a bitcast/GEP of the alloca we are removing.
3956 if (!User)
3957 continue;
3958
3960
3962 if (II->getIntrinsicID() == Intrinsic::objectsize) {
3963 SmallVector<Instruction *> InsertedInstructions;
3964 Value *Result = lowerObjectSizeCall(
3965 II, DL, &TLI, AA, /*MustSucceed=*/true, &InsertedInstructions);
     // Queue the instructions created by the lowering for later combining.
3966 for (Instruction *Inserted : InsertedInstructions)
3967 Worklist.add(Inserted);
3968 replaceInstUsesWith(*I, Result);
3970 User = nullptr; // Skip examining in the next loop.
3971 continue;
3972 }
3973 if (auto *MTI = dyn_cast<MemTransferInst>(I)) {
     // When the allocation is known to be zero-initialized and is only read,
     // a zero memset over the transfer's destination is emitted at MTI.
3974 if (KnowInitZero && isRefSet(*Removable)) {
3976 Builder.SetInsertPoint(MTI);
3977 auto *M = Builder.CreateMemSet(
3978 MTI->getRawDest(),
3979 ConstantInt::get(Type::getInt8Ty(MI.getContext()), 0),
3980 MTI->getLength(), MTI->getDestAlign());
3981 M->copyMetadata(*MTI);
3982 }
3983 }
3984 }
3985 }
     // Second pass: fold or drop every remaining user.
3986 for (WeakTrackingVH &User : Users) {
3987 if (!User)
3988 continue;
3989
3991
3992 if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
     // The constant is true exactly for predicates that are false on
     // equality, i.e. the compared pointers are treated as unequal.
3994 *C, ConstantInt::get(C->getType(), C->isFalseWhenEqual()));
3995 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
3996 for (auto *DVR : DVRs)
3997 if (DVR->isAddressOfVariable())
3999 } else {
4000 // Casts, GEP, or anything else: we're about to delete this instruction,
4001 // so it can not have any valid uses.
4003 if (isa<LoadInst>(I)) {
     // A load is replaced by the known initial contents: undef when the
     // initial value is undef, otherwise zero.
4004 assert(KnowInitZero || KnowInitUndef);
4005 Replace = KnowInitUndef ? UndefValue::get(I->getType())
4006 : Constant::getNullValue(I->getType());
4007 } else
4008 Replace = PoisonValue::get(I->getType());
4010 }
4012 }
4013
4015 // Replace invoke with a NOP intrinsic to maintain the original CFG
4016 Module *M = II->getModule();
4017 Function *F = Intrinsic::getOrInsertDeclaration(M, Intrinsic::donothing);
4018 auto *NewII = InvokeInst::Create(
4019 F, II->getNormalDest(), II->getUnwindDest(), {}, "", II->getParent());
4020 NewII->setDebugLoc(II->getDebugLoc());
4021 }
4022
4023 // Remove debug intrinsics which describe the value contained within the
4024 // alloca. In addition to removing dbg.{declare,addr} which simply point to
4025 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
4026 //
4027 // ```
4028 // define void @foo(i32 %0) {
4029 // %a = alloca i32 ; Deleted.
4030 // store i32 %0, i32* %a
4031 // dbg.value(i32 %0, "arg0") ; Not deleted.
4032 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
4033 // call void @trivially_inlinable_no_op(i32* %a)
4034 // ret void
4035 // }
4036 // ```
4037 //
4038 // This may not be required if we stop describing the contents of allocas
4039 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
4040 // the LowerDbgDeclare utility.
4041 //
4042 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
4043 // "arg0" dbg.value may be stale after the call. However, failing to remove
4044 // the DW_OP_deref dbg.value causes large gaps in location coverage.
4045 //
4046 // FIXME: the Assignment Tracking project has now likely made this
4047 // redundant (and it's sometimes harmful).
4048 for (auto *DVR : DVRs)
4049 if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref())
4050 DVR->eraseFromParent();
4051
4052 return eraseInstFromFunction(MI);
4053 }
     // The site is not removable: report that nothing changed.
4054 return nullptr;
4055}
4056
4057/// Move the call to free before a NULL test.
4058///
4059/// Check if this free is accessed after its argument has been tested
4060/// against NULL (property 0).
4061/// If yes, it is legal to move this call into its predecessor block.
4062///
4063/// The move is performed only if the block containing the call to free
4064/// will be removed, i.e.:
4065/// 1. it has only one predecessor P, and P has two successors
4066/// 2. it contains the call, noops, and an unconditional branch
4067/// 3. its successor is the same as its predecessor's successor
4068///
4069/// Profitability is not checked here: this function should
4070/// be called only if the caller knows this transformation would be
4071/// profitable (e.g., for code size).
///
/// \returns the (moved) free call on success, or nullptr if any of the
/// constraints above does not hold.
4073 const DataLayout &DL) {
4074 Value *Op = FI.getArgOperand(0);
4075 BasicBlock *FreeInstrBB = FI.getParent();
4076 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
4077
4078 // Validate part of constraint #1: Only one predecessor
4079 // FIXME: We can extend the number of predecessors, but in that case, we
4080 // would duplicate the call to free in each predecessor and it may
4081 // not be profitable even for code size.
4082 if (!PredBB)
4083 return nullptr;
4084
4085 // Validate constraint #2: Does this block contain only the call to
4086 // free, noops, and an unconditional branch?
4087 BasicBlock *SuccBB;
4088 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
4089 if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
4090 return nullptr;
4091
4092 // If there are only 2 instructions in the block, at this point,
4093 // these are the call to free and the unconditional branch.
4094 // If there are more than 2 instructions, check that they are noops
4095 // i.e., they won't hurt the performance of the generated code.
4096 if (FreeInstrBB->size() != 2) {
4097 for (const Instruction &Inst : *FreeInstrBB) {
4098 if (&Inst == &FI || &Inst == FreeInstrBBTerminator ||
4100 continue;
     // Anything else must be a cast that is a no-op under this data layout.
4101 auto *Cast = dyn_cast<CastInst>(&Inst);
4102 if (!Cast || !Cast->isNoopCast(DL))
4103 return nullptr;
4104 }
4105 }
4106 // Validate the rest of constraint #1 by matching on the pred branch.
4107 Instruction *TI = PredBB->getTerminator();
4108 BasicBlock *TrueBB, *FalseBB;
4109 CmpPredicate Pred;
4110 if (!match(TI, m_Br(m_ICmp(Pred,
4112 m_Specific(Op->stripPointerCasts())),
4113 m_Zero()),
4114 TrueBB, FalseBB)))
4115 return nullptr;
4116 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
4117 return nullptr;
4118
4119 // Validate constraint #3: Ensure the null case just falls through.
4120 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
4121 return nullptr;
4122 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
4123 "Broken CFG: missing edge from predecessor to successor");
4124
4125 // At this point, we know that everything in FreeInstrBB can be moved
4126 // before TI.
     // Early-increment iteration is required because each instruction is
     // moved out of the block while the block is being walked.
4127 for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
4128 if (&Instr == FreeInstrBBTerminator)
4129 break;
4130 Instr.moveBeforePreserving(TI->getIterator());
4131 }
4132 assert(FreeInstrBB->size() == 1 &&
4133 "Only the branch instruction should remain");
4134
4135 // Now that we've moved the call to free before the NULL check, we have to
4136 // remove any attributes on its parameter that imply it's non-null, because
4137 // those attributes might have only been valid because of the NULL check, and
4138 // we can get miscompiles if we keep them. This is conservative if non-null is
4139 // also implied by something other than the NULL check, but it's guaranteed to
4140 // be correct, and the conservativeness won't matter in practice, since the
4141 // attributes are irrelevant for the call to free itself and the pointer
4142 // shouldn't be used after the call.
4143 AttributeList Attrs = FI.getAttributes();
4144 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
     // dereferenceable(N) is weakened to dereferenceable_or_null(N) with the
     // same byte count.
4145 Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
4146 if (Dereferenceable.isValid()) {
4147 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
4148 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
4149 Attribute::Dereferenceable);
4150 Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
4151 }
4152 FI.setAttributes(Attrs);
4153
4154 return &FI;
4155}
4156
// Simplifies a call FI that frees the pointer Op. Handled cases, in order:
// an undef operand, a null operand, a realloc result with a single use, and
// (only when minimizing size) hoisting a plain `free` above its null check.
// Returns nullptr when none of them applies.
4158 // free undef -> unreachable.
4159 if (isa<UndefValue>(Op)) {
4160 // Leave a marker since we can't modify the CFG here.
4162 return eraseInstFromFunction(FI);
4163 }
4164
4165 // If we have 'free null' delete the instruction. This can happen in stl code
4166 // when lots of inlining happens.
4168 return eraseInstFromFunction(FI);
4169
4170 // If we had free(realloc(...)) with no intervening uses, then eliminate the
4171 // realloc() entirely.
4173 if (CI && CI->hasOneUse())
4174 if (Value *ReallocatedOp = getReallocatedOperand(CI))
4175 return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
4176
4177 // If we optimize for code size, try to move the call to free before the null
4178 // test so that simplify cfg can remove the empty block and dead code
4179 // elimination can remove the branch. I.e., helps to turn something like:
4180 // if (foo) free(foo);
4181 // into
4182 // free(foo);
4183 //
4184 // Note that we can only do this for 'free' and not for any flavor of
4185 // 'operator delete'; there is no 'operator delete' symbol for which we are
4186 // permitted to invent a call, even if we're passing in a null pointer.
4187 if (MinimizeSize) {
4188 LibFunc Func;
4189 if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
4191 return I;
4192 }
4193
4194 return nullptr;
4195}
4196
// Uses the enclosing function's return attributes to simplify the value
// returned by RI: pointer returns are simplified under nonnull /
// dereferenceable, and floating-point returns under nofpclass.
// Returns nullptr when nothing was changed.
4198 Value *RetVal = RI.getReturnValue();
4199 if (!RetVal)
4200 return nullptr;
4201
4202 Function *F = RI.getFunction();
4203 Type *RetTy = RetVal->getType();
4204 if (RetTy->isPointerTy()) {
     // A non-zero dereferenceable byte count on the return value.
4205 bool HasDereferenceable =
4206 F->getAttributes().getRetDereferenceableBytes() > 0;
4207 if (F->hasRetAttribute(Attribute::NonNull) ||
4208 (HasDereferenceable &&
4210 if (Value *V = simplifyNonNullOperand(RetVal, HasDereferenceable))
4211 return replaceOperand(RI, 0, V);
4212 }
4213 }
4214
     // The remainder only applies to types that can carry nofpclass.
4215 if (!AttributeFuncs::isNoFPClassCompatibleType(RetTy))
4216 return nullptr;
4217
4218 FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass();
4219 if (ReturnClass == fcNone)
4220 return nullptr;
4221
     // Only the classes not excluded by the attribute are demanded.
4222 KnownFPClass KnownClass;
4223 if (SimplifyDemandedFPClass(&RI, 0, ~ReturnClass, KnownClass,
4224 SQ.getWithInstruction(&RI)))
4225 return &RI;
4226
4227 return nullptr;
4228}
4229
4230// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
// Erases instructions that precede I in its block, walking backwards until an
// instruction is found that must be kept. Returns true if anything was erased.
4232 // Try to remove the previous instruction if it must lead to unreachable.
4233 // This includes instructions like stores and "llvm.assume" that may not get
4234 // removed by simple dead code elimination.
4235 bool Changed = false;
     // Each iteration erases Prev, so getPrevNode() yields a new instruction
     // every time until the start of the block is reached.
4236 while (Instruction *Prev = I.getPrevNode()) {
4237 // While we theoretically can erase EH, that would result in a block that
4238 // used to start with an EH no longer starting with EH, which is invalid.
4239 // To make it valid, we'd need to fixup predecessors to no longer refer to
4240 // this block, but that changes CFG, which is not allowed in InstCombine.
4241 if (Prev->isEHPad())
4242 break; // Can not drop any more instructions. We're done here.
4243
4245 break; // Can not drop any more instructions. We're done here.
4246 // Otherwise, this instruction can be freely erased,
4247 // even if it is not side-effect free.
4248
4249 // A value may still have uses before we process it here (for example, in
4250 // another unreachable block), so convert those to poison.
4251 replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
4252 eraseInstFromFunction(*Prev);
4253 Changed = true;
4254 }
4255 return Changed;
4256}
4257
4262
4264 // If this store is the second-to-last instruction in the basic block
4265 // (excluding debug info) and if the block ends with
4266 // an unconditional branch, try to move the store to the successor block.
4267
     // Steps backwards from BBI, skipping debug/pseudo instructions, and
     // returns the instruction reached if it is a StoreInst, else nullptr.
     // The walk never moves past the first instruction of the block.
4268 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
4269 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
4270 do {
4271 if (BBI != FirstInstr)
4272 --BBI;
4273 } while (BBI != FirstInstr && BBI->isDebugOrPseudoInst());
4274
4275 return dyn_cast<StoreInst>(BBI);
4276 };
4277
4278 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
4280 return &BI;
4281
4282 return nullptr;
4283}
4284
// Records the CFG edge From -> To as dead. The first time an edge is
// recorded, the matching phi inputs in To become poison and To is queued on
// Worklist; a repeated call for the same edge does nothing.
4287 if (!DeadEdges.insert({From, To}).second)
4288 return;
4289
4290 // Replace phi node operands in successor with poison.
4291 for (PHINode &PN : To->phis())
4292 for (Use &U : PN.incoming_values())
     // Only rewrite inputs arriving from From that are not already poison,
     // so that no change is reported when nothing is modified.
4293 if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(U)) {
4294 replaceUse(U, PoisonValue::get(PN.getType()));
4295 addToWorklist(&PN);
4296 MadeIRChange = true;
4297 }
4298
4299 Worklist.push_back(To);
4300}
4301
4302// Under the assumption that I is unreachable, remove it and following
4303// instructions. Changes are reported directly to MadeIRChange.
// The block's terminator itself is not visited; every outgoing edge of the
// block is marked dead instead.
4306 BasicBlock *BB = I->getParent();
     // Reverse walk from the instruction just before the terminator back to
     // I (inclusive).
4307 for (Instruction &Inst : make_early_inc_range(
4308 make_range(std::next(BB->getTerminator()->getReverseIterator()),
4309 std::next(I->getReverseIterator())))) {
4310 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
4311 replaceInstUsesWith(Inst, PoisonValue::get(Inst.getType()));
4312 MadeIRChange = true;
4313 }
     // EH pads and token-typed instructions are left in place.
4314 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
4315 continue;
4316 // RemoveDIs: erase debug-info on this instruction manually.
4317 Inst.dropDbgRecords();
4319 MadeIRChange = true;
4320 }
4321
4324 MadeIRChange = true;
4325 for (Value *V : Changed)
4327 }
4328
4329 // Handle potentially dead successors.
4330 for (BasicBlock *Succ : successors(BB))
4331 addDeadEdge(BB, Succ, Worklist);
4332}
4333
// Drains Worklist. A block is acted upon only when every incoming edge is
// either recorded in DeadEdges or comes from a predecessor that the block
// itself dominates.
4336 while (!Worklist.empty()) {
4337 BasicBlock *BB = Worklist.pop_back_val();
4338 if (!all_of(predecessors(BB), [&](BasicBlock *Pred) {
4339 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
4340 }))
4341 continue;
4342
4344 }
4345}
4346
4348 BasicBlock *LiveSucc) {
     // Marks every edge from BB to a successor other than LiveSucc as dead.
     // Callers in this file pass nullptr as LiveSucc when no successor is
     // known to be live, in which case all outgoing edges are marked.
4350 for (BasicBlock *Succ : successors(BB)) {
4351 // The live successor isn't dead.
4352 if (Succ == LiveSucc)
4353 continue;
4354
4355 addDeadEdge(BB, Succ, Worklist);
4356 }
4357
4359}
4360
4362 // Change br (not X), label True, label False to: br X, label False, True
4363 Value *Cond = BI.getCondition();
4364 Value *X;
4365 if (match(Cond, m_Not(m_Value(X))) && !isa<Constant>(X)) {
4366 // Swap Destinations and condition...
4367 BI.swapSuccessors();
     // Keep branch probability info, when available, in step with the swap.
4368 if (BPI)
4369 BPI->swapSuccEdgesProbabilities(BI.getParent());
4370 return replaceOperand(BI, 0, X);
4371 }
4372
4373 // Canonicalize logical-and-with-invert as logical-or-with-invert.
4374 // This is done by inverting the condition and swapping successors:
4375 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
4376 Value *Y;
4377 if (isa<SelectInst>(Cond) &&
4378 match(Cond,
4380 Value *NotX = Builder.CreateNot(X, "not." + X->getName());
4381 Value *Or = Builder.CreateLogicalOr(NotX, Y);
4382
4383 // Set weights for the new OR select instruction too.
4385 if (auto *OrInst = dyn_cast<Instruction>(Or)) {
4386 if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
4387 SmallVector<uint32_t> Weights;
4388 if (extractBranchWeights(*CondInst, Weights)) {
4389 assert(Weights.size() == 2 &&
4390 "Unexpected number of branch weights!");
     // The new select carries the old select's weights in swapped order.
4391 std::swap(Weights[0], Weights[1]);
4392 setBranchWeights(*OrInst, Weights, /*IsExpected=*/false);
4393 }
4394 }
4395 }
4396 }
4397 BI.swapSuccessors();
4398 if (BPI)
4399 BPI->swapSuccEdgesProbabilities(BI.getParent());
4400 return replaceOperand(BI, 0, Or);
4401 }
4402
4403 // If the condition is irrelevant, remove the use so that other
4404 // transforms on the condition become more effective.
4405 if (!isa<ConstantInt>(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1))
4406 return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType()));
4407
4408 // Canonicalize, for example, fcmp_one -> fcmp_oeq.
4409 CmpPredicate Pred;
4410 if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) &&
4411 !isCanonicalPredicate(Pred)) {
4412 // Swap destinations and condition.
     // The compare is mutated in place, which is why it must have one use.
4413 auto *Cmp = cast<CmpInst>(Cond);
4414 Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
4415 BI.swapSuccessors();
4416 if (BPI)
4417 BPI->swapSuccEdgesProbabilities(BI.getParent());
4418 Worklist.push(Cmp);
4419 return &BI;
4420 }
4421
     // An undef condition gives no live successor.
4422 if (isa<UndefValue>(Cond)) {
4423 handlePotentiallyDeadSuccessors(BI.getParent(), /*LiveSucc*/ nullptr);
4424 return nullptr;
4425 }
     // For a constant condition only the taken successor stays live:
     // successor 0 for a non-zero value, successor 1 for zero.
4426 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
4427 handlePotentiallyDeadSuccessors(BI.getParent(),
4428 BI.getSuccessor(!CI->getZExtValue()));
4429 return nullptr;
4430 }
4431
4432 // Replace all dominated uses of the condition with true/false.
4433 // Ignore constant expressions to avoid iterating over uses on other
4434 // functions.
4435 if (!isa<Constant>(Cond) && BI.getSuccessor(0) != BI.getSuccessor(1)) {
     // Early-increment iteration, since replaceUse edits Cond's use list.
4436 for (auto &U : make_early_inc_range(Cond->uses())) {
4437 BasicBlockEdge Edge0(BI.getParent(), BI.getSuccessor(0));
4438 if (DT.dominates(Edge0, U)) {
4439 replaceUse(U, ConstantInt::getTrue(Cond->getType()));
4440 addToWorklist(cast<Instruction>(U.getUser()));
4441 continue;
4442 }
4443 BasicBlockEdge Edge1(BI.getParent(), BI.getSuccessor(1));
4444 if (DT.dominates(Edge1, U)) {
4445 replaceUse(U, ConstantInt::getFalse(Cond->getType()));
4446 addToWorklist(cast<Instruction>(U.getUser()));
4447 }
4448 }
4449 }
4450
4451 DC.registerBranch(&BI);
4452 return nullptr;
4453}
4454
4455// Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if
4456// we can prove that both (switch C) and (switch X) go to the default when cond
4457// is false/true.
// IsTrueArm selects which select arm is expected to hold the constant C.
// Returns X when the replacement is valid and nullptr otherwise.
4460 bool IsTrueArm) {
     // Select operands are (condition, true value, false value).
4461 unsigned CstOpIdx = IsTrueArm ? 1 : 2;
4462 auto *C = dyn_cast<ConstantInt>(Select->getOperand(CstOpIdx));
4463 if (!C)
4464 return nullptr;
4465
     // The constant arm must dispatch to the switch's default destination.
4466 BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor();
4467 if (CstBB != SI.getDefaultDest())
4468 return nullptr;
     // X is the other (non-constant-slot) arm of the select.
4469 Value *X = Select->getOperand(3 - CstOpIdx);
4470 CmpPredicate Pred;
4471 const APInt *RHSC;
4472 if (!match(Select->getCondition(),
4473 m_ICmp(Pred, m_Specific(X), m_APInt(RHSC))))
4474 return nullptr;
     // When the constant sits in the true arm, X is selected when the compare
     // is false, so the predicate is inverted.
4475 if (IsTrueArm)
4476 Pred = ICmpInst::getInversePredicate(Pred);
4477
4478 // See whether we can replace the select with X
     // NOTE(review): the definition of CR is not visible here; presumably it
     // is the range of X values for which X is the selected arm -- confirm.
4480 for (auto Case : SI.cases())
4481 if (!CR.contains(Case.getCaseValue()->getValue()))
4482 return nullptr;
4483
4484 return X;
4485}
4486
4488 Value *Cond = SI.getCondition();
4489 Value *Op0;
4490 const APInt *CondOpC;
4491 using InvertFn = std::function<APInt(const APInt &Case, const APInt &C)>;
4492
4493 auto MaybeInvertible = [&](Value *Cond) -> InvertFn {
4494 if (match(Cond, m_Add(m_Value(Op0), m_APInt(CondOpC))))
4495 // Change 'switch (X+C) case Case:' into 'switch (X) case Case-C'.
4496 return [](const APInt &Case, const APInt &C) { return Case - C; };
4497
4498 if (match(Cond, m_Sub(m_APInt(CondOpC), m_Value(Op0))))
4499 // Change 'switch (C-X) case Case:' into 'switch (X) case C-Case'.
4500 return [](const APInt &Case, const APInt &C) { return C - Case; };
4501
4502 if (match(Cond, m_Xor(m_Value(Op0), m_APInt(CondOpC))) &&
4503 !CondOpC->isMinSignedValue() && !CondOpC->isMaxSignedValue())
4504 // Change 'switch (X^C) case Case:' into 'switch (X) case Case^C'.
4505 // Prevent creation of large case values by excluding extremes.
4506 return [](const APInt &Case, const APInt &C) { return Case ^ C; };
4507
4508 return nullptr;
4509 };
4510
4511 // Attempt to invert and simplify the switch condition, as long as the
4512 // condition is not used further, as it may not be profitable otherwise.
4513 if (auto InvertFn = MaybeInvertible(Cond); InvertFn && Cond->hasOneUse()) {
4514 for (auto &Case : SI.cases()) {
4515 const APInt &New = InvertFn(Case.getCaseValue()->getValue(), *CondOpC);
4516 Case.setValue(ConstantInt::get(SI.getContext(), New));
4517 }
4518 return replaceOperand(SI, 0, Op0);
4519 }
4520
4521 uint64_t ShiftAmt;
4522 if (match(Cond, m_Shl(m_Value(Op0), m_ConstantInt(ShiftAmt))) &&
4523 ShiftAmt < Op0->getType()->getScalarSizeInBits() &&
4524 all_of(SI.cases(), [&](const auto &Case) {
4525 return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt;
4526 })) {
4527 // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'.
4529 if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() ||
4530 Shl->hasOneUse()) {
4531 Value *NewCond = Op0;
4532 if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) {
4533 // If the shift may wrap, we need to mask off the shifted bits.
4534 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
4535 NewCond = Builder.CreateAnd(
4536 Op0, APInt::getLowBitsSet(BitWidth, BitWidth - ShiftAmt));
4537 }
4538 for (auto Case : SI.cases()) {
4539 const APInt &CaseVal = Case.getCaseValue()->getValue();
4540 APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt)
4541 : CaseVal.lshr(ShiftAmt);
4542 Case.setValue(ConstantInt::get(SI.getContext(), ShiftedCase));
4543 }
4544 return replaceOperand(SI, 0, NewCond);
4545 }
4546 }
4547
4548 // Fold switch(zext/sext(X)) into switch(X) if possible.
4549 if (match(Cond, m_ZExtOrSExt(m_Value(Op0)))) {
4550 bool IsZExt = isa<ZExtInst>(Cond);
4551 Type *SrcTy = Op0->getType();
4552 unsigned NewWidth = SrcTy->getScalarSizeInBits();
4553
4554 if (all_of(SI.cases(), [&](const auto &Case) {
4555 const APInt &CaseVal = Case.getCaseValue()->getValue();
4556 return IsZExt ? CaseVal.isIntN(NewWidth)
4557 : CaseVal.isSignedIntN(NewWidth);
4558 })) {
4559 for (auto &Case : SI.cases()) {
4560 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
4561 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
4562 }
4563 return replaceOperand(SI, 0, Op0);
4564 }
4565 }
4566
4567 // Fold switch(select cond, X, Y) into switch(X/Y) if possible
4568 if (auto *Select = dyn_cast<SelectInst>(Cond)) {
4569 if (Value *V =
4570 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true))
4571 return replaceOperand(SI, 0, V);
4572 if (Value *V =
4573 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false))
4574 return replaceOperand(SI, 0, V);
4575 }
4576
4577 KnownBits Known = computeKnownBits(Cond, &SI);
4578 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
4579 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
4580
4581 // Compute the number of leading bits we can ignore.
4582 // TODO: A better way to determine this would use ComputeNumSignBits().
4583 for (const auto &C : SI.cases()) {
4584 LeadingKnownZeros =
4585 std::min(LeadingKnownZeros, C.getCaseValue()->getValue().countl_zero());
4586 LeadingKnownOnes =
4587 std::min(LeadingKnownOnes, C.getCaseValue()->getValue().countl_one());
4588 }
4589
4590 unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
4591
4592 // Shrink the condition operand if the new type is smaller than the old type.
4593 // But do not shrink to a non-standard type, because backend can't generate
4594 // good code for that yet.
4595 // TODO: We can make it aggressive again after fixing PR39569.
4596 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
4597 shouldChangeType(Known.getBitWidth(), NewWidth)) {
4598 IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
4599 Builder.SetInsertPoint(&SI);
4600 Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
4601
4602 for (auto Case : SI.cases()) {
4603 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
4604 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
4605 }
4606 return replaceOperand(SI, 0, NewCond);
4607 }
4608
4609 if (isa<UndefValue>(Cond)) {
4610 handlePotentiallyDeadSuccessors(SI.getParent(), /*LiveSucc*/ nullptr);
4611 return nullptr;
4612 }
4613 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
4615 SI.findCaseValue(CI)->getCaseSuccessor());
4616 return nullptr;
4617 }
4618
4619 return nullptr;
4620}
4621
// Folds an extractvalue (EV) whose aggregate is a with-overflow intrinsic (WO).
// Index 0 of the aggregate is the arithmetic result and index 1 is the overflow
// bit. Returns a replacement instruction, or nullptr when no fold applies.
// NOTE(review): this listing drops lines 4622 (return type), 4624 (presumably
// the definition of WO), 4658 and 4682 - confirm against the full source.
4623InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
4625 if (!WO)
4626 return nullptr;
4627
4628 Intrinsic::ID OvID = WO->getIntrinsicID();
// C stays null unless the RHS matches a constant; the last fold in this
// function is gated on it being non-null.
4629 const APInt *C = nullptr;
4630 if (match(WO->getRHS(), m_APIntAllowPoison(C))) {
// These two multiply folds are tried before the one-use check below, so they
// are not restricted to a single-use WO.
4631 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
4632 OvID == Intrinsic::umul_with_overflow)) {
4633 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
4634 if (C->isAllOnes())
4635 return BinaryOperator::CreateNeg(WO->getLHS());
4636 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
4637 if (C->isPowerOf2()) {
4638 return BinaryOperator::CreateShl(
4639 WO->getLHS(),
4640 ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
4641 }
4642 }
4643 }
4644
4645 // We're extracting from an overflow intrinsic. See if we're the only user.
4646 // That allows us to simplify multiple result intrinsics to simpler things
4647 // that just get one value.
4648 if (!WO->hasOneUse())
4649 return nullptr;
4650
4651 // Check if we're grabbing only the result of a 'with overflow' intrinsic
4652 // and replace it with a traditional binary instruction.
4653 if (*EV.idx_begin() == 0) {
// LHS/RHS/BinOp are read out of WO before its uses are rewritten below.
4654 Instruction::BinaryOps BinOp = WO->getBinaryOp();
4655 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
4656 // Replace the old instruction's uses with poison.
4657 replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
4659 return BinaryOperator::Create(BinOp, LHS, RHS);
4660 }
4661
// From here on only the overflow bit (index 1) is being extracted.
4662 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
4663
4664 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
4665 if (OvID == Intrinsic::usub_with_overflow)
4666 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
4667
4668 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
4669 // +1 is not possible because we assume signed values.
4670 if (OvID == Intrinsic::smul_with_overflow &&
4671 WO->getLHS()->getType()->isIntOrIntVectorTy(1))
4672 return BinaryOperator::CreateAnd(WO->getLHS(), WO->getRHS());
4673
4674 // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1
4675 if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) {
4676 unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits();
4677 // Only handle even bitwidths for performance reasons.
4678 if (BitWidth % 2 == 0)
4679 return new ICmpInst(
4680 ICmpInst::ICMP_UGT, WO->getLHS(),
4681 ConstantInt::get(WO->getLHS()->getType(),
4683 }
4684
4685 // If only the overflow result is used, and the right hand side is a
4686 // constant (or constant splat), we can remove the intrinsic by directly
4687 // checking for overflow.
4688 if (C) {
4689 // Compute the no-wrap range for LHS given RHS=C, then construct an
4690 // equivalent icmp, potentially using an offset.
4691 ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
4692 WO->getBinaryOp(), *C, WO->getNoWrapKind());
4693
4694 CmpInst::Predicate Pred;
4695 APInt NewRHSC, Offset;
4696 NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
4697 auto *OpTy = WO->getRHS()->getType();
4698 auto *NewLHS = WO->getLHS();
4699 if (Offset != 0)
4700 NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
// Pred describes membership in the no-wrap region, so the overflow bit is the
// inverse predicate.
4701 return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
4702 ConstantInt::get(OpTy, NewRHSC));
4703 }
4704
4705 return nullptr;
4706}
4707
// Rewrites element 0 of frexp(select(Cond, A, B)) when one select arm is a
// floating-point constant: the constant arm's mantissa is computed here, and
// frexp is re-issued on the other arm only. Returns the new select value, or
// nullptr when the pattern does not apply.
// NOTE(review): the first signature lines (listing 4708-4709) and line 4715
// (presumably the definition of Cond) are missing from this listing.
4710 InstCombiner::BuilderTy &Builder) {
4711 // Helper to fold frexp of select to select of frexp.
4712
// Both the select and the frexp call must have a single use for the fold.
4713 if (!SelectInst->hasOneUse() || !FrexpCall->hasOneUse())
4714 return nullptr;
4716 Value *TrueVal = SelectInst->getTrueValue();
4717 Value *FalseVal = SelectInst->getFalseValue();
4718
4719 const APFloat *ConstVal = nullptr;
4720 Value *VarOp = nullptr;
4721 bool ConstIsTrue = false;
4722
// The true arm is checked first, so if both arms are constants the true arm is
// the one treated as the constant.
4723 if (match(TrueVal, m_APFloat(ConstVal))) {
4724 VarOp = FalseVal;
4725 ConstIsTrue = true;
4726 } else if (match(FalseVal, m_APFloat(ConstVal))) {
4727 VarOp = TrueVal;
4728 ConstIsTrue = false;
4729 } else {
4730 return nullptr;
4731 }
4732
4733 Builder.SetInsertPoint(&EV);
4734
// Re-issue frexp on the non-constant arm, keeping the original call's IR flags.
4735 CallInst *NewFrexp =
4736 Builder.CreateCall(FrexpCall->getCalledFunction(), {VarOp}, "frexp");
4737 NewFrexp->copyIRFlags(FrexpCall);
4738
4739 Value *NewEV = Builder.CreateExtractValue(NewFrexp, 0, "mantissa");
4740
// Evaluate frexp on the constant arm now. Only the mantissa is used below;
// Exp is computed but not read again.
4741 int Exp;
4742 APFloat Mantissa = frexp(*ConstVal, Exp, APFloat::rmNearestTiesToEven);
4743
4744 Constant *ConstantMantissa = ConstantFP::get(TrueVal->getType(), Mantissa);
4745
// Keep each value on the same arm it came from in the original select.
4746 Value *NewSel = Builder.CreateSelectFMF(
4747 Cond, ConstIsTrue ? ConstantMantissa : NewEV,
4748 ConstIsTrue ? NewEV : ConstantMantissa, SelectInst, "select.frexp");
4749 return NewSel;
4750}
// Body of the extractvalue visitor (presumably
// InstCombinerImpl::visitExtractValueInst). It tries, in order: generic
// simplification, the frexp-of-select fold, folds through an insertvalue
// operand, folds of with-overflow intrinsics, narrowing a single-use load,
// and pushing the extract into a phi or select. Returns nullptr if none apply.
// NOTE(review): the signature (listing line 4751) and lines 4762 and 4770 are
// missing from this listing - confirm against the full source.
4752 Value *Agg = EV.getAggregateOperand();
4753
// An extractvalue with no indices is just its aggregate operand.
4754 if (!EV.hasIndices())
4755 return replaceInstUsesWith(EV, Agg);
4756
4757 if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
4758 SQ.getWithInstruction(&EV)))
4759 return replaceInstUsesWith(EV, V);
4760
4761 Value *Cond, *TrueVal, *FalseVal;
4763 m_Value(Cond), m_Value(TrueVal), m_Value(FalseVal)))))) {
4764 auto *SelInst =
4765 cast<SelectInst>(cast<IntrinsicInst>(Agg)->getArgOperand(0));
4766 if (Value *Result =
4767 foldFrexpOfSelect(EV, cast<IntrinsicInst>(Agg), SelInst, Builder))
4768 return replaceInstUsesWith(EV, Result);
4769 }
4771 // We're extracting from an insertvalue instruction, compare the indices
4772 const unsigned *exti, *exte, *insi, *inse;
// Walk both index lists in lockstep until one ends or the indices differ.
4773 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
4774 exte = EV.idx_end(), inse = IV->idx_end();
4775 exti != exte && insi != inse;
4776 ++exti, ++insi) {
4777 if (*insi != *exti)
4778 // The insert and extract both reference distinctly different elements.
4779 // This means the extract is not influenced by the insert, and we can
4780 // replace the aggregate operand of the extract with the aggregate
4781 // operand of the insert. i.e., replace
4782 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4783 // %E = extractvalue { i32, { i32 } } %I, 0
4784 // with
4785 // %E = extractvalue { i32, { i32 } } %A, 0
4786 return ExtractValueInst::Create(IV->getAggregateOperand(),
4787 EV.getIndices());
4788 }
4789 if (exti == exte && insi == inse)
4790 // Both iterators are at the end: Index lists are identical. Replace
4791 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
4792 // %C = extractvalue { i32, { i32 } } %B, 1, 0
4793 // with "i32 42"
4794 return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
4795 if (exti == exte) {
4796 // The extract list is a prefix of the insert list. i.e. replace
4797 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
4798 // %E = extractvalue { i32, { i32 } } %I, 1
4799 // with
4800 // %X = extractvalue { i32, { i32 } } %A, 1
4801 // %E = insertvalue { i32 } %X, i32 42, 0
4802 // by switching the order of the insert and extract (though the
4803 // insertvalue should be left in, since it may have other uses).
4804 Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
4805 EV.getIndices());
4806 return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
4807 ArrayRef(insi, inse));
4808 }
4809 if (insi == inse)
4810 // The insert list is a prefix of the extract list
4811 // We can simply remove the common indices from the extract and make it
4812 // operate on the inserted value instead of the insertvalue result.
4813 // i.e., replace
4814 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4815 // %E = extractvalue { i32, { i32 } } %I, 1, 0
4816 // with
4817 // %E = extractvalue { i32 } { i32 42 }, 0
4818 return ExtractValueInst::Create(IV->getInsertedValueOperand(),
4819 ArrayRef(exti, exte));
4820 }
4821
4822 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
4823 return R;
4824
4825 if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
4826 // Bail out if the aggregate contains scalable vector type
4827 if (auto *STy = dyn_cast<StructType>(Agg->getType());
4828 STy && STy->isScalableTy())
4829 return nullptr;
4830
4831 // If the (non-volatile) load only has one use, we can rewrite this to a
4832 // load from a GEP. This reduces the size of the load. If a load is used
4833 // only by extractvalue instructions then this either must have been
4834 // optimized before, or it is a struct with padding, in which case we
4835 // don't want to do the transformation as it loses padding knowledge.
4836 if (L->isSimple() && L->hasOneUse()) {
4837 // extractvalue has integer indices, getelementptr has Value*s. Convert.
4838 SmallVector<Value*, 4> Indices;
4839 // Prefix an i32 0 since we need the first element.
4840 Indices.push_back(Builder.getInt32(0));
4841 for (unsigned Idx : EV.indices())
4842 Indices.push_back(Builder.getInt32(Idx));
4843
4844 // We need to insert these at the location of the old load, not at that of
4845 // the extractvalue.
4846 Builder.SetInsertPoint(L);
4847 Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
4848 L->getPointerOperand(), Indices);
4849 Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
4850 // Whatever aliasing information we had for the original load must also
4851 // hold for the smaller load, so propagate the annotations.
4852 NL->setAAMetadata(L->getAAMetadata());
4853 // Returning the load directly will cause the main loop to insert it in
4854 // the wrong spot, so use replaceInstUsesWith().
4855 return replaceInstUsesWith(EV, NL);
4856 }
4857 }
4858
4859 if (auto *PN = dyn_cast<PHINode>(Agg))
4860 if (Instruction *Res = foldOpIntoPhi(EV, PN))
4861 return Res;
4862
4863 // Canonicalize extract (select Cond, TV, FV)
4864 // -> select cond, (extract TV), (extract FV)
4865 if (auto *SI = dyn_cast<SelectInst>(Agg))
4866 if (Instruction *R = FoldOpIntoSelect(EV, SI, /*FoldWithMultiUse=*/true))
4867 return R;
4868
4869 // We could simplify extracts from other values. Note that nested extracts may
4870 // already be simplified implicitly by the above: extract (extract (insert) )
4871 // will be translated into extract ( insert ( extract ) ) first and then just
4872 // the value inserted, if appropriate. Similarly for extracts from single-use
4873 // loads: extract (extract (load)) will be translated to extract (load (gep))
4874 // and if again single-use then via load (gep (gep)) to load (gep).
4875 // However, double extracts from e.g. function arguments or return values
4876 // aren't handled yet.
4877 return nullptr;
4878}
4879
4880/// Return 'true' if the given typeinfo will match anything.
// NOTE(review): every `case` label of this switch (listing lines 4883-4885,
// 4889, 4891, 4895-4904) is missing from this listing, so which personalities
// map to which return cannot be read from here. The visible returns show three
// groups that never report a catch-all and a final group for which a null
// pointer typeinfo is the catch-all.
4881static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
4882 switch (Personality) {
4886 // The GCC C EH and Rust personality only exists to support cleanups, so
4887 // it's not clear what the semantics of catch clauses are.
4888 return false;
4890 return false;
4892 // While __gnat_all_others_value will match any Ada exception, it doesn't
4893 // match foreign exceptions (or didn't, before gcc-4.7).
4894 return false;
4905 return isa<ConstantPointerNull>(TypeInfo);
4906 }
// Reached only if Personality is not handled by any case of the switch.
4907 llvm_unreachable("invalid enum");
4908}
4909
4910static bool shorter_filter(const Value *LHS, const Value *RHS) {
4911 return
4912 cast<ArrayType>(LHS->getType())->getNumElements()
4913 <
4914 cast<ArrayType>(RHS->getType())->getNumElements();
4915}
4916
// Body of the landingpad visitor (presumably
// InstCombinerImpl::visitLandingPadInst). It rebuilds the clause list of LI in
// NewClauses in four passes: (1) drop repeated catch clauses, stop at a
// catch-all, and de-duplicate filter elements; (2) sort each run of adjacent
// filters by length; (3) remove a later filter when an earlier filter is a
// subset of it; (4) emit a new landingpad if anything changed, or just update
// the cleanup flag on LI. Returns nullptr when nothing changed.
// NOTE(review): the signature (listing line 4917) and lines 4986, 5091, 5147,
// 5175 and 5202 are missing from this listing - confirm against the full source.
4918 // The logic here should be correct for any real-world personality function.
4919 // However if that turns out not to be true, the offending logic can always
4920 // be conditioned on the personality function, like the catch-all logic is.
4921 EHPersonality Personality =
4922 classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
4923
4924 // Simplify the list of clauses, eg by removing repeated catch clauses
4925 // (these are often created by inlining).
4926 bool MakeNewInstruction = false; // If true, recreate using the following:
4927 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
4928 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
4929
4930 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
4931 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
4932 bool isLastClause = i + 1 == e;
4933 if (LI.isCatch(i)) {
4934 // A catch clause.
4935 Constant *CatchClause = LI.getClause(i);
4936 Constant *TypeInfo = CatchClause->stripPointerCasts();
4937
4938 // If we already saw this clause, there is no point in having a second
4939 // copy of it.
4940 if (AlreadyCaught.insert(TypeInfo).second) {
4941 // This catch clause was not already seen.
4942 NewClauses.push_back(CatchClause);
4943 } else {
4944 // Repeated catch clause - drop the redundant copy.
4945 MakeNewInstruction = true;
4946 }
4947
4948 // If this is a catch-all then there is no point in keeping any following
4949 // clauses or marking the landingpad as having a cleanup.
4950 if (isCatchAll(Personality, TypeInfo)) {
4951 if (!isLastClause)
4952 MakeNewInstruction = true;
4953 CleanupFlag = false;
4954 break;
4955 }
4956 } else {
4957 // A filter clause. If any of the filter elements were already caught
4958 // then they can be dropped from the filter. It is tempting to try to
4959 // exploit the filter further by saying that any typeinfo that does not
4960 // occur in the filter can't be caught later (and thus can be dropped).
4961 // However this would be wrong, since typeinfos can match without being
4962 // equal (for example if one represents a C++ class, and the other some
4963 // class derived from it).
4964 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
4965 Constant *FilterClause = LI.getClause(i);
4966 ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
4967 unsigned NumTypeInfos = FilterType->getNumElements();
4968
4969 // An empty filter catches everything, so there is no point in keeping any
4970 // following clauses or marking the landingpad as having a cleanup. By
4971 // dealing with this case here the following code is made a bit simpler.
4972 if (!NumTypeInfos) {
4973 NewClauses.push_back(FilterClause);
4974 if (!isLastClause)
4975 MakeNewInstruction = true;
4976 CleanupFlag = false;
4977 break;
4978 }
4979
4980 bool MakeNewFilter = false; // If true, make a new filter.
4981 SmallVector<Constant *, 16> NewFilterElts; // New elements.
4982 if (isa<ConstantAggregateZero>(FilterClause)) {
4983 // Not an empty filter - it contains at least one null typeinfo.
4984 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
4985 Constant *TypeInfo =
4987 // If this typeinfo is a catch-all then the filter can never match.
4988 if (isCatchAll(Personality, TypeInfo)) {
4989 // Throw the filter away.
4990 MakeNewInstruction = true;
4991 continue;
4992 }
4993
4994 // There is no point in having multiple copies of this typeinfo, so
4995 // discard all but the first copy if there is more than one.
4996 NewFilterElts.push_back(TypeInfo);
4997 if (NumTypeInfos > 1)
4998 MakeNewFilter = true;
4999 } else {
5000 ConstantArray *Filter = cast<ConstantArray>(FilterClause);
5001 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
5002 NewFilterElts.reserve(NumTypeInfos);
5003
5004 // Remove any filter elements that were already caught or that already
5005 // occurred in the filter. While there, see if any of the elements are
5006 // catch-alls. If so, the filter can be discarded.
5007 bool SawCatchAll = false;
5008 for (unsigned j = 0; j != NumTypeInfos; ++j) {
5009 Constant *Elt = Filter->getOperand(j);
5010 Constant *TypeInfo = Elt->stripPointerCasts();
5011 if (isCatchAll(Personality, TypeInfo)) {
5012 // This element is a catch-all. Bail out, noting this fact.
5013 SawCatchAll = true;
5014 break;
5015 }
5016
5017 // Even if we've seen a type in a catch clause, we don't want to
5018 // remove it from the filter. An unexpected type handler may be
5019 // set up for a call site which throws an exception of the same
5020 // type caught. In order for the exception thrown by the unexpected
5021 // handler to propagate correctly, the filter must be correctly
5022 // described for the call site.
5023 //
5024 // Example:
5025 //
5026 // void unexpected() { throw 1;}
5027 // void foo() throw (int) {
5028 // std::set_unexpected(unexpected);
5029 // try {
5030 // throw 2.0;
5031 // } catch (int i) {}
5032 // }
5033
5034 // There is no point in having multiple copies of the same typeinfo in
5035 // a filter, so only add it if we didn't already.
5036 if (SeenInFilter.insert(TypeInfo).second)
5037 NewFilterElts.push_back(cast<Constant>(Elt));
5038 }
5039 // A filter containing a catch-all cannot match anything by definition.
5040 if (SawCatchAll) {
5041 // Throw the filter away.
5042 MakeNewInstruction = true;
5043 continue;
5044 }
5045
5046 // If we dropped something from the filter, make a new one.
5047 if (NewFilterElts.size() < NumTypeInfos)
5048 MakeNewFilter = true;
5049 }
5050 if (MakeNewFilter) {
5051 FilterType = ArrayType::get(FilterType->getElementType(),
5052 NewFilterElts.size());
5053 FilterClause = ConstantArray::get(FilterType, NewFilterElts);
5054 MakeNewInstruction = true;
5055 }
5056
5057 NewClauses.push_back(FilterClause);
5058
5059 // If the new filter is empty then it will catch everything so there is
5060 // no point in keeping any following clauses or marking the landingpad
5061 // as having a cleanup. The case of the original filter being empty was
5062 // already handled above.
5063 if (MakeNewFilter && !NewFilterElts.size()) {
5064 assert(MakeNewInstruction && "New filter but not a new instruction!");
5065 CleanupFlag = false;
5066 break;
5067 }
5068 }
5069 }
5070
5071 // If several filters occur in a row then reorder them so that the shortest
5072 // filters come first (those with the smallest number of elements). This is
5073 // advantageous because shorter filters are more likely to match, speeding up
5074 // unwinding, but mostly because it increases the effectiveness of the other
5075 // filter optimizations below.
5076 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
5077 unsigned j;
5078 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
5079 for (j = i; j != e; ++j)
5080 if (!isa<ArrayType>(NewClauses[j]->getType()))
5081 break;
5082
5083 // Check whether the filters are already sorted by length. We need to know
5084 // if sorting them is actually going to do anything so that we only make a
5085 // new landingpad instruction if it does.
5086 for (unsigned k = i; k + 1 < j; ++k)
5087 if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
5088 // Not sorted, so sort the filters now. Doing an unstable sort would be
5089 // correct too but reordering filters pointlessly might confuse users.
5090 std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
5092 MakeNewInstruction = true;
5093 break;
5094 }
5095
// NewClauses[j] is either past the end or a non-filter clause, so the next
// run of filters cannot start before j + 1.
5096 // Look for the next batch of filters.
5097 i = j + 1;
5098 }
5099
5100 // If typeinfos matched if and only if equal, then the elements of a filter L
5101 // that occurs later than a filter F could be replaced by the intersection of
5102 // the elements of F and L. In reality two typeinfos can match without being
5103 // equal (for example if one represents a C++ class, and the other some class
5104 // derived from it) so it would be wrong to perform this transform in general.
5105 // However the transform is correct and useful if F is a subset of L. In that
5106 // case L can be replaced by F, and thus removed altogether since repeating a
5107 // filter is pointless. So here we look at all pairs of filters F and L where
5108 // L follows F in the list of clauses, and remove L if every element of F is
5109 // an element of L. This can occur when inlining C++ functions with exception
5110 // specifications.
// NewClauses.size() is re-read on every iteration because the inner loop
// erases elements.
5111 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
5112 // Examine each filter in turn.
5113 Value *Filter = NewClauses[i];
5114 ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
5115 if (!FTy)
5116 // Not a filter - skip it.
5117 continue;
5118 unsigned FElts = FTy->getNumElements();
5119 // Examine each filter following this one. Doing this backwards means that
5120 // we don't have to worry about filters disappearing under us when removed.
5121 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
5122 Value *LFilter = NewClauses[j];
5123 ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
5124 if (!LTy)
5125 // Not a filter - skip it.
5126 continue;
5127 // If Filter is a subset of LFilter, i.e. every element of Filter is also
5128 // an element of LFilter, then discard LFilter.
5129 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
5130 // If Filter is empty then it is a subset of LFilter.
5131 if (!FElts) {
5132 // Discard LFilter.
5133 NewClauses.erase(J);
5134 MakeNewInstruction = true;
5135 // Move on to the next filter.
5136 continue;
5137 }
5138 unsigned LElts = LTy->getNumElements();
5139 // If Filter is longer than LFilter then it cannot be a subset of it.
5140 if (FElts > LElts)
5141 // Move on to the next filter.
5142 continue;
5143 // At this point we know that LFilter has at least one element.
5144 if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
5145 // Filter is a subset of LFilter iff Filter contains only zeros (as we
5146 // already know that Filter is not longer than LFilter).
5148 assert(FElts <= LElts && "Should have handled this case earlier!");
5149 // Discard LFilter.
5150 NewClauses.erase(J);
5151 MakeNewInstruction = true;
5152 }
5153 // Move on to the next filter.
5154 continue;
5155 }
5156 ConstantArray *LArray = cast<ConstantArray>(LFilter);
5157 if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
5158 // Since Filter is non-empty and contains only zeros, it is a subset of
5159 // LFilter iff LFilter contains a zero.
5160 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
5161 for (unsigned l = 0; l != LElts; ++l)
5162 if (isa<ConstantPointerNull>(LArray->getOperand(l))) {
5163 // LFilter contains a zero - discard it.
5164 NewClauses.erase(J);
5165 MakeNewInstruction = true;
5166 break;
5167 }
5168 // Move on to the next filter.
5169 continue;
5170 }
5171 // At this point we know that both filters are ConstantArrays. Loop over
5172 // operands to see whether every element of Filter is also an element of
5173 // LFilter. Since filters tend to be short this is probably faster than
5174 // using a method that scales nicely.
5176 bool AllFound = true;
5177 for (unsigned f = 0; f != FElts; ++f) {
5178 Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
// Reset per element of Filter; set again only if it is found in LFilter.
5179 AllFound = false;
5180 for (unsigned l = 0; l != LElts; ++l) {
5181 Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
5182 if (LTypeInfo == FTypeInfo) {
5183 AllFound = true;
5184 break;
5185 }
5186 }
5187 if (!AllFound)
5188 break;
5189 }
5190 if (AllFound) {
5191 // Discard LFilter.
5192 NewClauses.erase(J);
5193 MakeNewInstruction = true;
5194 }
5195 // Move on to the next filter.
5196 }
5197 }
5198
5199 // If we changed any of the clauses, replace the old landingpad instruction
5200 // with a new one.
5201 if (MakeNewInstruction) {
5203 NewClauses.size());
5204 for (Constant *C : NewClauses)
5205 NLI->addClause(C);
5206 // A landing pad with no clauses must have the cleanup flag set. It is
5207 // theoretically possible, though highly unlikely, that we eliminated all
5208 // clauses. If so, force the cleanup flag to true.
5209 if (NewClauses.empty())
5210 CleanupFlag = true;
5211 NLI->setCleanup(CleanupFlag);
5212 return NLI;
5213 }
5214
5215 // Even if none of the clauses changed, we may nonetheless have understood
5216 // that the cleanup flag is pointless. Clear it if so.
5217 if (LI.isCleanup() != CleanupFlag) {
5218 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
5219 LI.setCleanup(CleanupFlag);
5220 return &LI;
5221 }
5222
5223 return nullptr;
5224}
5225
// Pushes the freeze OrigFI through its single-use operand instruction onto that
// instruction's one possibly-poison operand. Returns the operand instruction
// (OrigOp) on success, or nullptr when the transform does not apply.
// NOTE(review): the declarator line (listing 5227) and lines 5255 and 5264 are
// missing from this listing - confirm against the full source.
5226Value *
5228 // Try to push freeze through instructions that propagate but don't produce
5229 // poison as far as possible. If an operand of freeze follows three
5230 // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
5231 // guaranteed-non-poison operands then push the freeze through to the one
5232 // operand that is not guaranteed non-poison. The actual transform is as
5233 // follows.
5234 // Op1 = ... ; Op1 can be poison
5235 // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and only have
5236 // ; single guaranteed-non-poison operands
5237 // ... = Freeze(Op0)
5238 // =>
5239 // Op1 = ...
5240 // Op1.fr = Freeze(Op1)
5241 // ... = Inst(Op1.fr, NonPoisonOps...)
5242 auto *OrigOp = OrigFI.getOperand(0);
5243 auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
5244
5245 // While we could change the other users of OrigOp to use freeze(OrigOp), that
5246 // potentially reduces their optimization potential, so let's only do this iff
5247 // the OrigOp is only used by the freeze.
5248 if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
5249 return nullptr;
5250
5251 // We can't push the freeze through an instruction which can itself create
5252 // poison. If the only source of new poison is flags, we can simply
5253 // strip them (since we know the only use is the freeze and nothing can
5254 // benefit from them.)
5256 /*ConsiderFlagsAndMetadata*/ false))
5257 return nullptr;
5258
5259 // If operand is guaranteed not to be poison, there is no need to add freeze
5260 // to the operand. So we first find the operand that is not guaranteed to be
5261 // poison.
5262 Value *MaybePoisonOperand = nullptr;
5263 for (Value *V : OrigOpInst->operands()) {
5265 // Treat identical operands as a single operand.
5266 (MaybePoisonOperand && MaybePoisonOperand == V))
5267 continue;
// A second, distinct candidate means more than one operand would need
// freezing, so give up.
5268 if (!MaybePoisonOperand)
5269 MaybePoisonOperand = V;
5270 else
5271 return nullptr;
5272 }
5273
// Past this point the transform is committed; the flags are dropped on both
// remaining paths.
5274 OrigOpInst->dropPoisonGeneratingAnnotations();
5275
5276 // If all operands are guaranteed to be non-poison, we can drop freeze.
5277 if (!MaybePoisonOperand)
5278 return OrigOp;
5279
5280 Builder.SetInsertPoint(OrigOpInst);
5281 Value *FrozenMaybePoisonOperand = Builder.CreateFreeze(
5282 MaybePoisonOperand, MaybePoisonOperand->getName() + ".fr");
5283
5284 OrigOpInst->replaceUsesOfWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
5285 return OrigOp;
5286}
5287
// Folds freeze(PN) for a phi PN that forms a recurrence: if every backedge
// value can be traced back to PN through instructions that cannot create
// poison once their flags are dropped, the freeze is moved onto the single
// start value and FI is replaced by PN. Returns nullptr when the pattern does
// not match.
// NOTE(review): the first signature line (listing 5288) and lines 5296,
// 5321-5322 and 5335-5336 (presumably the declarations of Worklist, Visited,
// DropFlags and I) are missing from this listing.
5289 PHINode *PN) {
5290 // Detect whether this is a recurrence with a start value and some number of
5291 // backedge values. We'll check whether we can push the freeze through the
5292 // backedge values (possibly dropping poison flags along the way) until we
5293 // reach the phi again. In that case, we can move the freeze to the start
5294 // value.
5295 Use *StartU = nullptr;
5297 for (Use &U : PN->incoming_values()) {
// An incoming block dominated by PN's own block is treated as a backedge.
5298 if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
5299 // Add backedge value to worklist.
5300 Worklist.push_back(U.get());
5301 continue;
5302 }
5303
5304 // Don't bother handling multiple start values.
5305 if (StartU)
5306 return nullptr;
5307 StartU = &U;
5308 }
5309
5310 if (!StartU || Worklist.empty())
5311 return nullptr; // Not a recurrence.
5312
5313 Value *StartV = StartU->get();
5314 BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
5315 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
5316 // We can't insert freeze if the start value is the result of the
5317 // terminator (e.g. an invoke).
5318 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
5319 return nullptr;
5320
5323 while (!Worklist.empty()) {
5324 Value *V = Worklist.pop_back_val();
5325 if (!Visited.insert(V).second)
5326 continue;
5327
5328 if (Visited.size() > 32)
5329 return nullptr; // Limit the total number of values we inspect.
5330
5331 // Assume that PN is non-poison, because it will be after the transform.
5332 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
5333 continue;
5334
5337 /*ConsiderFlagsAndMetadata*/ false))
5338 return nullptr;
5339
5340 DropFlags.push_back(I);
5341 append_range(Worklist, I->operands());
5342 }
5343
// Nothing is mutated until the whole walk has succeeded; the flag drops are
// applied only now.
5344 for (Instruction *I : DropFlags)
5345 I->dropPoisonGeneratingAnnotations();
5346
5347 if (StartNeedsFreeze) {
5348 Builder.SetInsertPoint(StartBB->getTerminator());
5349 Value *FrozenStartV = Builder.CreateFreeze(StartV,
5350 StartV->getName() + ".fr");
5351 replaceUse(*StartU, FrozenStartV);
5352 }
5353 return replaceInstUsesWith(FI, PN);
5354}
5355
// Body of a freeze helper (presumably InstCombinerImpl::freezeOtherUses). It
// hoists FI to directly after the definition of its operand Op and rewrites
// the uses of Op that FI dominates to use FI instead. Returns true if FI was
// moved or any use was replaced.
// NOTE(review): the signature (listing line 5356) and lines 5370 and 5387
// (presumably the declaration of Users) are missing from this listing.
5357 Value *Op = FI.getOperand(0);
5358
// Skip constants and operands with a single use.
5359 if (isa<Constant>(Op) || Op->hasOneUse())
5360 return false;
5361
5362 // Move the freeze directly after the definition of its operand, so that
5363 // it dominates the maximum number of uses. Note that it may not dominate
5364 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
5365 // the normal/default destination. This is why the domination check in the
5366 // replacement below is still necessary.
5367 BasicBlock::iterator MoveBefore;
5368 if (isa<Argument>(Op)) {
5369 MoveBefore =
5371 } else {
5372 auto MoveBeforeOpt = cast<Instruction>(Op)->getInsertionPointAfterDef();
5373 if (!MoveBeforeOpt)
5374 return false;
5375 MoveBefore = *MoveBeforeOpt;
5376 }
5377
5378 // Re-point iterator to come after any debug-info records.
5379 MoveBefore.setHeadBit(false);
5380
5381 bool Changed = false;
// Only move FI if it is not already at the chosen position.
5382 if (&FI != &*MoveBefore) {
5383 FI.moveBefore(*MoveBefore->getParent(), MoveBefore);
5384 Changed = true;
5385 }
5386
// Replace only the uses that FI dominates, and record each rewritten user.
5388 Changed |= Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
5389 if (!DT.dominates(&FI, U))
5390 return false;
5391
5392 Users.push_back(U.getUser());
5393 return true;
5394 });
5395
// Refresh the assumption cache's affected values for every live assumption
// registered against a rewritten user.
5396 for (auto *U : Users) {
5397 for (auto &AssumeVH : AC.assumptionsFor(U)) {
5398 if (!AssumeVH)
5399 continue;
5400 AC.updateAffectedValues(cast<AssumeInst>(AssumeVH));
5401 }
5402 }
5403
5404 return Changed;
5405}
5406
5407// Check if any direct or bitcast user of this value is a shuffle instruction.
// Bitcast users of V are searched recursively via the self-call below.
// Returns false when no such user is found.
// NOTE(review): the signature line (5408) and the first condition of the
// if/else chain (5410) are missing from this listing extract.
5409 for (auto *U : V->users()) {
5411 return true;
5412 else if (match(U, m_BitCast(m_Specific(V))) && isUsedWithinShuffleVector(U))
5413 return true;
5414 }
5415 return false;
5416}
5417
// NOTE(review): the signature line of this function is absent from this
// listing extract; in the body `I` is the freeze being visited and Op0 is its
// operand.
//
// Tries, in order: simplifyFreezeInst; for a PHI operand, foldOpIntoPhi and
// then foldFreezeIntoRecurrence; replacing freeze(undef) with a constant
// picked from the freeze's users; filling undef/poison elements of a
// fixed-vector constant operand; and finally freezeOtherUses. Returns the
// resulting instruction (or &I when only other uses were rewritten), or
// nullptr if nothing applied.
5419 Value *Op0 = I.getOperand(0);
5420
5421 if (Value *V = simplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
5422 return replaceInstUsesWith(I, V);
5423
5424 // freeze (phi const, x) --> phi const, (freeze x)
5425 if (auto *PN = dyn_cast<PHINode>(Op0)) {
5426 if (Instruction *NV = foldOpIntoPhi(I, PN))
5427 return NV;
5428 if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
5429 return NV;
5430 }
5431
// NOTE(review): listing line 5432 (the condition that produces `NI`) is
// missing from this extract.
5433 return replaceInstUsesWith(I, NI);
5434
5435 // If I is freeze(undef), check its uses and fold it to a fixed constant.
5436 // - or: pick -1
5437 // - select's condition: if the true value is constant, choose it by making
5438 // the condition true.
5439 // - phi: pick the common constant across operands
5440 // - default: pick 0
5441 //
5442 // Note that this transform is intentionally done here rather than
5443 // via an analysis in InstSimplify or at individual user sites. That is
5444 // because we must produce the same value for all uses of the freeze -
5445 // it's the reason "freeze" exists!
5446 //
5447 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
5448 // duplicating logic for binops at least.
5449 auto getUndefReplacement = [&](Type *Ty) {
// Returns the single constant shared by all non-freeze(undef) incoming
// values of PN, or nullptr if there is no such unique constant.
5450 auto pickCommonConstantFromPHI = [](PHINode &PN) -> Value * {
5451 // phi(freeze(undef), C, C). Choose C for freeze so the PHI can be
5452 // removed.
5453 Constant *BestValue = nullptr;
5454 for (Value *V : PN.incoming_values()) {
5455 if (match(V, m_Freeze(m_Undef())))
5456 continue;
5457
// NOTE(review): listing lines 5458 (definition of `C`) and 5462 (an extra
// rejection condition) are missing from this extract.
5459 if (!C)
5460 return nullptr;
5461
5463 return nullptr;
5464
5465 if (BestValue && BestValue != C)
5466 return nullptr;
5467
5468 BestValue = C;
5469 }
5470 return BestValue;
5471 };
5472
5473 Value *NullValue = Constant::getNullValue(Ty);
5474 Value *BestValue = nullptr;
// Each user proposes a constant; if two users disagree, the null value wins.
5475 for (auto *U : I.users()) {
5476 Value *V = NullValue;
5477 if (match(U, m_Or(m_Value(), m_Value())))
// NOTE(review): listing line 5478 (the value chosen for `or` users; the
// comment above says -1) is missing from this extract.
5479 else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
5480 V = ConstantInt::getTrue(Ty);
5481 else if (match(U, m_c_Select(m_Specific(&I), m_Value(V)))) {
// The other select arm is only usable if it is not the freeze itself and is
// known not to be undef or poison; otherwise fall back to the null value.
5482 if (V == &I || !isGuaranteedNotToBeUndefOrPoison(V, &AC, &I, &DT))
5483 V = NullValue;
5484 } else if (auto *PHI = dyn_cast<PHINode>(U)) {
5485 if (Value *MaybeV = pickCommonConstantFromPHI(*PHI))
5486 V = MaybeV;
5487 }
5488
5489 if (!BestValue)
5490 BestValue = V;
5491 else if (BestValue != V)
5492 BestValue = NullValue;
5493 }
5494 assert(BestValue && "Must have at least one use");
5495 assert(BestValue != &I && "Cannot replace with itself");
5496 return BestValue;
5497 };
5498
5499 if (match(Op0, m_Undef())) {
5500 // Don't fold freeze(undef/poison) if it's used as a vector operand in
5501 // a shuffle. This may improve codegen for shuffles that allow
5502 // unspecified inputs.
// NOTE(review): listing line 5503 (the guarding condition, presumably a call
// to isUsedWithinShuffleVector) is missing from this extract.
5504 return nullptr;
5505 return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
5506 }
5507
// For a fixed-vector constant, substitutes undef elements (via
// Constant::replaceUndefsWith) with the first element that is not undef, or
// with zero if there is none. Returns nullptr for non-fixed-vector types.
5508 auto getFreezeVectorReplacement = [](Constant *C) -> Constant * {
5509 Type *Ty = C->getType();
5510 auto *VTy = dyn_cast<FixedVectorType>(Ty);
5511 if (!VTy)
5512 return nullptr;
5513 unsigned NumElts = VTy->getNumElements();
5514 Constant *BestValue = Constant::getNullValue(VTy->getScalarType());
5515 for (unsigned i = 0; i != NumElts; ++i) {
5516 Constant *EltC = C->getAggregateElement(i);
5517 if (EltC && !match(EltC, m_Undef())) {
5518 BestValue = EltC;
5519 break;
5520 }
5521 }
5522 return Constant::replaceUndefsWith(C, BestValue);
5523 };
5524
5525 Constant *C;
5526 if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement() &&
5527 !C->containsConstantExpression()) {
5528 if (Constant *Repl = getFreezeVectorReplacement(C))
5529 return replaceInstUsesWith(I, Repl);
5530 }
5531
5532 // Replace uses of Op with freeze(Op).
5533 if (freezeOtherUses(I))
5534 return &I;
5535
5536 return nullptr;
5537}
5538
5539/// Check for case where the call writes to an otherwise dead alloca. This
5540/// shows up for unused out-params in idiomatic C/C++ code. Note that this
5541/// helper *only* analyzes the write; doesn't check any other legality aspect.
///
/// Returns true only if \p I is a call whose destination memory location
/// (per MemoryLocation::getForDest) is based on an alloca, and every
/// transitive user of that alloca reached through GEPs / addrspacecasts is
/// the call itself.
// NOTE(review): the signature line (5542) is missing from this listing
// extract; the body uses parameters named `I` and `TLI`.
5543 auto *CB = dyn_cast<CallBase>(I);
5544 if (!CB)
5545 // TODO: handle e.g. store to alloca here - only worth doing if we extend
5546 // to allow reload along used path as described below. Otherwise, this
5547 // is simply a store to a dead allocation which will be removed.
5548 return false;
5549 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
5550 if (!Dest)
5551 return false;
5552 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
5553 if (!AI)
5554 // TODO: allow malloc?
5555 return false;
5556 // TODO: allow memory access dominated by move point? Note that since AI
5557 // could have a reference to itself captured by the call, we would need to
5558 // account for cycles in doing so.
5559 SmallVector<const User *> AllocaUsers;
// NOTE(review): listing line 5560 (presumably the declaration of `Visited`)
// is missing from this extract.
// Queues each not-yet-visited user of the given instruction. The lambda
// parameter `I` shadows the function parameter of the same name.
5561 auto pushUsers = [&](const Instruction &I) {
5562 for (const User *U : I.users()) {
5563 if (Visited.insert(U).second)
5564 AllocaUsers.push_back(U);
5565 }
5566 };
5567 pushUsers(*AI);
// Worklist walk: look through GEPs and addrspacecasts, accept the call
// itself, and reject on any other kind of user.
5568 while (!AllocaUsers.empty()) {
5569 auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
5570 if (isa<GetElementPtrInst>(UserI) || isa<AddrSpaceCastInst>(UserI)) {
5571 pushUsers(*UserI);
5572 continue;
5573 }
5574 if (UserI == CB)
5575 continue;
5576 // TODO: support lifetime.start/end here
5577 return false;
5578 }
5579 return true;
5580}
5581
5582/// Try to move the specified instruction from its current block into the
5583/// beginning of DestBlock, which can only happen if it's safe to move the
5584/// instruction past all of the instructions between it and the end of its
5585/// block.
///
/// Returns true if the instruction was moved (NumSunkInst is bumped and debug
/// users are handled), false if any legality check below rejected the move.
// NOTE(review): the first signature line (5586) is missing from this listing
// extract.
5587 BasicBlock *DestBlock) {
5588 BasicBlock *SrcBlock = I->getParent();
5589
5590 // Cannot move control-flow-involving, volatile loads, vaarg, etc.
5591 if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
5592 I->isTerminator())
5593 return false;
5594
5595 // Do not sink static or dynamic alloca instructions. Static allocas must
5596 // remain in the entry block, and dynamic allocas must not be sunk in between
5597 // a stacksave / stackrestore pair, which would incorrectly shorten its
5598 // lifetime.
5599 if (isa<AllocaInst>(I))
5600 return false;
5601
5602 // Do not sink into catchswitch blocks.
5603 if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
5604 return false;
5605
5606 // Do not sink convergent call instructions.
5607 if (auto *CI = dyn_cast<CallInst>(I)) {
5608 if (CI->isConvergent())
5609 return false;
5610 }
5611
5612 // Unless we can prove that the memory write isn't visible except on the
5613 // path we're sinking to, we must bail.
5614 if (I->mayWriteToMemory()) {
5615 if (!SoleWriteToDeadLocal(I, TLI))
5616 return false;
5617 }
5618
5619 // We can only sink load instructions if there is nothing between the load and
5620 // the end of block that could change the value.
5621 if (I->mayReadFromMemory() &&
5622 !I->hasMetadata(LLVMContext::MD_invariant_load)) {
5623 // We don't want to do any sophisticated alias analysis, so we only check
5624 // the instructions after I in I's parent block if we try to sink to its
5625 // successor block.
5626 if (DestBlock->getUniquePredecessor() != I->getParent())
5627 return false;
5628 for (BasicBlock::iterator Scan = std::next(I->getIterator()),
5629 E = I->getParent()->end();
5630 Scan != E; ++Scan)
5631 if (Scan->mayWriteToMemory())
5632 return false;
5633 }
5634
// Drop droppable uses whose user is an instruction outside DestBlock, and
// queue those users for revisiting. Note that the `I` declared inside the
// lambda shadows the instruction being sunk.
5635 I->dropDroppableUses([&](const Use *U) {
5636 auto *I = dyn_cast<Instruction>(U->getUser());
5637 if (I && I->getParent() != DestBlock) {
5638 Worklist.add(I);
5639 return true;
5640 }
5641 return false;
5642 });
5643 /// FIXME: We could remove droppable uses that are not dominated by
5644 /// the new position.
5645
5646 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
5647 I->moveBefore(*DestBlock, InsertPos);
5648 ++NumSunkInst;
5649
5650 // Also sink all related debug uses from the source basic block. Otherwise we
5651 // get debug use before the def. Attempt to salvage debug uses first, to
5652 // maximise the range variables have location for. If we cannot salvage, then
5653 // mark the location undef: we know it was supposed to receive a new location
5654 // here, but that computation has been sunk.
5655 SmallVector<DbgVariableRecord *, 2> DbgVariableRecords;
5656 findDbgUsers(I, DbgVariableRecords);
5657 if (!DbgVariableRecords.empty())
5658 tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock,
5659 DbgVariableRecords);
5660
5661 // PS: there are numerous flaws with this behaviour, not least that right now
5662 // assignments can be re-ordered past other assignments to the same variable
5663 // if they use different Values. Creating more undef assignments can never be
5664 // undone. And salvaging all users outside of this block can unnecessarily
5665 // alter the lifetime of the live-value that the variable refers to.
5666 // Some of these things can be resolved by tolerating debug use-before-defs in
5667 // LLVM-IR, however it depends on the instruction-referencing CodeGen backend
5668 // being used for more architectures.
5669
5670 return true;
5671}
5672
// NOTE(review): the first signature line (5673) is missing from this listing
// extract; the name tryToSinkInstructionDbgVariableRecords is taken from the
// call site in tryToSinkInstruction.
//
// Handles the debug records that refer to a just-sunk instruction I.
// Records outside DestBlock are candidates for salvaging; of those, the ones
// in SrcBlock are cloned (at most one per variable, and skipping dbg-assign
// records) and the clones are inserted at InsertPos in the destination.
// If no clone is produced the function returns before salvaging anything.
5674 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
5675 BasicBlock *DestBlock,
5676 SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) {
5677 // For all debug values in the destination block, the sunk instruction
5678 // will still be available, so they do not need to be dropped.
5679
5680 // Fetch all DbgVariableRecords not already in the destination.
5681 SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage;
5682 for (auto &DVR : DbgVariableRecords)
5683 if (DVR->getParent() != DestBlock)
5684 DbgVariableRecordsToSalvage.push_back(DVR);
5685
5686 // Fetch a second collection, of DbgVariableRecords in the source block that
5687 // we're going to sink.
5688 SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink;
5689 for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage)
5690 if (DVR->getParent() == SrcBlock)
5691 DbgVariableRecordsToSink.push_back(DVR);
5692
5693 // Sort DbgVariableRecords according to their position in the block. This is a
5694 // partial order: DbgVariableRecords attached to different instructions will
5695 // be ordered by the instruction order, but DbgVariableRecords attached to the
5696 // same instruction won't have an order.
// The comparator places A before B when A's instruction comes after B's,
// i.e. the result is in reverse program order.
5697 auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool {
5698 return B->getInstruction()->comesBefore(A->getInstruction());
5699 };
5700 llvm::stable_sort(DbgVariableRecordsToSink, Order);
5701
5702 // If there are two assignments to the same variable attached to the same
5703 // instruction, the ordering between the two assignments is important. Scan
5704 // for this (rare) case and establish which is the last assignment.
5705 using InstVarPair = std::pair<const Instruction *, DebugVariable>;
// NOTE(review): listing lines 5706, 5708 and 5719 (presumably the
// declarations of `FilterOutMap`, `CountMap` and `DupSet`) are missing from
// this extract.
5707 if (DbgVariableRecordsToSink.size() > 1) {
5709 // Count how many assignments to each variable there is per instruction.
5710 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
5711 DebugVariable DbgUserVariable =
5712 DebugVariable(DVR->getVariable(), DVR->getExpression(),
5713 DVR->getDebugLoc()->getInlinedAt());
5714 CountMap[std::make_pair(DVR->getInstruction(), DbgUserVariable)] += 1;
5715 }
5716
5717 // If there are any instructions with two assignments, add them to the
5718 // FilterOutMap to record that they need extra filtering.
5720 for (auto It : CountMap) {
5721 if (It.second > 1) {
5722 FilterOutMap[It.first] = nullptr;
5723 DupSet.insert(It.first.first);
5724 }
5725 }
5726
5727 // For all instruction/variable pairs needing extra filtering, find the
5728 // latest assignment.
// Records are scanned in reverse, so the first match per pair is the last
// assignment; later matches are ignored once the slot is filled.
5729 for (const Instruction *Inst : DupSet) {
5730 for (DbgVariableRecord &DVR :
5731 llvm::reverse(filterDbgVars(Inst->getDbgRecordRange()))) {
5732 DebugVariable DbgUserVariable =
5733 DebugVariable(DVR.getVariable(), DVR.getExpression(),
5734 DVR.getDebugLoc()->getInlinedAt());
5735 auto FilterIt =
5736 FilterOutMap.find(std::make_pair(Inst, DbgUserVariable));
5737 if (FilterIt == FilterOutMap.end())
5738 continue;
5739 if (FilterIt->second != nullptr)
5740 continue;
5741 FilterIt->second = &DVR;
5742 }
5743 }
5744 }
5745
5746 // Perform cloning of the DbgVariableRecords that we plan on sinking, filter
5747 // out any duplicate assignments identified above.
// NOTE(review): listing lines 5748 (presumably the declaration of
// `DVRClones`) and 5751 (the condition guarding the first `continue`) are
// missing from this extract.
5749 SmallSet<DebugVariable, 4> SunkVariables;
5750 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
5752 continue;
5753
5754 DebugVariable DbgUserVariable =
5755 DebugVariable(DVR->getVariable(), DVR->getExpression(),
5756 DVR->getDebugLoc()->getInlinedAt());
5757
5758 // For any variable where there were multiple assignments in the same place,
5759 // ignore all but the last assignment.
5760 if (!FilterOutMap.empty()) {
5761 InstVarPair IVP = std::make_pair(DVR->getInstruction(), DbgUserVariable);
5762 auto It = FilterOutMap.find(IVP);
5763
5764 // Filter out.
5765 if (It != FilterOutMap.end() && It->second != DVR)
5766 continue;
5767 }
5768
// Clone at most one record per variable.
5769 if (!SunkVariables.insert(DbgUserVariable).second)
5770 continue;
5771
// dbg-assign records are not cloned.
5772 if (DVR->isDbgAssign())
5773 continue;
5774
5775 DVRClones.emplace_back(DVR->clone());
5776 LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n');
5777 }
5778
5779 // Perform salvaging without the clones, then sink the clones.
5780 if (DVRClones.empty())
5781 return;
5782
5783 salvageDebugInfoForDbgValues(*I, DbgVariableRecordsToSalvage);
5784
5785 // The clones are in reverse order of original appearance. Assert that the
5786 // head bit is set on the iterator as we _should_ have received it via
5787 // getFirstInsertionPt. Inserting like this will reverse the clone order as
5788 // we'll repeatedly insert at the head, such as:
5789 // DVR-3 (third insertion goes here)
5790 // DVR-2 (second insertion goes here)
5791 // DVR-1 (first insertion goes here)
5792 // Any-Prior-DVRs
5793 // InsertPtInst
5794 assert(InsertPos.getHeadBit());
5795 for (DbgVariableRecord *DVRClone : DVRClones) {
5796 InsertPos->getParent()->insertDbgRecordBefore(DVRClone, InsertPos);
5797 LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n');
5798 }
5799}
5800
// NOTE(review): the signature line (5801) is missing from this listing
// extract; this body is invoked as `IC.run()` elsewhere in the file.
//
// Main worklist loop. Until the worklist is empty it: moves deferred
// instructions onto the worklist, skips instructions handled by the DCE
// checks, optionally sinks the instruction into its single user block, and
// then calls visit() on it, wiring any replacement instruction into the IR.
// Returns MadeIRChange.
5802 while (!Worklist.isEmpty()) {
5803 // Walk deferred instructions in reverse order, and push them to the
5804 // worklist, which means they'll end up popped from the worklist in-order.
5805 while (Instruction *I = Worklist.popDeferred()) {
5806 // Check to see if we can DCE the instruction. We do this already here to
5807 // reduce the number of uses and thus allow other folds to trigger.
5808 // Note that eraseInstFromFunction() may push additional instructions on
5809 // the deferred worklist, so this will DCE whole instruction chains.
// NOTE(review): listing lines 5810-5811 (the dead-instruction test and the
// erase call) are missing from this extract.
5812 ++NumDeadInst;
5813 continue;
5814 }
5815
5816 Worklist.push(I);
5817 }
5818
5819 Instruction *I = Worklist.removeOne();
5820 if (I == nullptr) continue; // skip null values.
5821
5822 // Check to see if we can DCE the instruction.
// NOTE(review): listing lines 5823-5824 (the dead-instruction test and the
// erase call) are missing from this extract.
5825 ++NumDeadInst;
5826 continue;
5827 }
5828
5829 if (!DebugCounter::shouldExecute(VisitCounter))
5830 continue;
5831
5832 // See if we can trivially sink this instruction to its user if we can
5833 // prove that the successor is not executed more frequently than our block.
5834 // Return the UserBlock if successful.
5835 auto getOptionalSinkBlockForInst =
5836 [this](Instruction *I) -> std::optional<BasicBlock *> {
5837 if (!EnableCodeSinking)
5838 return std::nullopt;
5839
5840 BasicBlock *BB = I->getParent();
5841 BasicBlock *UserParent = nullptr;
5842 unsigned NumUsers = 0;
5843
5844 for (Use &U : I->uses()) {
5845 User *User = U.getUser();
5846 if (User->isDroppable()) {
5847 // Do not sink if there are dereferenceable assumes that would be
5848 // removed.
// NOTE(review): listing line 5849 (the definition of `II`) is missing from
// this extract.
5850 if (II->getIntrinsicID() != Intrinsic::assume ||
5851 !II->getOperandBundle("dereferenceable"))
5852 continue;
5853 }
5854
5855 if (NumUsers > MaxSinkNumUsers)
5856 return std::nullopt;
5857
5858 Instruction *UserInst = cast<Instruction>(User);
5859 // Special handling for Phi nodes - get the block the use occurs in.
5860 BasicBlock *UserBB = UserInst->getParent();
5861 if (PHINode *PN = dyn_cast<PHINode>(UserInst))
5862 UserBB = PN->getIncomingBlock(U);
5863 // Bail out if we have uses in different blocks. We don't do any
5864 // sophisticated analysis (i.e. finding NearestCommonDominator of these
5865 // use blocks).
5866 if (UserParent && UserParent != UserBB)
5867 return std::nullopt;
5868 UserParent = UserBB;
5869
5870 // Make sure these checks are done only once, naturally we do the checks
5871 // the first time we get the userparent, this will save compile time.
5872 if (NumUsers == 0) {
5873 // Try sinking to another block. If that block is unreachable, then do
5874 // not bother. SimplifyCFG should handle it.
5875 if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
5876 return std::nullopt;
5877
5878 auto *Term = UserParent->getTerminator();
5879 // See if the user is one of our successors that has only one
5880 // predecessor, so that we don't have to split the critical edge.
5881 // Another option where we can sink is a block that ends with a
5882 // terminator that does not pass control to other block (such as
5883 // return or unreachable or resume). In this case:
5884 // - I dominates the User (by SSA form);
5885 // - the User will be executed at most once.
5886 // So sinking I down to User is always profitable or neutral.
5887 if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
5888 return std::nullopt;
5889
5890 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
5891 }
5892
5893 NumUsers++;
5894 }
5895
5896 // No user or only has droppable users.
5897 if (!UserParent)
5898 return std::nullopt;
5899
5900 return UserParent;
5901 };
5902
5903 auto OptBB = getOptionalSinkBlockForInst(I);
5904 if (OptBB) {
5905 auto *UserParent = *OptBB;
5906 // Okay, the CFG is simple enough, try to sink this instruction.
5907 if (tryToSinkInstruction(I, UserParent)) {
5908 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
5909 MadeIRChange = true;
5910 // We'll add uses of the sunk instruction below, but since
5911 // sinking can expose opportunities for its *operands* add
5912 // them to the worklist
5913 for (Use &U : I->operands())
5914 if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
5915 Worklist.push(OpI);
5916 }
5917 }
5918
5919 // Now that we have an instruction, try combining it to simplify it.
5920 Builder.SetInsertPoint(I);
5921 Builder.SetCurrentDebugLocation(I->getDebugLoc());
5922 // Used by our IRBuilder inserter to copy annotation metadata.
// NOTE(review): listing line 5923 (the statement the comment above refers to)
// is missing from this extract.
5924
// OrigI holds the printed form of I before visiting; it is only declared in
// builds without NDEBUG and only used inside LLVM_DEBUG.
5925#ifndef NDEBUG
5926 std::string OrigI;
5927#endif
5928 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS););
5929 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
5930
5931 if (Instruction *Result = visit(*I)) {
5932 ++NumCombined;
5933 // Should we replace the old instruction with a new one?
5934 if (Result != I) {
5935 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
5936 << " New = " << *Result << '\n');
5937
5938 // We copy the old instruction's DebugLoc to the new instruction, unless
5939 // InstCombine already assigned a DebugLoc to it, in which case we
5940 // should trust the more specifically selected DebugLoc.
5941 Result->setDebugLoc(Result->getDebugLoc().orElse(I->getDebugLoc()));
5942 // We also copy annotation metadata to the new instruction.
5943 Result->copyMetadata(*I, LLVMContext::MD_annotation);
5944 // Everything uses the new instruction now.
5945 I->replaceAllUsesWith(Result);
5946
5947 // Move the name to the new instruction first.
5948 Result->takeName(I);
5949
5950 // Insert the new instruction into the basic block...
5951 BasicBlock *InstParent = I->getParent();
5952 BasicBlock::iterator InsertPos = I->getIterator();
5953
5954 // Are we replacing a PHI with something that isn't a PHI, or vice versa?
5955 if (isa<PHINode>(Result) != isa<PHINode>(I)) {
5956 // We need to fix up the insertion point.
5957 if (isa<PHINode>(I)) // PHI -> Non-PHI
5958 InsertPos = InstParent->getFirstInsertionPt();
5959 else // Non-PHI -> PHI
5960 InsertPos = InstParent->getFirstNonPHIIt();
5961 }
5962
5963 Result->insertInto(InstParent, InsertPos);
5964
5965 // Push the new instruction and any users onto the worklist.
5966 Worklist.pushUsersToWorkList(*Result);
5967 Worklist.push(Result);
5968
// NOTE(review): listing line 5969 (presumably the removal of the old
// instruction) is missing from this extract.
5970 } else {
5971 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
5972 << " New = " << *I << '\n');
5973
5974 // If the instruction was modified, it's possible that it is now dead.
5975 // if so, remove it.
// NOTE(review): listing lines 5976-5977 (the dead-instruction test and the
// erase call) are missing from this extract.
5978 } else {
5979 Worklist.pushUsersToWorkList(*I);
5980 Worklist.push(I);
5981 }
5982 }
5983 MadeIRChange = true;
5984 }
5985 }
5986
5987 Worklist.zap();
5988 return MadeIRChange;
5989}
5990
5991// Track the scopes used by !alias.scope and !noalias. In a function, a
5992// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
5993// by both sets. If not, the declaration of the scope can be safely omitted.
5994// The MDNode of the scope can be omitted as well for the instructions that are
5995// part of this function. We do not do that at this point, as this might become
5996// too time consuming to do.
// NOTE(review): the class header line (5997) is missing from this listing
// extract; the type is used as `AliasScopeTracker` in prepareWorklist.
// Scope lists and individual scopes seen in !alias.scope metadata.
5998 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
// Scope lists and individual scopes seen in !noalias metadata.
5999 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
6000
6001public:
// NOTE(review): the signature line (6002) is missing from this extract; the
// method is called as `analyse(&Inst)` and the body uses a pointer `I`.
// Records the !alias.scope and !noalias scope lists attached to I, together
// with the MDNode operands of each list.
6003 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
6004 if (!I->hasMetadataOtherThanDebugLoc())
6005 return;
6006
// Inserts the list into Container; the operands are only walked the first
// time a given list is seen.
6007 auto Track = [](Metadata *ScopeList, auto &Container) {
6008 const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
6009 if (!MDScopeList || !Container.insert(MDScopeList).second)
6010 return;
6011 for (const auto &MDOperand : MDScopeList->operands())
6012 if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
6013 Container.insert(MDScope);
6014 };
6015
6016 Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
6017 Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
6018 }
6019
// NOTE(review): listing lines 6020-6021 (the signature and the definition of
// `Decl`) are missing from this extract; the method is called as
// `isNoAliasScopeDeclDead(Inst)`.
// Returns false when `Decl` is null. Otherwise returns true if the declared
// scope is missing from either tracked set, or if the scope operand is not
// an MDNode.
6022 if (!Decl)
6023 return false;
6024
6025 assert(Decl->use_empty() &&
6026 "llvm.experimental.noalias.scope.decl in use ?");
6027 const MDNode *MDSL = Decl->getScopeList();
6028 assert(MDSL->getNumOperands() == 1 &&
6029 "llvm.experimental.noalias.scope should refer to a single scope");
6030 auto &MDOperand = MDSL->getOperand(0);
6031 if (auto *MD = dyn_cast<MDNode>(MDOperand))
6032 return !UsedAliasScopesAndLists.contains(MD) ||
6033 !UsedNoAliasScopesAndLists.contains(MD);
6034
6035 // Not an MDNode ? throw away.
6036 return true;
6037 }
6038};
6039
6040/// Populate the IC worklist from a function, by walking it in reverse
6041/// post-order and adding all reachable code to the worklist.
6042///
6043/// This has a couple of tricks to make the code faster and more powerful. In
6044/// particular, we constant fold and DCE instructions as we go, to avoid adding
6045/// them to the worklist (this significantly speeds up instcombine on code where
6046/// many instructions are dead or constant). Additionally, if we find a branch
6047/// whose condition is a known constant, we only visit the reachable successors.
///
/// Returns true if the IR was changed while preparing the worklist.
// NOTE(review): the signature line (6048) and listing line 6050 (presumably
// the declaration of `LiveBlocks`) are missing from this extract; the
// function is called as `IC.prepareWorklist(F)`.
6049 bool MadeIRChange = false;
6051 SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
6052 DenseMap<Constant *, Constant *> FoldedConstants;
6053 AliasScopeTracker SeenAliasScopes;
6054
// Marks every edge from BB other than the one to LiveSucc as dead (passing
// nullptr marks all of them) and replaces the PHI inputs flowing along newly
// dead edges with poison.
6055 auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
6056 for (BasicBlock *Succ : successors(BB))
6057 if (Succ != LiveSucc && DeadEdges.insert({BB, Succ}).second)
6058 for (PHINode &PN : Succ->phis())
6059 for (Use &U : PN.incoming_values())
6060 if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
6061 U.set(PoisonValue::get(PN.getType()));
6062 MadeIRChange = true;
6063 }
6064 };
6065
6066 for (BasicBlock *BB : RPOT) {
// A non-entry block is treated as dead when every incoming edge is either
// already dead or comes from a block that BB itself dominates.
6067 if (!BB->isEntryBlock() && all_of(predecessors(BB), [&](BasicBlock *Pred) {
6068 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
6069 })) {
6070 HandleOnlyLiveSuccessor(BB, nullptr);
6071 continue;
6072 }
6073 LiveBlocks.insert(BB);
6074
6075 for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
6076 // ConstantProp instruction if trivially constant.
6077 if (!Inst.use_empty() &&
6078 (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
6079 if (Constant *C = ConstantFoldInstruction(&Inst, DL, &TLI)) {
6080 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
6081 << '\n');
6082 Inst.replaceAllUsesWith(C);
6083 ++NumConstProp;
6084 if (isInstructionTriviallyDead(&Inst, &TLI))
6085 Inst.eraseFromParent();
6086 MadeIRChange = true;
6087 continue;
6088 }
6089
6090 // See if we can constant fold its operands.
6091 for (Use &U : Inst.operands()) {
// NOTE(review): listing line 6092 (the condition that skips operands which
// are not foldable constants) is missing from this extract.
6093 continue;
6094
6095 auto *C = cast<Constant>(U);
// FoldedConstants caches the fold result per constant so each one is folded
// at most once per call.
6096 Constant *&FoldRes = FoldedConstants[C];
6097 if (!FoldRes)
6098 FoldRes = ConstantFoldConstant(C, DL, &TLI);
6099
6100 if (FoldRes != C) {
6101 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
6102 << "\n Old = " << *C
6103 << "\n New = " << *FoldRes << '\n');
6104 U = FoldRes;
6105 MadeIRChange = true;
6106 }
6107 }
6108
6109 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
6110 // these call instructions consumes non-trivial amount of time and
6111 // provides no value for the optimization.
6112 if (!Inst.isDebugOrPseudoInst()) {
6113 InstrsForInstructionWorklist.push_back(&Inst);
6114 SeenAliasScopes.analyse(&Inst);
6115 }
6116 }
6117
6118 // If this is a branch or switch on a constant, mark only the single
6119 // live successor. Otherwise assume all successors are live.
6120 Instruction *TI = BB->getTerminator();
6121 if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {
6122 if (isa<UndefValue>(BI->getCondition())) {
6123 // Branch on undef is UB.
6124 HandleOnlyLiveSuccessor(BB, nullptr);
6125 continue;
6126 }
6127 if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
// !CondVal maps a true condition to successor 0 and a false one to
// successor 1.
6128 bool CondVal = Cond->getZExtValue();
6129 HandleOnlyLiveSuccessor(BB, BI->getSuccessor(!CondVal));
6130 continue;
6131 }
6132 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
6133 if (isa<UndefValue>(SI->getCondition())) {
6134 // Switch on undef is UB.
6135 HandleOnlyLiveSuccessor(BB, nullptr);
6136 continue;
6137 }
6138 if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
6139 HandleOnlyLiveSuccessor(BB,
6140 SI->findCaseValue(Cond)->getCaseSuccessor());
6141 continue;
6142 }
6143 }
6144 }
6145
6146 // Remove instructions inside unreachable blocks. This prevents the
6147 // instcombine code from having to deal with some bad special cases, and
6148 // reduces use counts of instructions.
6149 for (BasicBlock &BB : F) {
6150 if (LiveBlocks.count(&BB))
6151 continue;
6152
6153 unsigned NumDeadInstInBB;
6154 NumDeadInstInBB = removeAllNonTerminatorAndEHPadInstructions(&BB);
6155
6156 MadeIRChange |= NumDeadInstInBB != 0;
6157 NumDeadInst += NumDeadInstInBB;
6158 }
6159
6160 // Once we've found all of the instructions to add to instcombine's worklist,
6161 // add them in reverse order. This way instcombine will visit from the top
6162 // of the function down. This jives well with the way that it adds all uses
6163 // of instructions to the worklist after doing a transformation, thus avoiding
6164 // some N^2 behavior in pathological cases.
6165 Worklist.reserve(InstrsForInstructionWorklist.size());
6166 for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
6167 // DCE instruction if trivially dead. As we iterate in reverse program
6168 // order here, we will clean up whole chains of dead instructions.
6169 if (isInstructionTriviallyDead(Inst, &TLI) ||
6170 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
6171 ++NumDeadInst;
6172 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
6173 salvageDebugInfo(*Inst);
6174 Inst->eraseFromParent();
6175 MadeIRChange = true;
6176 continue;
6177 }
6178
6179 Worklist.push(Inst);
6180 }
6181
6182 return MadeIRChange;
6183}
6184
// NOTE(review): the signature line (6185) is missing from this listing
// extract; the function name is not visible here.
//
// Walks the blocks in RPOT order and records every edge BB -> Succ whose
// target has already been visited in BackEdges, then sets ComputedBackEdges.
6186 // Collect backedges.
// Visited is indexed by block number and sized by F.getMaxBlockNumber().
6187 SmallVector<bool> Visited(F.getMaxBlockNumber());
6188 for (BasicBlock *BB : RPOT) {
// BB is marked before its successors are examined, so a self-loop is also
// recorded as a back edge.
6189 Visited[BB->getNumber()] = true;
6190 for (BasicBlock *Succ : successors(BB))
6191 if (Visited[Succ->getNumber()])
6192 BackEdges.insert({BB, Succ});
6193 }
6194 ComputedBackEdges = true;
6195}
6196
// NOTE(review): the leading signature lines (6197-6201) are missing from
// this listing extract; callers in this file invoke the function as
// combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
// BFI, BPI, PSI, Opts).
//
// Repeatedly builds an InstCombinerImpl over F and runs it until an
// iteration makes no change or the iteration limit applies. Returns true if
// the IR was changed.
6202 const InstCombineOptions &Opts) {
6203 auto &DL = F.getDataLayout();
// Fixpoint verification can be disabled per function via the
// "instcombine-no-verify-fixpoint" attribute.
6204 bool VerifyFixpoint = Opts.VerifyFixpoint &&
6205 !F.hasFnAttribute("instcombine-no-verify-fixpoint");
6206
// NOTE(review): listing line 6207 is missing from this extract.
6208
6209 // Lower dbg.declare intrinsics otherwise their value may be clobbered
6210 // by instcombiner.
6211 bool MadeIRChange = false;
// NOTE(review): listing line 6212 (the condition guarding the call below) is
// missing from this extract.
6213 MadeIRChange = LowerDbgDeclare(F);
6214
6215 // Iterate while there is work to do.
6216 unsigned Iteration = 0;
6217 while (true) {
// Without fixpoint verification, stop as soon as the iteration budget is
// used up.
6218 if (Iteration >= Opts.MaxIterations && !VerifyFixpoint) {
6219 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
6220 << " on " << F.getName()
6221 << " reached; stopping without verifying fixpoint\n");
6222 break;
6223 }
6224
6225 ++Iteration;
6226 ++NumWorklistIterations;
6227 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
6228 << F.getName() << "\n");
6229
6230 InstCombinerImpl IC(Worklist, F, AA, AC, TLI, TTI, DT, ORE, BFI, BPI, PSI,
6231 DL, RPOT);
// NOTE(review): listing line 6232 is missing from this extract.
6233 bool MadeChangeInThisIteration = IC.prepareWorklist(F);
6234 MadeChangeInThisIteration |= IC.run();
6235 if (!MadeChangeInThisIteration)
6236 break;
6237
6238 MadeIRChange = true;
// Only reachable with VerifyFixpoint set (otherwise the loop broke above):
// an iteration beyond the budget still changed the IR.
// NOTE(review): listing line 6240 (the call that reports this message) is
// missing from this extract.
6239 if (Iteration > Opts.MaxIterations) {
6241 "Instruction Combining on " + Twine(F.getName()) +
6242 " did not reach a fixpoint after " + Twine(Opts.MaxIterations) +
6243 " iterations. " +
6244 "Use 'instcombine<no-verify-fixpoint>' or function attribute "
6245 "'instcombine-no-verify-fixpoint' to suppress this error.");
6246 }
6247 }
6248
// Bucket the number of iterations into the statistics counters.
6249 if (Iteration == 1)
6250 ++NumOneIteration;
6251 else if (Iteration == 2)
6252 ++NumTwoIterations;
6253 else if (Iteration == 3)
6254 ++NumThreeIterations;
6255 else
6256 ++NumFourOrMoreIterations;
6257
6258 return MadeIRChange;
6259}
6260
6262
// NOTE(review): the first signature line (6263) is missing from this listing
// extract.
//
// Prints the pass name via PassInfoMixin::printPipeline, followed by the
// options in the form "<max-iterations=N;verify-fixpoint>" or
// "<max-iterations=N;no-verify-fixpoint>".
6264 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
6265 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
6266 OS, MapClassName2PassName);
6267 OS << '<';
6268 OS << "max-iterations=" << Options.MaxIterations << ";";
6269 OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
6270 OS << '>';
6271}
6272
6273char InstCombinePass::ID = 0;
6274
// NOTE(review): the signature lines (6275-6276) are missing from this listing
// extract; the body uses a function `F` and an analysis manager `AM`.
//
// Pass entry point: skips work when LastRunTracking says nothing changed
// since the last run, otherwise gathers analyses and calls
// combineInstructionsOverFunction. Returns PreservedAnalyses::all() when no
// change was made.
6277 auto &LRT = AM.getResult<LastRunTrackingAnalysis>(F);
6278 // No changes since last InstCombine pass, exit early.
6279 if (LRT.shouldSkip(&ID))
6280 return PreservedAnalyses::all();
6281
6282 auto &AC = AM.getResult<AssumptionAnalysis>(F);
6283 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
6284 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
// NOTE(review): listing line 6285 (presumably the definition of `ORE`) is
// missing from this extract.
6286 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
6287
6288 auto *AA = &AM.getResult<AAManager>(F);
6289 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
// PSI is only used if it has already been computed (cached) at module level.
6290 ProfileSummaryInfo *PSI =
6291 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
// Block frequencies are requested only when a profile summary is available.
6292 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
6293 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
// NOTE(review): listing line 6294 (presumably the definition of `BPI`) is
// missing from this extract.
6295
6296 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6297 BFI, BPI, PSI, Options)) {
6298 // No changes, all analyses are preserved.
6299 LRT.update(&ID, /*Changed=*/false);
6300 return PreservedAnalyses::all();
6301 }
6302
6303 // Mark all the analyses that instcombine updates as preserved.
// NOTE(review): listing lines 6304 and 6306-6307 (the declaration of `PA`
// and the preserve calls) are missing from this extract.
6305 LRT.update(&ID, /*Changed=*/true);
6308 return PA;
6309}
6310
6326
// NOTE(review): the signature line (6327) is missing from this listing
// extract; the function name is not visible here. The body operates on a
// function `F` and fetches analyses through getAnalysis<...>().
//
// Returns false immediately if skipFunction(F) says to skip; otherwise
// gathers analyses and returns the result of combineInstructionsOverFunction
// run with default-constructed InstCombineOptions.
6328 if (skipFunction(F))
6329 return false;
6330
6331 // Required analyses.
6332 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
6333 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
6334 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
// NOTE(review): listing lines 6335 and 6337 (presumably the definitions of
// `TTI` and `ORE`, both passed to the call at the end) are missing from this
// extract.
6336 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6338
6339 // Optional analyses.
// NOTE(review): listing lines 6341, 6344 and 6348 (the initializers for PSI
// and BFI and the wrapper-pass lookup for BPI) are missing from this extract.
6340 ProfileSummaryInfo *PSI =
6342 BlockFrequencyInfo *BFI =
6343 (PSI && PSI->hasProfileSummary()) ?
6345 nullptr;
6346 BranchProbabilityInfo *BPI = nullptr;
6347 if (auto *WrapperPass =
6349 BPI = &WrapperPass->getBPI();
6350
6351 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6352 BFI, BPI, PSI, InstCombineOptions());
6353}
6354
6356
6358
6360 "Combine redundant instructions", false, false)
6371 "Combine redundant instructions", false, false)
6372
6373// Initialization Routines.
6377
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This is the interface for LLVM's primary stateless and local alias analysis.
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI)
DXIL Resource Access
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines the DenseMap class.
static bool isSigned(unsigned Opcode)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
iv Induction Variable Users
Definition IVUsers.cpp:48
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "(X ROp Y) LOp Z" is always equal to "(X LOp Z) ROp (Y LOp Z)".
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
This file provides internal interfaces used to implement the InstCombine.
This file provides the primary interface to the instcombine pass.
static Value * simplifySwitchOnSelectUsingRanges(SwitchInst &SI, SelectInst *Select, bool IsTrueArm)
static bool isUsedWithinShuffleVector(Value *V)
static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI, Instruction *AI)
static Constant * constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector, Constant *Splat, bool SplatLHS, const DataLayout &DL)
static bool shorter_filter(const Value *LHS, const Value *RHS)
static Instruction * combineConstantOffsets(GetElementPtrInst &GEP, InstCombinerImpl &IC)
Combine constant offsets separated by variable offsets.
static Instruction * foldSelectGEP(GetElementPtrInst &GEP, InstCombiner::BuilderTy &Builder)
Thread a GEP operation with constant indices through the constant true/false arms of a select.
static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src)
static cl::opt< unsigned > MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine"))
static Instruction * foldSpliceBinOp(BinaryOperator &Inst, InstCombiner::BuilderTy &Builder)
static cl::opt< unsigned > ShouldLowerDbgDeclare("instcombine-lower-dbg-declare", cl::Hidden, cl::init(true))
static bool hasNoSignedWrap(BinaryOperator &I)
static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, InstCombinerImpl &IC)
Combine constant operands of associative operations either before or after a cast to eliminate one of...
static bool combineInstructionsOverFunction(Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, const InstCombineOptions &Opts)
static Value * simplifyInstructionWithPHI(Instruction &I, PHINode *PN, Value *InValue, BasicBlock *InBB, const DataLayout &DL, const SimplifyQuery SQ)
static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP)
Return true if we should canonicalize the gep to an i8 ptradd.
static Value * getIdentityValue(Instruction::BinaryOps Opcode, Value *V)
This function returns the identity value for the given opcode, which can be used to factor patterns like (X *...
static Value * foldFrexpOfSelect(ExtractValueInst &EV, IntrinsicInst *FrexpCall, SelectInst *SelectInst, InstCombiner::BuilderTy &Builder)
static std::optional< std::pair< Value *, Value * > > matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS)
static std::optional< ModRefInfo > isAllocSiteRemovable(Instruction *AI, SmallVectorImpl< Instruction * > &Users, const TargetLibraryInfo &TLI, bool KnowInit)
static cl::opt< unsigned > MaxAllocSiteRemovableUsers("instcombine-max-allocsite-removable-users", cl::Hidden, cl::init(2048), cl::desc("Maximum number of users to visit in alloc-site " "removability analysis"))
static Value * foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, Value *NewOp, InstCombiner &IC)
static Instruction * canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP, GEPOperator *Src, InstCombinerImpl &IC)
static Instruction * tryToMoveFreeBeforeNullTest(CallInst &FI, const DataLayout &DL)
Move the call to free before a NULL test.
static Value * simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI, bool IsTrueArm)
static Value * tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ, InstCombiner::BuilderTy &Builder, Instruction::BinaryOps InnerOpcode, Value *A, Value *B, Value *C, Value *D)
This tries to simplify binary operations by factorizing out common terms (e.
static bool isRemovableWrite(CallBase &CB, Value *UsedV, const TargetLibraryInfo &TLI)
Given a call CB which uses an address UsedV, return true if we can prove the call's only possible eff...
static Instruction::BinaryOps getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, Value *&LHS, Value *&RHS, BinaryOperator *OtherOp)
This function predicates factorization using distributive laws.
static bool hasNoUnsignedWrap(BinaryOperator &I)
static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI)
Check for case where the call writes to an otherwise dead alloca.
static cl::opt< unsigned > MaxSinkNumUsers("instcombine-max-sink-users", cl::init(32), cl::desc("Maximum number of undroppable users for instruction sinking"))
static Instruction * foldGEPOfPhi(GetElementPtrInst &GEP, PHINode *PN, IRBuilderBase &Builder)
static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo)
Return 'true' if the given typeinfo will match anything.
static cl::opt< bool > EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), cl::init(true))
static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C)
static GEPNoWrapFlags getMergedGEPNoWrapFlags(GEPOperator &GEP1, GEPOperator &GEP2)
Determine nowrap flags for (gep (gep p, x), y) to (gep p, (x + y)) transform.
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static bool IsSelect(unsigned Opcode, bool CheckOnlyCC=false)
Check if the opcode is a SELECT or SELECT_CC variant.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
const SmallVectorImpl< MachineOperand > & Cond
static unsigned getNumElements(Type *Ty)
unsigned OpIndex
BaseType
A given derived pointer can have multiple base pointers through phi/selects.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
static unsigned getScalarSizeInBits(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
bool isNoAliasScopeDeclDead(Instruction *Inst)
void analyse(Instruction *I)
The Input class is used to parse a yaml document into in-memory structs and vectors.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:227
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1810
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:424
static LLVM_ABI void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Definition APInt.cpp:1942
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1980
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:834
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:2012
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:406
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1157
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1993
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:218
size_t size() const
Get the array size.
Definition ArrayRef.h:141
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
uint64_t getNumElements() const
Type * getElementType() const
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
LLVM_ABI uint64_t getDereferenceableBytes() const
Returns the number of dereferenceable bytes from the dereferenceable attribute.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:261
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:530
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI bool isEntryBlock() const
Return true if this is the entry block of the containing function.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:484
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
size_t size() const
Definition BasicBlock.h:482
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
BinaryOps getOpcode() const
Definition InstrTypes.h:409
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:329
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
void setAttributes(AttributeList A)
Set the attributes for this call.
bool doesNotThrow() const
Determine if the call cannot unwind.
Value * getArgOperand(unsigned i) const
AttributeList getAttributes() const
Return the attributes for this call.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_NE
not equal
Definition InstrTypes.h:762
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:852
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Conditional Branch instruction.
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
ConstantArray - Constant Array Declarations.
Definition Constants.h:584
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition Constants.h:945
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getNot(Constant *C)
static LLVM_ABI Constant * getAdd(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
This class represents a range of values.
LLVM_ABI bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI ConstantRange makeExactNoWrapRegion(Instruction::BinaryOps BinOp, const APInt &Other, unsigned NoWrapKind)
Produce the range that contains X if and only if "X BinOp Other" does not wrap.
Constant Vector Declarations.
Definition Constants.h:668
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * replaceUndefsWith(Constant *C, Constant *Replacement)
Try to replace undefined constant C or undefined elements in C with Replacement.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
const Constant * stripPointerCasts() const
Definition Constant.h:228
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
static LLVM_ABI DIExpression * appendOpsToArg(const DIExpression *Expr, ArrayRef< uint64_t > Ops, unsigned ArgNo, bool StackValue=false)
Create a copy of Expr by appending the given list of Ops to each instance of the operand DW_OP_LLVM_a...
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non instruction representation of the dbg....
static bool shouldExecute(CounterInfo &Counter)
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:252
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
bool empty() const
Definition DenseMap.h:173
iterator end()
Definition DenseMap.h:143
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:286
Analysis pass which computes a DominatorTree.
Definition Dominators.h:270
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:306
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
This instruction extracts a struct member or array element value from an aggregate value.
ArrayRef< unsigned > getIndices() const
iterator_range< idx_iterator > indices() const
idx_iterator idx_end() const
static ExtractValueInst * Create(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
idx_iterator idx_begin() const
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
This class represents a freeze function that returns a random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
FunctionPass(char &pid)
Definition Pass.h:316
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition Pass.cpp:188
const BasicBlock & getEntryBlock() const
Definition Function.h:809
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags inBounds()
static GEPNoWrapFlags all()
static GEPNoWrapFlags noUnsignedWrap()
GEPNoWrapFlags intersectForReassociate(GEPNoWrapFlags Other) const
Given (gep (gep p, x), y), determine the nowrap flags for (gep (gep, p, y), x).
bool hasNoUnsignedWrap() const
bool isInBounds() const
GEPNoWrapFlags intersectForOffsetAdd(GEPNoWrapFlags Other) const
Given (gep (gep p, x), y), determine the nowrap flags for (gep p, x+y).
static GEPNoWrapFlags none()
GEPNoWrapFlags getNoWrapFlags() const
Definition Operator.h:385
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static LLVM_ABI Type * getTypeAtIndex(Type *Ty, Value *Idx)
Return the type of the element at the given index of an indexable type.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Legacy wrapper pass to provide the GlobalsAAResult object.
This instruction compares its operands according to the predicate given to the constructor.
CmpPredicate getCmpPredicate() const
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition IRBuilder.h:2101
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition IRBuilder.h:544
virtual void InsertHelper(Instruction *I, const Twine &Name, BasicBlock::iterator InsertPt) const
Definition IRBuilder.h:65
This instruction inserts a struct field or array element value into an aggregate value.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
LLVM_ABI InstCombinePass(InstCombineOptions Opts={})
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Instruction * foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I)
Tries to simplify binops of select and cast of the select condition.
Instruction * visitCondBrInst(CondBrInst &BI)
Instruction * foldBinOpIntoSelectOrPhi(BinaryOperator &I)
This is a convenience wrapper function for the above two functions.
bool SimplifyAssociativeOrCommutative(BinaryOperator &I)
Performs a few simplifications for operators which are associative or commutative.
Instruction * visitGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src)
Value * foldUsingDistributiveLaws(BinaryOperator &I)
Tries to simplify binary operations which some other binary operation distributes over.
Instruction * foldBinOpShiftWithShift(BinaryOperator &I)
Instruction * visitUnreachableInst(UnreachableInst &I)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
void handleUnreachableFrom(Instruction *I, SmallVectorImpl< BasicBlock * > &Worklist)
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
Instruction * visitFreeze(FreezeInst &I)
Instruction * foldBinOpSelectBinOp(BinaryOperator &Op)
In some cases it is beneficial to fold a select into a binary operator.
void handlePotentiallyDeadBlocks(SmallVectorImpl< BasicBlock * > &Worklist)
bool prepareWorklist(Function &F)
Perform early cleanup and prepare the InstCombine worklist.
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitExtractValueInst(ExtractValueInst &EV)
void handlePotentiallyDeadSuccessors(BasicBlock *BB, BasicBlock *LiveSucc)
Instruction * foldBinopWithRecurrence(BinaryOperator &BO)
Try to fold binary operators whose operands are simple interleaved recurrences to a single recurrence...
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * visitLandingPadInst(LandingPadInst &LI)
Instruction * visitReturnInst(ReturnInst &RI)
Instruction * visitSwitchInst(SwitchInst &SI)
Instruction * foldBinopWithPhiOperands(BinaryOperator &BO)
For a binary operator with 2 phi operands, try to hoist the binary operation before the phi.
bool SimplifyDemandedFPClass(Instruction *I, unsigned Op, FPClassTest DemandedMask, KnownFPClass &Known, const SimplifyQuery &Q, unsigned Depth=0)
bool mergeStoreIntoSuccessor(StoreInst &SI)
Try to transform: if () { *P = v1; } else { *P = v2 } or: *P = v1; if () { *P = v2; }...
Instruction * tryFoldInstWithCtpopWithNot(Instruction *I)
Instruction * visitUncondBrInst(UncondBrInst &BI)
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Value * pushFreezeToPreventPoisonFromPropagating(FreezeInst &FI)
bool run()
Run the combiner over the entire worklist until it is empty.
Instruction * foldVectorBinop(BinaryOperator &Inst)
Canonicalize the position of binops relative to shufflevector.
bool removeInstructionsBeforeUnreachable(Instruction &I)
Value * SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS, Value *RHS)
void tryToSinkInstructionDbgVariableRecords(Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, BasicBlock *DestBlock, SmallVectorImpl< DbgVariableRecord * > &DPUsers)
void addDeadEdge(BasicBlock *From, BasicBlock *To, SmallVectorImpl< BasicBlock * > &Worklist)
Constant * unshuffleConstant(ArrayRef< int > ShMask, Constant *C, VectorType *NewCTy)
Find a constant NewC that has property: shuffle(NewC, ShMask) = C. Returns nullptr if such a constant ...
Instruction * visitAllocSite(Instruction &FI)
Instruction * visitGetElementPtrInst(GetElementPtrInst &GEP)
Value * tryFactorizationFolds(BinaryOperator &I)
This tries to simplify binary operations by factorizing out common terms (e.
Instruction * foldFreezeIntoRecurrence(FreezeInst &I, PHINode *PN)
bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock)
Try to move the specified instruction from its current block into the beginning of DestBlock,...
bool freezeOtherUses(FreezeInst &FI)
void freelyInvertAllUsersOf(Value *V, Value *IgnoredUser=nullptr)
Freely adapt every user of V as-if V was changed to !V.
The core instruction combiner logic.
SimplifyQuery SQ
const DataLayout & getDataLayout() const
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
static unsigned getComplexity(Value *V)
Assign a complexity or rank value to LLVM Values.
TargetLibraryInfo & TLI
unsigned ComputeNumSignBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
uint64_t MaxArraySizeForCombine
Maximum size of array considered when transforming.
static bool shouldAvoidAbsorbingNotIntoSelect(const SelectInst &SI)
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
static bool isCanonicalPredicate(CmpPredicate Pred)
Predicate canonicalization reduces the number of patterns that need to be matched by other transforms...
Instruction * AnnotationMetadataSource
Source for annotation metadata, used by the IRBuilder inserter.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
BranchProbabilityInfo * BPI
ReversePostOrderTraversal< BasicBlock * > & RPOT
const DataLayout & DL
DomConditionCache DC
const bool MinimizeSize
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
IRBuilder< TargetFolder, IRBuilderInstCombineInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
LLVM_ABI std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
AssumptionCache & AC
void addToWorklist(Instruction *I)
LLVM_ABI Value * getFreelyInvertedImpl(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume, unsigned Depth)
Return nonnull value if V is free to invert under the condition of WillInvertAllUses.
SmallDenseSet< std::pair< const BasicBlock *, const BasicBlock * >, 8 > BackEdges
Backedges, used to avoid pushing instructions across backedges in cases where this may result in infi...
LLVM_ABI std::optional< Value * > targetSimplifyDemandedVectorEltsIntrinsic(IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
DominatorTree & DT
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
SmallDenseSet< std::pair< BasicBlock *, BasicBlock * >, 8 > DeadEdges
Edges that are known to never be taken.
LLVM_ABI std::optional< Value * > targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
LLVM_ABI bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
bool isBackEdge(const BasicBlock *From, const BasicBlock *To)
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero=false, const Instruction *CxtI=nullptr, unsigned Depth=0)
void visit(Iterator Start, Iterator End)
Definition InstVisitor.h:87
The legacy pass manager's instcombine pass.
Definition InstCombine.h:68
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that InsertPos ...
LLVM_ABI void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
bool isTerminator() const
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI bool willReturn() const LLVM_READONLY
Return true if the instruction will return (unwinding is considered as a form of returning control fl...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isBitwiseLogicOp() const
Return true if this is and/or/xor.
bool isShift() const
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
bool isIntDivRem() const
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350
A wrapper class for inspecting calls to intrinsic functions.
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
The landingpad instruction holds all of the information necessary to generate correct exception handl...
bool isCleanup() const
Return 'true' if this landingpad instruction is a cleanup.
unsigned getNumClauses() const
Get the number of clauses for this landing pad.
static LLVM_ABI LandingPadInst * Create(Type *RetTy, unsigned NumReservedClauses, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedClauses is a hint for the number of incoming clauses that this landingpad w...
LLVM_ABI void addClause(Constant *ClauseVal)
Add a catch or filter clause to the landing pad.
bool isCatch(unsigned Idx) const
Return 'true' if the clause at index Idx is a catch clause.
bool isFilter(unsigned Idx) const
Return 'true' if the clause at index Idx is a filter clause.
Constant * getClause(unsigned Idx) const
Get the value of the clause at index Idx.
void setCleanup(bool V)
Indicate that this landingpad instruction is a cleanup.
A function/module analysis which provides an empty LastRunTrackingInfo.
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Metadata node.
Definition Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1433
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1439
Tracking metadata reference owned by Metadata.
Definition Metadata.h:891
This is the common base class for memset/memcpy/memmove.
static LLVM_ABI MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
Root of the metadata hierarchy.
Definition Metadata.h:64
Value * getLHS() const
Value * getRHS() const
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MDNode * getScopeList() const
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition Operator.h:78
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:113
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:107
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
AnalysisType * getAnalysisIfAvailable() const
getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to get analysis information tha...
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
Definition Constants.h:1673
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition Registry.h:116
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
This class represents the LLVM 'select' instruction.
const Value * getFalseValue() const
const Value * getCondition() const
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
const Value * getTrueValue() const
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This instruction constructs a fixed permutation of two input vectors.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:61
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:307
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:276
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:326
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
Unconditional Branch instruction.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Use * op_iterator
Definition User.h:254
op_range operands()
Definition User.h:267
op_iterator op_begin()
Definition User.h:259
LLVM_ABI bool isDroppable() const
A droppable user is a user for which uses can be dropped without affecting correctness and should be ...
Definition User.cpp:119
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:25
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
op_iterator op_end()
Definition User.h:261
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition Value.h:727
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
Definition Value.cpp:162
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
bool hasUseList() const
Check if this Value has a use-list.
Definition Value.h:344
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition Value.cpp:146
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:712
bool use_empty() const
Definition Value.h:346
LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition Value.cpp:898
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Value handle that is nullable, but tries to track the Value.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
An efficient, type-erasing, non-owning reference to a callable.
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition ilist_node.h:34
reverse_self_iterator getReverseIterator()
Definition ilist_node.h:126
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
match_combine_and< Ty... > m_CombineAnd(const Ty &...Ps)
Combine pattern matchers matching all of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
PtrAdd_match< PointerOpTy, OffsetOpTy > m_PtrAdd(const PointerOpTy &PointerOp, const OffsetOpTy &OffsetOp)
Matches GEP with i8 source element type.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, FCmpInst > m_FCmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
auto m_PtrToIntOrAddr(const OpTy &Op)
Matches PtrToInt or PtrToAddr.
OneOps_match< OpTy, Instruction::Freeze > m_Freeze(const OpTy &Op)
Matches FreezeInst.
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
br_match m_UnconditionalBr(BasicBlock *&Succ)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_idiv_op > m_IDiv(const LHS &L, const RHS &R)
Matches integer division operations.
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
constantexpr_match m_ConstantExpr()
Match a constant expression or a constant that contains a constant expression.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right shift operations.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
match_combine_or< CastInst_match< OpTy, UIToFPInst >, CastInst_match< OpTy, SIToFPInst > > m_IToFP(const OpTy &Op)
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
auto m_Constant()
Match an arbitrary Constant and ignore it.
NNegZExt_match< OpTy > m_NNegZExt(const OpTy &Op)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
Splat_match< T > m_ConstantSplat(const T &SubPattern)
Match a constant splat. TODO: Extend this to non-constant splats.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
SelectLike_match< CondTy, LTy, RTy > m_SelectLike(const CondTy &C, const LTy &TrueC, const RTy &FalseC)
Matches a value that behaves like a boolean-controlled select, i.e.
auto m_MaxOrMin(const LHS &L, const RHS &R)
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
BinaryOp_match< LHS, RHS, Instruction::SDiv > m_SDiv(const LHS &L, const RHS &R)
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap >, DisjointOr_match< LHS, RHS > > m_NSWAddLike(const LHS &L, const RHS &R)
Match either "add nsw" or "or disjoint".
m_Intrinsic_Ty< Opnd0 >::Ty m_Ctpop(const Opnd0 &Op0)
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
cstfp_pred_ty< is_non_zero_fp > m_NonZeroFP()
Match a floating-point non-zero.
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
brc_match< Cond_t, match_bind< BasicBlock >, match_bind< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap >, DisjointOr_match< LHS, RHS > > m_NUWAddLike(const LHS &L, const RHS &R)
Match either "add nuw" or "or disjoint".
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_VectorInsert(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
LLVM_ABI Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
@ Offset
Definition DWP.cpp:558
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:830
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2115
LLVM_ABI void initializeInstructionCombiningPassPass(PassRegistry &)
cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
LLVM_ABI unsigned removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB)
Remove all instructions from a basic block other than its terminator and any present EH pad instructi...
Definition Local.cpp:2511
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI Value * simplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef< Value * > Indices, GEPNoWrapFlags NW, const SimplifyQuery &Q)
Given operands for a GetElementPtrInst, fold the result or return null.
LLVM_ABI Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value,...
bool succ_empty(const Instruction *I)
Definition CFG.h:141
LLVM_ABI Value * simplifyFreezeInst(Value *Op, const SimplifyQuery &Q)
Given an operand for a Freeze, see if we can fold the result.
LLVM_ABI FunctionPass * createInstructionCombiningPass()
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1687
auto successors(const MachineBasicBlock *BB)
LLVM_ABI Constant * ConstantFoldInstruction(const Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
LLVM_ABI std::optional< StringRef > getAllocationFamily(const Value *I, const TargetLibraryInfo *TLI)
If a function is part of an allocation family (e.g.
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI Value * simplifyInstructionWithOperands(Instruction *I, ArrayRef< Value * > NewOps, const SimplifyQuery &Q)
Like simplifyInstruction but the operands of I are replaced with NewOps.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
gep_type_iterator gep_type_end(const User *GEP)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI Value * getReallocatedOperand(const CallBase *CB)
If this is a call to a realloc function, return the reallocated operand.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition APFloat.h:1652
LLVM_ABI bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc,...
LLVM_ABI bool handleUnreachableTerminator(Instruction *I, SmallVectorImpl< Value * > &PoisonedValues)
If a terminator in an unreachable basic block has an operand of type Instruction, transform it into p...
Definition Local.cpp:2494
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
LLVM_ABI Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:403
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_ABI Value * emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions=false)
Given a getelementptr instruction/constantexpr, emit the code necessary to compute the offset from th...
Definition Local.cpp:22
constexpr unsigned MaxAnalysisRecursionDepth
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI bool LowerDbgDeclare(Function &F)
Lowers dbg.declare records into appropriate set of dbg.value records.
Definition Local.cpp:1818
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
generic_gep_type_iterator<> gep_type_iterator
LLVM_ABI void ConvertDebugDeclareToDebugValue(DbgVariableRecord *DVR, StoreInst *SI, DIBuilder &Builder)
Inserts a dbg.value record before a store to an alloca'd value that has an associated dbg....
Definition Local.cpp:1662
LLVM_ABI void salvageDebugInfoForDbgValues(Instruction &I, ArrayRef< DbgVariableRecord * > DPInsns)
Implementation of salvageDebugInfo, applying only to instructions in Insns, rather than all debug use...
Definition Local.cpp:2063
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI Value * simplifyExtractValueInst(Value *Agg, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an ExtractValueInst, fold the result or return null.
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition Local.cpp:2440
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition ModRef.h:28
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ ModRef
The access may reference and may modify the value stored in memory.
Definition ModRef.h:36
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
@ NoModRef
The access neither references nor modifies the value stored in memory.
Definition ModRef.h:30
TargetTransformInfo TTI
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Don't use information from its non-constant operands.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2145
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI void initializeInstCombine(PassRegistry &)
Initialize all passes linked into the InstCombine library.
LLVM_ABI void findDbgUsers(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the debug info records describing a value.
LLVM_ABI Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
bool isRefSet(const ModRefInfo MRI)
Definition ModRef.h:52
LLVM_ABI std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition KnownBits.h:265
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:89
SimplifyQuery getWithInstruction(const Instruction *I) const