1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
32//
33//===----------------------------------------------------------------------===//
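
As a standalone illustration of canonicalizations 5 and 6 above (a sketch, not code from this pass; the helper name is made up), multiplying a fixed-width unsigned integer by a power of two is equivalent to a left shift:

#include <cstdint>

// add X, X is X*2 and canonicalizes to X << 1; X*8 canonicalizes to X << 3.
static bool checkMulToShiftCanonicalization(uint32_t X) {
  return (X + X) == (X << 1) && (X * 8u) == (X << 3);
}
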
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APInt.h"
37#include "llvm/ADT/ArrayRef.h"
38#include "llvm/ADT/DenseMap.h"
41#include "llvm/ADT/Statistic.h"
46#include "llvm/Analysis/CFG.h"
61#include "llvm/IR/BasicBlock.h"
62#include "llvm/IR/CFG.h"
63#include "llvm/IR/Constant.h"
64#include "llvm/IR/Constants.h"
65#include "llvm/IR/DIBuilder.h"
66#include "llvm/IR/DataLayout.h"
67#include "llvm/IR/DebugInfo.h"
69#include "llvm/IR/Dominators.h"
71#include "llvm/IR/Function.h"
73#include "llvm/IR/IRBuilder.h"
74#include "llvm/IR/InstrTypes.h"
75#include "llvm/IR/Instruction.h"
78#include "llvm/IR/Intrinsics.h"
79#include "llvm/IR/Metadata.h"
80#include "llvm/IR/Operator.h"
81#include "llvm/IR/PassManager.h"
83#include "llvm/IR/Type.h"
84#include "llvm/IR/Use.h"
85#include "llvm/IR/User.h"
86#include "llvm/IR/Value.h"
87#include "llvm/IR/ValueHandle.h"
92#include "llvm/Support/Debug.h"
100#include <algorithm>
101#include <cassert>
102#include <cstdint>
103#include <memory>
104#include <optional>
105#include <string>
106#include <utility>
107
108#define DEBUG_TYPE "instcombine"
110#include <optional>
111
112using namespace llvm;
113using namespace llvm::PatternMatch;
114
115STATISTIC(NumWorklistIterations,
116 "Number of instruction combining iterations performed");
117STATISTIC(NumOneIteration, "Number of functions with one iteration");
118STATISTIC(NumTwoIterations, "Number of functions with two iterations");
119STATISTIC(NumThreeIterations, "Number of functions with three iterations");
120STATISTIC(NumFourOrMoreIterations,
121 "Number of functions with four or more iterations");
122
123STATISTIC(NumCombined , "Number of insts combined");
124STATISTIC(NumConstProp, "Number of constant folds");
125STATISTIC(NumDeadInst , "Number of dead inst eliminated");
126STATISTIC(NumSunkInst , "Number of instructions sunk");
127STATISTIC(NumExpand, "Number of expansions");
128STATISTIC(NumFactor , "Number of factorizations");
129STATISTIC(NumReassoc , "Number of reassociations");
130DEBUG_COUNTER(VisitCounter, "instcombine-visit",
131 "Controls which instructions are visited");
132
133static cl::opt<bool>
134EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
135 cl::init(true));
136
138 "instcombine-max-sink-users", cl::init(32),
139 cl::desc("Maximum number of undroppable users for instruction sinking"));
140
141static cl::opt<unsigned>
142MaxArraySize("instcombine-maxarray-size", cl::init(1024),
143 cl::desc("Maximum array size considered when doing a combine"));
144
145// FIXME: Remove this flag when it is no longer necessary to convert
146// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
147// increases variable availability at the cost of accuracy. Variables that
148// cannot be promoted by mem2reg or SROA will be described as living in memory
149// for their entire lifetime. However, passes like DSE and instcombine can
150// delete stores to the alloca, leading to misleading and inaccurate debug
151// information. This flag can be removed when those passes are fixed.
152static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
153 cl::Hidden, cl::init(true));
154
155std::optional<Instruction *>
156InstCombinerImpl::targetInstCombineIntrinsic(IntrinsicInst &II) {
157 // Handle target specific intrinsics
158 if (II.getCalledFunction()->isTargetIntrinsic()) {
159 return TTI.instCombineIntrinsic(*this, II);
160 }
161 return std::nullopt;
162}
163
165 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
166 bool &KnownBitsComputed) {
167 // Handle target specific intrinsics
169 return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
170 KnownBitsComputed);
171 }
172 return std::nullopt;
173}
174
176 IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
177 APInt &PoisonElts2, APInt &PoisonElts3,
178 std::function<void(Instruction *, unsigned, APInt, APInt &)>
179 SimplifyAndSetOp) {
180 // Handle target specific intrinsics
183 *this, II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3,
184 SimplifyAndSetOp);
185 }
186 return std::nullopt;
187}
188
189bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
190 return TTI.isValidAddrSpaceCast(FromAS, ToAS);
191}
192
193Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
195}
196
197/// Legal integers and common types are considered desirable. This is used to
198/// avoid creating instructions with types that may not be supported well by
199/// the backend.
200/// NOTE: This treats i8, i16 and i32 specially because they are common
201/// types in frontend languages.
202bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
203 switch (BitWidth) {
204 case 8:
205 case 16:
206 case 32:
207 return true;
208 default:
209 return DL.isLegalInteger(BitWidth);
210 }
211}
212
213/// Return true if it is desirable to convert an integer computation from a
214/// given bit width to a new bit width.
215/// We don't want to convert from a legal or desirable type (like i8) to an
216/// illegal type or from a smaller to a larger illegal type. A width of '1'
217/// is always treated as a desirable type because i1 is a fundamental type in
218/// IR, and there are many specialized optimizations for i1 types.
219/// Common/desirable widths are equally treated as legal to convert to, in
220/// order to open up more combining opportunities.
221bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
222 unsigned ToWidth) const {
223 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
224 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
225
226 // Convert to desirable widths even if they are not legal types.
227 // Only shrink types, to prevent infinite loops.
228 if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
229 return true;
230
231 // If this is a legal or desirable integer 'from' type, and the result would
232 // be an illegal type, don't do the transformation.
233 if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal)
234 return false;
235
236 // Otherwise, if both are illegal, do not increase the size of the result. We
237 // do allow things like i160 -> i64, but not i64 -> i160.
238 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
239 return false;
240
241 return true;
242}
243
244/// Return true if it is desirable to convert a computation from 'From' to 'To'.
245/// We don't want to convert from a legal to an illegal type or from a smaller
246/// to a larger illegal type. i1 is always treated as a legal type because it is
247/// a fundamental type in IR, and there are many specialized optimizations for
248/// i1 types.
249bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
250 // TODO: This could be extended to allow vectors. Datalayout changes might be
251 // needed to properly support that.
252 if (!From->isIntegerTy() || !To->isIntegerTy())
253 return false;
254
255 unsigned FromWidth = From->getPrimitiveSizeInBits();
256 unsigned ToWidth = To->getPrimitiveSizeInBits();
257 return shouldChangeType(FromWidth, ToWidth);
258}
259
260// Return true if No Signed Wrap should be maintained for I.
261// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
262// where both B and C should be ConstantInts, results in a constant that does
263// not overflow. This function only handles the Add and Sub opcodes. For
264// all other opcodes, the function conservatively returns false.
266 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
267 if (!OBO || !OBO->hasNoSignedWrap())
268 return false;
269
270 // We reason about Add and Sub Only.
271 Instruction::BinaryOps Opcode = I.getOpcode();
272 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
273 return false;
274
275 const APInt *BVal, *CVal;
276 if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
277 return false;
278
279 bool Overflow = false;
280 if (Opcode == Instruction::Add)
281 (void)BVal->sadd_ov(*CVal, Overflow);
282 else
283 (void)BVal->ssub_ov(*CVal, Overflow);
284
285 return !Overflow;
286}
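
To make the overflow check above concrete, here is a standalone illustrative sketch (the helper name and the 8-bit values are arbitrary) of the APInt overflow-checked addition that maintainNoSignedWrap relies on:

#include "llvm/ADT/APInt.h"

// 100 + 50 = 150 does not fit in a signed i8, so if these were the folded
// constants B and C, maintainNoSignedWrap would return false and nsw is dropped.
static bool couldKeepNSW() {
  llvm::APInt B(8, 100), C(8, 50);
  bool Overflow = false;
  (void)B.sadd_ov(C, Overflow);
  return !Overflow; // false for this pair
}
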
287
289 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
290 return OBO && OBO->hasNoUnsignedWrap();
291}
292
294 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
295 return OBO && OBO->hasNoSignedWrap();
296}
297
298/// Conservatively clears subclassOptionalData after a reassociation or
299/// commutation. Fast-math flags are the exception: they are safe to keep, so
300/// we restore them when applicable.
302 FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
303 if (!FPMO) {
304 I.clearSubclassOptionalData();
305 return;
306 }
307
308 FastMathFlags FMF = I.getFastMathFlags();
309 I.clearSubclassOptionalData();
310 I.setFastMathFlags(FMF);
311}
312
313/// Combine constant operands of associative operations either before or after a
314/// cast to eliminate one of the associative operations:
315/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
316/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
317static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
318 InstCombinerImpl &IC) {
319 auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
320 if (!Cast || !Cast->hasOneUse())
321 return false;
322
323 // TODO: Enhance logic for other casts and remove this check.
324 auto CastOpcode = Cast->getOpcode();
325 if (CastOpcode != Instruction::ZExt)
326 return false;
327
328 // TODO: Enhance logic for other BinOps and remove this check.
329 if (!BinOp1->isBitwiseLogicOp())
330 return false;
331
332 auto AssocOpcode = BinOp1->getOpcode();
333 auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
334 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
335 return false;
336
337 Constant *C1, *C2;
338 if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
339 !match(BinOp2->getOperand(1), m_Constant(C2)))
340 return false;
341
342 // TODO: This assumes a zext cast.
343 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
344 // to the destination type might lose bits.
345
346 // Fold the constants together in the destination type:
347 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
348 const DataLayout &DL = IC.getDataLayout();
349 Type *DestTy = C1->getType();
350 Constant *CastC2 = ConstantFoldCastOperand(CastOpcode, C2, DestTy, DL);
351 if (!CastC2)
352 return false;
353 Constant *FoldedC = ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, DL);
354 if (!FoldedC)
355 return false;
356
357 IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
358 IC.replaceOperand(*BinOp1, 1, FoldedC);
360 Cast->dropPoisonGeneratingFlags();
361 return true;
362}
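
A concrete instance of the fold above (constants chosen for illustration: C2 = 7 on the narrow i8 side, C1 = 12 on the wide i32 side of a zext; they fold to 7 & 12 = 4 in the destination type), written as a standalone check with a made-up helper name:

#include <cstdint>

// and (zext i8 (and i8 %x, 7) to i32), 12  -->  and (zext i8 %x to i32), 4
static bool checkAssocCastAssocExample(uint8_t X) {
  uint32_t Original = static_cast<uint32_t>(X & 7u) & 12u;
  uint32_t Folded = static_cast<uint32_t>(X) & 4u;
  return Original == Folded;
}
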
363
364// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
365// inttoptr ( ptrtoint (x) ) --> x
366Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
367 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
368 if (IntToPtr && DL.getTypeSizeInBits(IntToPtr->getDestTy()) ==
369 DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
370 auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
371 Type *CastTy = IntToPtr->getDestTy();
372 if (PtrToInt &&
373 CastTy->getPointerAddressSpace() ==
374 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
375 DL.getTypeSizeInBits(PtrToInt->getSrcTy()) ==
376 DL.getTypeSizeInBits(PtrToInt->getDestTy()))
377 return PtrToInt->getOperand(0);
378 }
379 return nullptr;
380}
381
382/// This performs a few simplifications for operators that are associative or
383/// commutative:
384///
385/// Commutative operators:
386///
387/// 1. Order operands such that they are listed from right (least complex) to
388/// left (most complex). This puts constants before unary operators before
389/// binary operators.
390///
391/// Associative operators:
392///
393/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
394/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
395///
396/// Associative and commutative operators:
397///
398/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
399/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
400/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
401/// if C1 and C2 are constants.
403 Instruction::BinaryOps Opcode = I.getOpcode();
404 bool Changed = false;
405
406 do {
407 // Order operands such that they are listed from right (least complex) to
408 // left (most complex). This puts constants before unary operators before
409 // binary operators.
410 if (I.isCommutative() && getComplexity(I.getOperand(0)) <
411 getComplexity(I.getOperand(1)))
412 Changed = !I.swapOperands();
413
414 if (I.isCommutative()) {
415 if (auto Pair = matchSymmetricPair(I.getOperand(0), I.getOperand(1))) {
416 replaceOperand(I, 0, Pair->first);
417 replaceOperand(I, 1, Pair->second);
418 Changed = true;
419 }
420 }
421
422 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
423 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
424
425 if (I.isAssociative()) {
426 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
427 if (Op0 && Op0->getOpcode() == Opcode) {
428 Value *A = Op0->getOperand(0);
429 Value *B = Op0->getOperand(1);
430 Value *C = I.getOperand(1);
431
432 // Does "B op C" simplify?
433 if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
434 // It simplifies to V. Form "A op V".
435 replaceOperand(I, 0, A);
436 replaceOperand(I, 1, V);
437 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
438 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
439
440 // Conservatively clear all optional flags since they may not be
441 // preserved by the reassociation. Reset nsw/nuw based on the above
442 // analysis.
444
445 // Note: this is only valid because SimplifyBinOp doesn't look at
446 // the operands to Op0.
447 if (IsNUW)
448 I.setHasNoUnsignedWrap(true);
449
450 if (IsNSW)
451 I.setHasNoSignedWrap(true);
452
453 Changed = true;
454 ++NumReassoc;
455 continue;
456 }
457 }
458
459 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
460 if (Op1 && Op1->getOpcode() == Opcode) {
461 Value *A = I.getOperand(0);
462 Value *B = Op1->getOperand(0);
463 Value *C = Op1->getOperand(1);
464
465 // Does "A op B" simplify?
466 if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
467 // It simplifies to V. Form "V op C".
468 replaceOperand(I, 0, V);
469 replaceOperand(I, 1, C);
470 // Conservatively clear the optional flags, since they may not be
471 // preserved by the reassociation.
473 Changed = true;
474 ++NumReassoc;
475 continue;
476 }
477 }
478 }
479
480 if (I.isAssociative() && I.isCommutative()) {
481 if (simplifyAssocCastAssoc(&I, *this)) {
482 Changed = true;
483 ++NumReassoc;
484 continue;
485 }
486
487 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
488 if (Op0 && Op0->getOpcode() == Opcode) {
489 Value *A = Op0->getOperand(0);
490 Value *B = Op0->getOperand(1);
491 Value *C = I.getOperand(1);
492
493 // Does "C op A" simplify?
494 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
495 // It simplifies to V. Form "V op B".
496 replaceOperand(I, 0, V);
497 replaceOperand(I, 1, B);
498 // Conservatively clear the optional flags, since they may not be
499 // preserved by the reassociation.
501 Changed = true;
502 ++NumReassoc;
503 continue;
504 }
505 }
506
507 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
508 if (Op1 && Op1->getOpcode() == Opcode) {
509 Value *A = I.getOperand(0);
510 Value *B = Op1->getOperand(0);
511 Value *C = Op1->getOperand(1);
512
513 // Does "C op A" simplify?
514 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
515 // It simplifies to V. Form "B op V".
516 replaceOperand(I, 0, B);
517 replaceOperand(I, 1, V);
518 // Conservatively clear the optional flags, since they may not be
519 // preserved by the reassociation.
521 Changed = true;
522 ++NumReassoc;
523 continue;
524 }
525 }
526
527 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
528 // if C1 and C2 are constants.
529 Value *A, *B;
530 Constant *C1, *C2, *CRes;
531 if (Op0 && Op1 &&
532 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
533 match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
534 match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
535 (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
536 bool IsNUW = hasNoUnsignedWrap(I) &&
537 hasNoUnsignedWrap(*Op0) &&
538 hasNoUnsignedWrap(*Op1);
539 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
540 BinaryOperator::CreateNUW(Opcode, A, B) :
541 BinaryOperator::Create(Opcode, A, B);
542
543 if (isa<FPMathOperator>(NewBO)) {
544 FastMathFlags Flags = I.getFastMathFlags() &
545 Op0->getFastMathFlags() &
546 Op1->getFastMathFlags();
547 NewBO->setFastMathFlags(Flags);
548 }
549 InsertNewInstWith(NewBO, I.getIterator());
550 NewBO->takeName(Op1);
551 replaceOperand(I, 0, NewBO);
552 replaceOperand(I, 1, CRes);
553 // Conservatively clear the optional flags, since they may not be
554 // preserved by the reassociation.
556 if (IsNUW)
557 I.setHasNoUnsignedWrap(true);
558
559 Changed = true;
560 continue;
561 }
562 }
563
564 // No further simplifications.
565 return Changed;
566 } while (true);
567}
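
A concrete instance of transform 6 in the list above (constants C1 = 3 and C2 = 5 are arbitrary; unsigned arithmetic keeps the identity valid even on wrap), as a standalone illustrative check:

// (A op C1) op (B op C2) --> (A op B) op (C1 op C2), here with op = add.
static bool checkReassociateConstantsExample(unsigned A, unsigned B) {
  return ((A + 3u) + (B + 5u)) == ((A + B) + 8u);
}
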
568
569/// Return whether "X LOp (Y ROp Z)" is always equal to
570/// "(X LOp Y) ROp (X LOp Z)".
573 // X & (Y | Z) <--> (X & Y) | (X & Z)
574 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
575 if (LOp == Instruction::And)
576 return ROp == Instruction::Or || ROp == Instruction::Xor;
577
578 // X | (Y & Z) <--> (X | Y) & (X | Z)
579 if (LOp == Instruction::Or)
580 return ROp == Instruction::And;
581
582 // X * (Y + Z) <--> (X * Y) + (X * Z)
583 // X * (Y - Z) <--> (X * Y) - (X * Z)
584 if (LOp == Instruction::Mul)
585 return ROp == Instruction::Add || ROp == Instruction::Sub;
586
587 return false;
588}
589
590/// Return whether "(X LOp Y) ROp Z" is always equal to
591/// "(X ROp Z) LOp (Y ROp Z)".
595 return leftDistributesOverRight(ROp, LOp);
596
597 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
599
600 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
601 // but this requires knowing that the addition does not overflow and other
602 // such subtleties.
603}
604
605/// This function returns identity value for given opcode, which can be used to
606/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
608 if (isa<Constant>(V))
609 return nullptr;
610
611 return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
612}
613
614/// This function predicates factorization using distributive laws. By default,
615/// it just returns the 'Op' inputs. But for special-cases like
616/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
617/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
618/// allow more factorization opportunities.
621 Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) {
622 assert(Op && "Expected a binary operator");
623 LHS = Op->getOperand(0);
624 RHS = Op->getOperand(1);
625 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
626 Constant *C;
627 if (match(Op, m_Shl(m_Value(), m_Constant(C)))) {
628 // X << C --> X * (1 << C)
629 RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C);
630 return Instruction::Mul;
631 }
632 // TODO: We can add other conversions e.g. shr => div etc.
633 }
634 if (Instruction::isBitwiseLogicOp(TopOpcode)) {
635 if (OtherOp && OtherOp->getOpcode() == Instruction::AShr &&
637 // lshr nneg C, X --> ashr nneg C, X
638 return Instruction::AShr;
639 }
640 }
641 return Op->getOpcode();
642}
643
644/// This tries to simplify binary operations by factorizing out common terms
645/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
648 Instruction::BinaryOps InnerOpcode, Value *A,
649 Value *B, Value *C, Value *D) {
650 assert(A && B && C && D && "All values must be provided");
651
652 Value *V = nullptr;
653 Value *RetVal = nullptr;
654 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
655 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
656
657 // Does "X op' Y" always equal "Y op' X"?
658 bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
659
660 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
661 if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
662 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
663 // commutative case, "(A op' B) op (C op' A)"?
664 if (A == C || (InnerCommutative && A == D)) {
665 if (A != C)
666 std::swap(C, D);
667 // Consider forming "A op' (B op D)".
668 // If "B op D" simplifies then it can be formed with no cost.
669 V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
670
671 // If "B op D" doesn't simplify then only go on if one of the existing
672 // operations "A op' B" and "C op' D" will be zapped as no longer used.
673 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
674 V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
675 if (V)
676 RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
677 }
678 }
679
680 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
681 if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
682 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
683 // commutative case, "(A op' B) op (B op' D)"?
684 if (B == D || (InnerCommutative && B == C)) {
685 if (B != D)
686 std::swap(C, D);
687 // Consider forming "(A op C) op' B".
688 // If "A op C" simplifies then it can be formed with no cost.
689 V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
690
691 // If "A op C" doesn't simplify then only go on if one of the existing
692 // operations "A op' B" and "C op' D" will be zapped as no longer used.
693 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
694 V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
695 if (V)
696 RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
697 }
698 }
699
700 if (!RetVal)
701 return nullptr;
702
703 ++NumFactor;
704 RetVal->takeName(&I);
705
706 // Try to add no-overflow flags to the final value.
707 if (isa<OverflowingBinaryOperator>(RetVal)) {
708 bool HasNSW = false;
709 bool HasNUW = false;
710 if (isa<OverflowingBinaryOperator>(&I)) {
711 HasNSW = I.hasNoSignedWrap();
712 HasNUW = I.hasNoUnsignedWrap();
713 }
714 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
715 HasNSW &= LOBO->hasNoSignedWrap();
716 HasNUW &= LOBO->hasNoUnsignedWrap();
717 }
718
719 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
720 HasNSW &= ROBO->hasNoSignedWrap();
721 HasNUW &= ROBO->hasNoUnsignedWrap();
722 }
723
724 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
725 // We can propagate 'nsw' if we know that
726 // %Y = mul nsw i16 %X, C
727 // %Z = add nsw i16 %Y, %X
728 // =>
729 // %Z = mul nsw i16 %X, C+1
730 //
731 // iff C+1 isn't INT_MIN
732 const APInt *CInt;
733 if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
734 cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);
735
736 // nuw can be propagated with any constant or nuw value.
737 cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
738 }
739 }
740 return RetVal;
741}
742
743// If `I` has one constant operand and the other matches `(ctpop (not x))`,
744// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
745// This is only useful if the new subtract can fold, so we only handle the
746// following cases:
747// 1) (add/sub/disjoint_or C, (ctpop (not x))
748// -> (add/sub/disjoint_or C', (ctpop x))
749// 2) (cmp pred C, (ctpop (not x))
750// -> (cmp pred C', (ctpop x))
752 unsigned Opc = I->getOpcode();
753 unsigned ConstIdx = 1;
754 switch (Opc) {
755 default:
756 return nullptr;
757 // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x))
758 // We can fold the BitWidth(x) with add/sub/icmp as long the other operand
759 // is constant.
760 case Instruction::Sub:
761 ConstIdx = 0;
762 break;
763 case Instruction::ICmp:
764 // Signed predicates aren't correct in some edge cases, like for i2 types.
765 // As well, since (ctpop x) is known to lie in [0, BitWidth(x)], almost all
766 // signed comparisons against it are simplified to unsigned.
767 if (cast<ICmpInst>(I)->isSigned())
768 return nullptr;
769 break;
770 case Instruction::Or:
771 if (!match(I, m_DisjointOr(m_Value(), m_Value())))
772 return nullptr;
773 [[fallthrough]];
774 case Instruction::Add:
775 break;
776 }
777
778 Value *Op;
779 // Find ctpop.
780 if (!match(I->getOperand(1 - ConstIdx),
781 m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(Op)))))
782 return nullptr;
783
784 Constant *C;
785 // Check other operand is ImmConstant.
786 if (!match(I->getOperand(ConstIdx), m_ImmConstant(C)))
787 return nullptr;
788
789 Type *Ty = Op->getType();
790 Constant *BitWidthC = ConstantInt::get(Ty, Ty->getScalarSizeInBits());
791 // Need extra check for icmp. Note if this check is true, it generally means
792 // the icmp will simplify to true/false.
793 if (Opc == Instruction::ICmp && !cast<ICmpInst>(I)->isEquality() &&
794 !ConstantExpr::getICmp(ICmpInst::ICMP_UGT, C, BitWidthC)->isZeroValue())
795 return nullptr;
796
797 // Check we can invert `(not x)` for free.
798 bool Consumes = false;
799 if (!isFreeToInvert(Op, Op->hasOneUse(), Consumes) || !Consumes)
800 return nullptr;
801 Value *NotOp = getFreelyInverted(Op, Op->hasOneUse(), &Builder);
802 assert(NotOp != nullptr &&
803 "Desync between isFreeToInvert and getFreelyInverted");
804
805 Value *CtpopOfNotOp = Builder.CreateIntrinsic(Ty, Intrinsic::ctpop, NotOp);
806
807 Value *R = nullptr;
808
809 // Do the transformation here to avoid potentially introducing an infinite
810 // loop.
811 switch (Opc) {
812 case Instruction::Sub:
813 R = Builder.CreateAdd(CtpopOfNotOp, ConstantExpr::getSub(C, BitWidthC));
814 break;
815 case Instruction::Or:
816 case Instruction::Add:
817 R = Builder.CreateSub(ConstantExpr::getAdd(C, BitWidthC), CtpopOfNotOp);
818 break;
819 case Instruction::ICmp:
820 R = Builder.CreateICmp(cast<ICmpInst>(I)->getSwappedPredicate(),
821 CtpopOfNotOp, ConstantExpr::getSub(BitWidthC, C));
822 break;
823 default:
824 llvm_unreachable("Unhandled Opcode");
825 }
826 assert(R != nullptr);
827 return replaceInstUsesWith(*I, R);
828}
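
The rewrite above rests on the identity popcount(~x) == BitWidth(x) - popcount(x). A minimal standalone check for 8-bit values (illustrative only; the helper name is made up, and std::popcount requires C++20):

#include <bit>
#include <cstdint>

// e.g. add i8 5, (ctpop (not %x)) can be rewritten as sub i8 13, (ctpop %x).
static bool checkCtpopNotIdentity(uint8_t X) {
  return std::popcount(static_cast<uint8_t>(~X)) == 8 - std::popcount(X);
}
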
829
830// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
831// IFF
832// 1) the logic_shifts match
833// 2) either both binops are binops and one is `and` or
834// BinOp1 is `and`
835// (logic_shift (inv_logic_shift C1, C), C) == C1 or
836//
837// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
838//
839// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
840// IFF
841// 1) the logic_shifts match
842// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
843//
844// -> (BinOp (logic_shift (BinOp X, Y)), Mask)
845//
846// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
847// IFF
848// 1) Binop1 is bitwise logical operator `and`, `or` or `xor`
849// 2) Binop2 is `not`
850//
851// -> (arithmetic_shift Binop1((not X), Y), Amt)
852
854 const DataLayout &DL = I.getModule()->getDataLayout();
855 auto IsValidBinOpc = [](unsigned Opc) {
856 switch (Opc) {
857 default:
858 return false;
859 case Instruction::And:
860 case Instruction::Or:
861 case Instruction::Xor:
862 case Instruction::Add:
863 // Skip Sub as we only match constant masks which will canonicalize to use
864 // add.
865 return true;
866 }
867 };
868
869 // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
870 // constraints.
871 auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
872 unsigned ShOpc) {
873 assert(ShOpc != Instruction::AShr);
874 return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
875 ShOpc == Instruction::Shl;
876 };
877
878 auto GetInvShift = [](unsigned ShOpc) {
879 assert(ShOpc != Instruction::AShr);
880 return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
881 };
882
883 auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
884 unsigned ShOpc, Constant *CMask,
885 Constant *CShift) {
886 // If the BinOp1 is `and` we don't need to check the mask.
887 if (BinOpc1 == Instruction::And)
888 return true;
889
890 // For all other possible transfers we need complete distributable
891 // binop/shift (anything but `add` + `lshr`).
892 if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
893 return false;
894
895 // If BinOp2 is `and`, any mask works (this only really helps for non-splat
896 // vecs, otherwise the mask will be simplified and the following check will
897 // handle it).
898 if (BinOpc2 == Instruction::And)
899 return true;
900
901 // Otherwise, need mask that meets the below requirement.
902 // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
903 Constant *MaskInvShift =
904 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
905 return ConstantFoldBinaryOpOperands(ShOpc, MaskInvShift, CShift, DL) ==
906 CMask;
907 };
908
909 auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
910 Constant *CMask, *CShift;
911 Value *X, *Y, *ShiftedX, *Mask, *Shift;
912 if (!match(I.getOperand(ShOpnum),
913 m_OneUse(m_Shift(m_Value(Y), m_Value(Shift)))))
914 return nullptr;
915 if (!match(I.getOperand(1 - ShOpnum),
916 m_BinOp(m_Value(ShiftedX), m_Value(Mask))))
917 return nullptr;
918
919 if (!match(ShiftedX, m_OneUse(m_Shift(m_Value(X), m_Specific(Shift)))))
920 return nullptr;
921
922 // Make sure we are matching instruction shifts and not ConstantExpr
923 auto *IY = dyn_cast<Instruction>(I.getOperand(ShOpnum));
924 auto *IX = dyn_cast<Instruction>(ShiftedX);
925 if (!IY || !IX)
926 return nullptr;
927
928 // LHS and RHS need same shift opcode
929 unsigned ShOpc = IY->getOpcode();
930 if (ShOpc != IX->getOpcode())
931 return nullptr;
932
933 // Make sure binop is real instruction and not ConstantExpr
934 auto *BO2 = dyn_cast<Instruction>(I.getOperand(1 - ShOpnum));
935 if (!BO2)
936 return nullptr;
937
938 unsigned BinOpc = BO2->getOpcode();
939 // Make sure we have valid binops.
940 if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
941 return nullptr;
942
943 if (ShOpc == Instruction::AShr) {
944 if (Instruction::isBitwiseLogicOp(I.getOpcode()) &&
945 BinOpc == Instruction::Xor && match(Mask, m_AllOnes())) {
946 Value *NotX = Builder.CreateNot(X);
947 Value *NewBinOp = Builder.CreateBinOp(I.getOpcode(), Y, NotX);
949 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp, Shift);
950 }
951
952 return nullptr;
953 }
954
955 // If BinOp1 == BinOp2 and it's bitwise, or shl with add, then just
956 // distribute to drop the shift, regardless of the constants.
957 if (BinOpc == I.getOpcode() &&
958 IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
959 Value *NewBinOp2 = Builder.CreateBinOp(I.getOpcode(), X, Y);
960 Value *NewBinOp1 = Builder.CreateBinOp(
961 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp2, Shift);
962 return BinaryOperator::Create(I.getOpcode(), NewBinOp1, Mask);
963 }
964
965 // Otherwise we can only distribute by constant shifting the mask, so
966 // ensure we have constants.
967 if (!match(Shift, m_ImmConstant(CShift)))
968 return nullptr;
969 if (!match(Mask, m_ImmConstant(CMask)))
970 return nullptr;
971
972 // Check if we can distribute the binops.
973 if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
974 return nullptr;
975
976 Constant *NewCMask =
977 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
978 Value *NewBinOp2 = Builder.CreateBinOp(
979 static_cast<Instruction::BinaryOps>(BinOpc), X, NewCMask);
980 Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2);
981 return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(ShOpc),
982 NewBinOp1, CShift);
983 };
984
985 if (Instruction *R = MatchBinOp(0))
986 return R;
987 return MatchBinOp(1);
988}
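
A concrete instance of the first pattern documented in the comment above this function, using a hypothetical lshr amount of 4 and mask C1 = 0x0F: inverse-shifting the mask (0x0F << 4 == 0xF0) lets the common shift be hoisted past both binops. A standalone illustrative check:

#include <cstdint>

// ((X >> 4) & 0x0F) | (Y >> 4)  ==  ((X & 0xF0) | Y) >> 4
static bool checkShiftedMaskExample(uint32_t X, uint32_t Y) {
  uint32_t Original = ((X >> 4) & 0x0Fu) | (Y >> 4);
  uint32_t Rewritten = ((X & 0xF0u) | Y) >> 4;
  return Original == Rewritten;
}
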
989
990// (Binop (zext C), (select C, T, F))
991// -> (select C, (binop 1, T), (binop 0, F))
992//
993// (Binop (sext C), (select C, T, F))
994// -> (select C, (binop -1, T), (binop 0, F))
995//
996// Attempt to simplify binary operations into a select with folded args, when
997// one operand of the binop is a select instruction and the other operand is a
998// zext/sext extension, whose value is the select condition.
1001 // TODO: this simplification may be extended to any speculatable instruction,
1002 // not just binops, and would possibly be handled better in FoldOpIntoSelect.
1003 Instruction::BinaryOps Opc = I.getOpcode();
1004 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1005 Value *A, *CondVal, *TrueVal, *FalseVal;
1006 Value *CastOp;
1007
1008 auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
1009 return match(CastOp, m_ZExtOrSExt(m_Value(A))) &&
1010 A->getType()->getScalarSizeInBits() == 1 &&
1011 match(SelectOp, m_Select(m_Value(CondVal), m_Value(TrueVal),
1012 m_Value(FalseVal)));
1013 };
1014
1015 // Make sure one side of the binop is a select instruction, and the other is a
1016 // zero/sign extension operating on an i1.
1017 if (MatchSelectAndCast(LHS, RHS))
1018 CastOp = LHS;
1019 else if (MatchSelectAndCast(RHS, LHS))
1020 CastOp = RHS;
1021 else
1022 return nullptr;
1023
1024 auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
1025 bool IsCastOpRHS = (CastOp == RHS);
1026 bool IsZExt = isa<ZExtInst>(CastOp);
1027 Constant *C;
1028
1029 if (IsTrueArm) {
1030 C = Constant::getNullValue(V->getType());
1031 } else if (IsZExt) {
1032 unsigned BitWidth = V->getType()->getScalarSizeInBits();
1033 C = Constant::getIntegerValue(V->getType(), APInt(BitWidth, 1));
1034 } else {
1035 C = Constant::getAllOnesValue(V->getType());
1036 }
1037
1038 return IsCastOpRHS ? Builder.CreateBinOp(Opc, V, C)
1039 : Builder.CreateBinOp(Opc, C, V);
1040 };
1041
1042 // If the value used in the zext/sext is the select condition, or the
1043 // negation of the select condition, the binop can be simplified.
1044 if (CondVal == A) {
1045 Value *NewTrueVal = NewFoldedConst(false, TrueVal);
1046 return SelectInst::Create(CondVal, NewTrueVal,
1047 NewFoldedConst(true, FalseVal));
1048 }
1049
1050 if (match(A, m_Not(m_Specific(CondVal)))) {
1051 Value *NewTrueVal = NewFoldedConst(true, TrueVal);
1052 return SelectInst::Create(CondVal, NewTrueVal,
1053 NewFoldedConst(false, FalseVal));
1054 }
1055
1056 return nullptr;
1057}
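
A concrete instance of the zext case handled above, with hypothetical select arms T = 10 and F = 20 and op = add; the 1 or 0 produced by the extension folds into each arm:

// add (zext i1 %c to i32), (select i1 %c, i32 10, i32 20)
//   --> select i1 %c, i32 11, i32 20
static bool checkBinopOfZextAndSelectExample(bool C) {
  int Original = (C ? 1 : 0) + (C ? 10 : 20);
  int Folded = C ? 11 : 20;
  return Original == Folded;
}
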
1058
1060 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1061 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
1062 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
1063 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1064 Value *A, *B, *C, *D;
1065 Instruction::BinaryOps LHSOpcode, RHSOpcode;
1066
1067 if (Op0)
1068 LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B, Op1);
1069 if (Op1)
1070 RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D, Op0);
1071
1072 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
1073 // a common term.
1074 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
1075 if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D))
1076 return V;
1077
1078 // The instruction has the form "(A op' B) op (C)". Try to factorize common
1079 // term.
1080 if (Op0)
1081 if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
1082 if (Value *V =
1083 tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident))
1084 return V;
1085
1086 // The instruction has the form "(B) op (C op' D)". Try to factorize common
1087 // term.
1088 if (Op1)
1089 if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
1090 if (Value *V =
1091 tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D))
1092 return V;
1093
1094 return nullptr;
1095}
1096
1097/// This tries to simplify binary operations which some other binary operation
1098/// distributes over either by factorizing out common terms
1099/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
1100/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
1101/// Returns the simplified value, or null if it didn't simplify.
1103 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1104 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
1105 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
1106 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1107
1108 // Factorization.
1109 if (Value *R = tryFactorizationFolds(I))
1110 return R;
1111
1112 // Expansion.
1113 if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
1114 // The instruction has the form "(A op' B) op C". See if expanding it out
1115 // to "(A op C) op' (B op C)" results in simplifications.
1116 Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
1117 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
1118
1119 // Disable the use of undef because it's not safe to distribute undef.
1120 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1121 Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1122 Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
1123
1124 // Do "A op C" and "B op C" both simplify?
1125 if (L && R) {
1126 // They do! Return "L op' R".
1127 ++NumExpand;
1128 C = Builder.CreateBinOp(InnerOpcode, L, R);
1129 C->takeName(&I);
1130 return C;
1131 }
1132
1133 // Does "A op C" simplify to the identity value for the inner opcode?
1134 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1135 // It does! Return "B op C".
1136 ++NumExpand;
1137 C = Builder.CreateBinOp(TopLevelOpcode, B, C);
1138 C->takeName(&I);
1139 return C;
1140 }
1141
1142 // Does "B op C" simplify to the identity value for the inner opcode?
1143 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1144 // It does! Return "A op C".
1145 ++NumExpand;
1146 C = Builder.CreateBinOp(TopLevelOpcode, A, C);
1147 C->takeName(&I);
1148 return C;
1149 }
1150 }
1151
1152 if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
1153 // The instruction has the form "A op (B op' C)". See if expanding it out
1154 // to "(A op B) op' (A op C)" results in simplifications.
1155 Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
1156 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
1157
1158 // Disable the use of undef because it's not safe to distribute undef.
1159 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1160 Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
1161 Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1162
1163 // Do "A op B" and "A op C" both simplify?
1164 if (L && R) {
1165 // They do! Return "L op' R".
1166 ++NumExpand;
1167 A = Builder.CreateBinOp(InnerOpcode, L, R);
1168 A->takeName(&I);
1169 return A;
1170 }
1171
1172 // Does "A op B" simplify to the identity value for the inner opcode?
1173 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1174 // It does! Return "A op C".
1175 ++NumExpand;
1176 A = Builder.CreateBinOp(TopLevelOpcode, A, C);
1177 A->takeName(&I);
1178 return A;
1179 }
1180
1181 // Does "A op C" simplify to the identity value for the inner opcode?
1182 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1183 // It does! Return "A op B".
1184 ++NumExpand;
1185 A = Builder.CreateBinOp(TopLevelOpcode, A, B);
1186 A->takeName(&I);
1187 return A;
1188 }
1189 }
1190
1192}
1193
1194static std::optional<std::pair<Value *, Value *>>
1196 if (LHS->getParent() != RHS->getParent())
1197 return std::nullopt;
1198
1199 if (LHS->getNumIncomingValues() < 2)
1200 return std::nullopt;
1201
1202 if (!equal(LHS->blocks(), RHS->blocks()))
1203 return std::nullopt;
1204
1205 Value *L0 = LHS->getIncomingValue(0);
1206 Value *R0 = RHS->getIncomingValue(0);
1207
1208 for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) {
1209 Value *L1 = LHS->getIncomingValue(I);
1210 Value *R1 = RHS->getIncomingValue(I);
1211
1212 if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1))
1213 continue;
1214
1215 return std::nullopt;
1216 }
1217
1218 return std::optional(std::pair(L0, R0));
1219}
1220
1221std::optional<std::pair<Value *, Value *>>
1222InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) {
1223 Instruction *LHSInst = dyn_cast<Instruction>(LHS);
1224 Instruction *RHSInst = dyn_cast<Instruction>(RHS);
1225 if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode())
1226 return std::nullopt;
1227 switch (LHSInst->getOpcode()) {
1228 case Instruction::PHI:
1229 return matchSymmetricPhiNodesPair(cast<PHINode>(LHS), cast<PHINode>(RHS));
1230 case Instruction::Select: {
1231 Value *Cond = LHSInst->getOperand(0);
1232 Value *TrueVal = LHSInst->getOperand(1);
1233 Value *FalseVal = LHSInst->getOperand(2);
1234 if (Cond == RHSInst->getOperand(0) && TrueVal == RHSInst->getOperand(2) &&
1235 FalseVal == RHSInst->getOperand(1))
1236 return std::pair(TrueVal, FalseVal);
1237 return std::nullopt;
1238 }
1239 case Instruction::Call: {
1240 // Match min(a, b) and max(a, b)
1241 MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(LHSInst);
1242 MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(RHSInst);
1243 if (LHSMinMax && RHSMinMax &&
1244 LHSMinMax->getPredicate() ==
1246 ((LHSMinMax->getLHS() == RHSMinMax->getLHS() &&
1247 LHSMinMax->getRHS() == RHSMinMax->getRHS()) ||
1248 (LHSMinMax->getLHS() == RHSMinMax->getRHS() &&
1249 LHSMinMax->getRHS() == RHSMinMax->getLHS())))
1250 return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS());
1251 return std::nullopt;
1252 }
1253 default:
1254 return std::nullopt;
1255 }
1256}
1257
1259 Value *LHS,
1260 Value *RHS) {
1261 Value *A, *B, *C, *D, *E, *F;
1262 bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
1263 bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
1264 if (!LHSIsSelect && !RHSIsSelect)
1265 return nullptr;
1266
1267 FastMathFlags FMF;
1269 if (isa<FPMathOperator>(&I)) {
1270 FMF = I.getFastMathFlags();
1272 }
1273
1274 Instruction::BinaryOps Opcode = I.getOpcode();
1276
1277 Value *Cond, *True = nullptr, *False = nullptr;
1278
1279 // Special-case for add/negate combination. Replace the zero in the negation
1280 // with the trailing add operand:
1281 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
1282 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
1283 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
1284 // We need an 'add' and exactly 1 arm of the select to have been simplified.
1285 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
1286 return nullptr;
1287
1288 Value *N;
1289 if (True && match(FVal, m_Neg(m_Value(N)))) {
1290 Value *Sub = Builder.CreateSub(Z, N);
1291 return Builder.CreateSelect(Cond, True, Sub, I.getName());
1292 }
1293 if (False && match(TVal, m_Neg(m_Value(N)))) {
1294 Value *Sub = Builder.CreateSub(Z, N);
1295 return Builder.CreateSelect(Cond, Sub, False, I.getName());
1296 }
1297 return nullptr;
1298 };
1299
1300 if (LHSIsSelect && RHSIsSelect && A == D) {
1301 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
1302 Cond = A;
1303 True = simplifyBinOp(Opcode, B, E, FMF, Q);
1304 False = simplifyBinOp(Opcode, C, F, FMF, Q);
1305
1306 if (LHS->hasOneUse() && RHS->hasOneUse()) {
1307 if (False && !True)
1308 True = Builder.CreateBinOp(Opcode, B, E);
1309 else if (True && !False)
1310 False = Builder.CreateBinOp(Opcode, C, F);
1311 }
1312 } else if (LHSIsSelect && LHS->hasOneUse()) {
1313 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
1314 Cond = A;
1315 True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
1316 False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
1317 if (Value *NewSel = foldAddNegate(B, C, RHS))
1318 return NewSel;
1319 } else if (RHSIsSelect && RHS->hasOneUse()) {
1320 // X op (D ? E : F) -> D ? (X op E) : (X op F)
1321 Cond = D;
1322 True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
1323 False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
1324 if (Value *NewSel = foldAddNegate(E, F, LHS))
1325 return NewSel;
1326 }
1327
1328 if (!True || !False)
1329 return nullptr;
1330
1331 Value *SI = Builder.CreateSelect(Cond, True, False);
1332 SI->takeName(&I);
1333 return SI;
1334}
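
A concrete instance of the first case above, "(A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)", with arbitrary constants and op = add (the helper name is illustrative):

// (C ? 3 : 5) + (C ? 10 : 20)  -->  C ? 13 : 25
static bool checkSelectsFeedingBinOpExample(bool C) {
  unsigned Original = (C ? 3u : 5u) + (C ? 10u : 20u);
  unsigned Folded = C ? 13u : 25u;
  return Original == Folded;
}
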
1335
1336/// Freely adapt every user of V as if V was changed to !V.
1337/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
1339 assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
1340 for (User *U : make_early_inc_range(I->users())) {
1341 if (U == IgnoredUser)
1342 continue; // Don't consider this user.
1343 switch (cast<Instruction>(U)->getOpcode()) {
1344 case Instruction::Select: {
1345 auto *SI = cast<SelectInst>(U);
1346 SI->swapValues();
1347 SI->swapProfMetadata();
1348 break;
1349 }
1350 case Instruction::Br:
1351 cast<BranchInst>(U)->swapSuccessors(); // swaps prof metadata too
1352 break;
1353 case Instruction::Xor:
1354 replaceInstUsesWith(cast<Instruction>(*U), I);
1355 // Add to worklist for DCE.
1356 addToWorklist(cast<Instruction>(U));
1357 break;
1358 default:
1359 llvm_unreachable("Got unexpected user - out of sync with "
1360 "canFreelyInvertAllUsersOf() ?");
1361 }
1362 }
1363}
1364
1365/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
1366/// constant zero (which is the 'negate' form).
1367Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
1368 Value *NegV;
1369 if (match(V, m_Neg(m_Value(NegV))))
1370 return NegV;
1371
1372 // Constants can be considered to be negated values if they can be folded.
1373 if (ConstantInt *C = dyn_cast<ConstantInt>(V))
1374 return ConstantExpr::getNeg(C);
1375
1376 if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
1377 if (C->getType()->getElementType()->isIntegerTy())
1378 return ConstantExpr::getNeg(C);
1379
1380 if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
1381 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1382 Constant *Elt = CV->getAggregateElement(i);
1383 if (!Elt)
1384 return nullptr;
1385
1386 if (isa<UndefValue>(Elt))
1387 continue;
1388
1389 if (!isa<ConstantInt>(Elt))
1390 return nullptr;
1391 }
1392 return ConstantExpr::getNeg(CV);
1393 }
1394
1395 // Negate integer vector splats.
1396 if (auto *CV = dyn_cast<Constant>(V))
1397 if (CV->getType()->isVectorTy() &&
1398 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1399 return ConstantExpr::getNeg(CV);
1400
1401 return nullptr;
1402}
1403
1404// Try to fold:
1405// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1406// -> ({s|u}itofp (int_binop x, y))
1407// 2) (fp_binop ({s|u}itofp x), FpC)
1408// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1409//
1410// Assuming the sign of the cast for x/y is `OpsFromSigned`.
1411Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
1412 BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
1414
1415 Type *FPTy = BO.getType();
1416 Type *IntTy = IntOps[0]->getType();
1417
1418 unsigned IntSz = IntTy->getScalarSizeInBits();
1419 // This is the maximum number of in-use bits in the integer for which the
1420 // int -> fp casts are exact.
1421 unsigned MaxRepresentableBits =
1423
1424 // Preserve the known number of leading bits. This can allow us to trivially
1425 // pass the nsw/nuw checks later on.
1426 unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
1427
1428 // NB: This only comes up if OpsFromSigned is true, so there is no need to
1429 // cache it between calls to `foldFBinOpOfIntCastsFromSign`.
1430 auto IsNonZero = [&](unsigned OpNo) -> bool {
1431 if (OpsKnown[OpNo].hasKnownBits() &&
1432 OpsKnown[OpNo].getKnownBits(SQ).isNonZero())
1433 return true;
1434 return isKnownNonZero(IntOps[OpNo], SQ.DL);
1435 };
1436
1437 auto IsNonNeg = [&](unsigned OpNo) -> bool {
1438 // NB: This matches the impl in ValueTracking, we just try to use cached
1439 // knownbits here. If we ever start supporting WithCache for
1440 // `isKnownNonNegative`, change this to an explicit call.
1441 return OpsKnown[OpNo].getKnownBits(SQ).isNonNegative();
1442 };
1443
1444 // Check if we know for certain that ({s|u}itofp op) is exact.
1445 auto IsValidPromotion = [&](unsigned OpNo) -> bool {
1446 // Can we treat this operand as the desired sign?
1447 if (OpsFromSigned != isa<SIToFPInst>(BO.getOperand(OpNo)) &&
1448 !IsNonNeg(OpNo))
1449 return false;
1450
1451 // If fp precision >= bitwidth(op) then it's exact.
1452 // NB: This is slightly conservative for `sitofp`. For signed conversion, we
1453 // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
1454 // handled specially. We can't, however, increase the bound arbitrarily for
1455 // `sitofp` as for larger sizes, it won't sign extend.
1456 if (MaxRepresentableBits < IntSz) {
1457 // Otherwise, if it's a signed cast, check that fp precision >= bitwidth(op) -
1458 // numSignBits(op).
1459 // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
1460 // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`.
1461 if (OpsFromSigned)
1462 NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(IntOps[OpNo]);
1463 // Finally for unsigned check that fp precision >= bitwidth(op) -
1464 // numLeadingZeros(op).
1465 else {
1466 NumUsedLeadingBits[OpNo] =
1467 IntSz - OpsKnown[OpNo].getKnownBits(SQ).countMinLeadingZeros();
1468 }
1469 }
1470 // NB: We could also check if op is known to be a power of 2 or zero (which
1471 // will always be representable). It's unlikely, however, that if we are
1472 // unable to bound op in any way we will be able to pass the overflow checks
1473 // later on.
1474
1475 if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
1476 return false;
1477 // Signed + Mul also requires that op is non-zero to avoid -0 cases.
1478 return !OpsFromSigned || BO.getOpcode() != Instruction::FMul ||
1479 IsNonZero(OpNo);
1480 };
1481
1482 // If we have a constant rhs, see if we can losslessly convert it to an int.
1483 if (Op1FpC != nullptr) {
1484 // Signed + Mul req non-zero
1485 if (OpsFromSigned && BO.getOpcode() == Instruction::FMul &&
1486 !match(Op1FpC, m_NonZeroFP()))
1487 return nullptr;
1488
1490 OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, Op1FpC,
1491 IntTy, DL);
1492 if (Op1IntC == nullptr)
1493 return nullptr;
1494 if (ConstantFoldCastOperand(OpsFromSigned ? Instruction::SIToFP
1495 : Instruction::UIToFP,
1496 Op1IntC, FPTy, DL) != Op1FpC)
1497 return nullptr;
1498
1499 // First try to keep sign of cast the same.
1500 IntOps[1] = Op1IntC;
1501 }
1502
1503 // Ensure lhs/rhs integer types match.
1504 if (IntTy != IntOps[1]->getType())
1505 return nullptr;
1506
1507 if (Op1FpC == nullptr) {
1508 if (!IsValidPromotion(1))
1509 return nullptr;
1510 }
1511 if (!IsValidPromotion(0))
1512 return nullptr;
1513
1514 // Finally, we check that the integer version of the binop will not overflow.
1516 // Because of the precision check, we can often rule out overflows.
1517 bool NeedsOverflowCheck = true;
1518 // Try to conservatively rule out overflow based on the already done precision
1519 // checks.
1520 unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
1521 unsigned OverflowMaxCurBits =
1522 std::max(NumUsedLeadingBits[0], NumUsedLeadingBits[1]);
1523 bool OutputSigned = OpsFromSigned;
1524 switch (BO.getOpcode()) {
1525 case Instruction::FAdd:
1526 IntOpc = Instruction::Add;
1527 OverflowMaxOutputBits += OverflowMaxCurBits;
1528 break;
1529 case Instruction::FSub:
1530 IntOpc = Instruction::Sub;
1531 OverflowMaxOutputBits += OverflowMaxCurBits;
1532 break;
1533 case Instruction::FMul:
1534 IntOpc = Instruction::Mul;
1535 OverflowMaxOutputBits += OverflowMaxCurBits * 2;
1536 break;
1537 default:
1538 llvm_unreachable("Unsupported binop");
1539 }
1540 // The precision check may have already ruled out overflow.
1541 if (OverflowMaxOutputBits < IntSz) {
1542 NeedsOverflowCheck = false;
1543 // Unsigned overflow from a sub is still bounded to an in-range signed value
1544 // (this is what allows us to avoid the overflow check for sub).
1545 if (IntOpc == Instruction::Sub)
1546 OutputSigned = true;
1547 }
1548
1549 // Precision check did not rule out overflow, so need to check.
1550 // TODO: If we add support for `WithCache` in `willNotOverflow`, change
1551 // `IntOps[...]` arguments to `KnownOps[...]`.
1552 if (NeedsOverflowCheck &&
1553 !willNotOverflow(IntOpc, IntOps[0], IntOps[1], BO, OutputSigned))
1554 return nullptr;
1555
1556 Value *IntBinOp = Builder.CreateBinOp(IntOpc, IntOps[0], IntOps[1]);
1557 if (auto *IntBO = dyn_cast<BinaryOperator>(IntBinOp)) {
1558 IntBO->setHasNoSignedWrap(OutputSigned);
1559 IntBO->setHasNoUnsignedWrap(!OutputSigned);
1560 }
1561 if (OutputSigned)
1562 return new SIToFPInst(IntBinOp, FPTy);
1563 return new UIToFPInst(IntBinOp, FPTy);
1564}
1565
1566// Try to fold:
1567// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1568// -> ({s|u}itofp (int_binop x, y))
1569// 2) (fp_binop ({s|u}itofp x), FpC)
1570// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1571Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
1572 std::array<Value *, 2> IntOps = {nullptr, nullptr};
1573 Constant *Op1FpC = nullptr;
1574 // Check for:
1575 // 1) (binop ({s|u}itofp x), ({s|u}itofp y))
1576 // 2) (binop ({s|u}itofp x), FpC)
1577 if (!match(BO.getOperand(0), m_SIToFP(m_Value(IntOps[0]))) &&
1578 !match(BO.getOperand(0), m_UIToFP(m_Value(IntOps[0]))))
1579 return nullptr;
1580
1581 if (!match(BO.getOperand(1), m_Constant(Op1FpC)) &&
1582 !match(BO.getOperand(1), m_SIToFP(m_Value(IntOps[1]))) &&
1583 !match(BO.getOperand(1), m_UIToFP(m_Value(IntOps[1]))))
1584 return nullptr;
1585
1586 // Cache KnownBits a bit to potentially save some analysis.
1587 SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};
1588
1589 // Try treating x/y as coming from both `uitofp` and `sitofp`. There are
1590 // different constraints depending on the sign of the cast.
1591 // NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
1592 if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
1593 IntOps, Op1FpC, OpsKnown))
1594 return R;
1595 return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
1596 Op1FpC, OpsKnown);
1597}
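
A standalone sanity check of case 1 handled by foldFBinOpOfIntCasts: when the integer op is proven not to overflow and both operands are exactly representable in the FP type, doing the arithmetic in the integer domain gives the same value. The i8/double choice and helper name below are purely illustrative:

#include <cstdint>

// fadd (sitofp i8 %x), (sitofp i8 %y) --> sitofp (add nsw i8 %x, %y),
// valid only when the i8 add cannot overflow (guarded below).
static bool checkFBinOpOfIntCastsExample(int8_t X, int8_t Y) {
  int Sum = X + Y; // exact, computed in 'int'
  if (Sum < INT8_MIN || Sum > INT8_MAX)
    return true; // the fold would not fire; nothing to compare
  double FPForm = static_cast<double>(X) + static_cast<double>(Y);
  return FPForm == static_cast<double>(static_cast<int8_t>(Sum));
}
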
1598
1599/// A binop with a constant operand and a sign-extended boolean operand may be
1600/// converted into a select of constants by applying the binary operation to
1601/// the constant with the two possible values of the extended boolean (0 or -1).
1602Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1603 // TODO: Handle non-commutative binop (constant is operand 0).
1604 // TODO: Handle zext.
1605 // TODO: Peek through 'not' of cast.
1606 Value *BO0 = BO.getOperand(0);
1607 Value *BO1 = BO.getOperand(1);
1608 Value *X;
1609 Constant *C;
1610 if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
1611 !X->getType()->isIntOrIntVectorTy(1))
1612 return nullptr;
1613
1614 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1617 Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
1618 Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
1619 return SelectInst::Create(X, TVal, FVal);
1620}
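
A concrete instance of the fold above with a hypothetical constant C = 42 and op = add; the sign-extended boolean contributes either -1 or 0:

// add (sext i1 %x to i32), 42  -->  select i1 %x, i32 41, i32 42
static bool checkSextBoolToSelectExample(bool X) {
  int Original = (X ? -1 : 0) + 42;
  int Folded = X ? 41 : 42;
  return Original == Folded;
}
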
1621
1623 SelectInst *SI,
1624 bool IsTrueArm) {
1625 SmallVector<Constant *> ConstOps;
1626 for (Value *Op : I.operands()) {
1627 CmpInst::Predicate Pred;
1628 Constant *C = nullptr;
1629 if (Op == SI) {
1630 C = dyn_cast<Constant>(IsTrueArm ? SI->getTrueValue()
1631 : SI->getFalseValue());
1632 } else if (match(SI->getCondition(),
1633 m_ICmp(Pred, m_Specific(Op), m_Constant(C))) &&
1634 Pred == (IsTrueArm ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
1635 isGuaranteedNotToBeUndefOrPoison(C)) {
1636 // Pass
1637 } else {
1638 C = dyn_cast<Constant>(Op);
1639 }
1640 if (C == nullptr)
1641 return nullptr;
1642
1643 ConstOps.push_back(C);
1644 }
1645
1646 return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout());
1647}
1648
1649static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1650 Value *NewOp, InstCombiner &IC) {
1651 Instruction *Clone = I.clone();
1652 Clone->replaceUsesOfWith(SI, NewOp);
1654 IC.InsertNewInstBefore(Clone, SI->getIterator());
1655 return Clone;
1656}
1657
1658Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
1659 bool FoldWithMultiUse) {
1660 // Don't modify shared select instructions unless FoldWithMultiUse is set.
1661 if (!SI->hasOneUse() && !FoldWithMultiUse)
1662 return nullptr;
1663
1664 Value *TV = SI->getTrueValue();
1665 Value *FV = SI->getFalseValue();
1666 if (!(isa<Constant>(TV) || isa<Constant>(FV)))
1667 return nullptr;
1668
1669 // Bool selects with constant operands can be folded to logical ops.
1670 if (SI->getType()->isIntOrIntVectorTy(1))
1671 return nullptr;
1672
1673 // Test if a FCmpInst instruction is used exclusively by a select as
1674 // part of a minimum or maximum operation. If so, refrain from doing
1675 // any other folding. This helps out other analyses which understand
1676 // non-obfuscated minimum and maximum idioms. And in this case, at
1677 // least one of the comparison operands has at least one user besides
1678 // the compare (the select), which would often largely negate the
1679 // benefit of folding anyway.
1680 if (auto *CI = dyn_cast<FCmpInst>(SI->getCondition())) {
1681 if (CI->hasOneUse()) {
1682 Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1683 if ((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1))
1684 return nullptr;
1685 }
1686 }
1687
1688 // Make sure that one of the select arms constant folds successfully.
1689 Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ true);
1690 Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ false);
1691 if (!NewTV && !NewFV)
1692 return nullptr;
1693
1694 // Create an instruction for the arm that did not fold.
1695 if (!NewTV)
1696 NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);
1697 if (!NewFV)
1698 NewFV = foldOperationIntoSelectOperand(Op, SI, FV, *this);
1699 return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
1700}
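// Illustrative IR example of the fold above (not part of the source file):
// with a single-use select whose arms are constants,
//   %s = select i1 %c, i32 0, i32 8
//   %r = add i32 %s, 4
// both arms constant-fold, giving
//   %r = select i1 %c, i32 4, i32 12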
1701
1702static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN,
1703 Value *InValue, BasicBlock *InBB,
1704 const DataLayout &DL,
1705 const SimplifyQuery SQ) {
1706 // NB: It is a precondition of this transform that the operands be
1707 // phi translatable! This is usually trivially satisfied by limiting it
1708 // to constant ops, and for selects we do a more sophisticated check.
1709 SmallVector<Value *> Ops;
1710 for (Value *Op : I.operands()) {
1711 if (Op == PN)
1712 Ops.push_back(InValue);
1713 else
1714 Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
1715 }
1716
1717 // Don't consider the simplification successful if we get back a constant
1718 // expression. That's just an instruction in hiding.
1719 // Also reject the case where we simplify back to the phi node. We wouldn't
1720 // be able to remove it in that case.
1721 Value *NewVal = simplifyInstructionWithOperands(
1722 &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
1723 if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr()))
1724 return NewVal;
1725
1726 // Check if incoming PHI value can be replaced with constant
1727 // based on implied condition.
1728 BranchInst *TerminatorBI = dyn_cast<BranchInst>(InBB->getTerminator());
1729 const ICmpInst *ICmp = dyn_cast<ICmpInst>(&I);
1730 if (TerminatorBI && TerminatorBI->isConditional() &&
1731 TerminatorBI->getSuccessor(0) != TerminatorBI->getSuccessor(1) && ICmp) {
1732 bool LHSIsTrue = TerminatorBI->getSuccessor(0) == PN->getParent();
1733 std::optional<bool> ImpliedCond =
1734 isImpliedCondition(TerminatorBI->getCondition(), ICmp->getPredicate(),
1735 Ops[0], Ops[1], DL, LHSIsTrue);
1736 if (ImpliedCond)
1737 return ConstantInt::getBool(I.getType(), ImpliedCond.value());
1738 }
1739
1740 return nullptr;
1741}
1742
1743Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
1744 unsigned NumPHIValues = PN->getNumIncomingValues();
1745 if (NumPHIValues == 0)
1746 return nullptr;
1747
1748 // We normally only transform phis with a single use. However, if a PHI has
1749 // multiple uses and they are all the same operation, we can fold *all* of the
1750 // uses into the PHI.
1751 if (!PN->hasOneUse()) {
1752 // Walk the use list for the instruction, comparing them to I.
1753 for (User *U : PN->users()) {
1754 Instruction *UI = cast<Instruction>(U);
1755 if (UI != &I && !I.isIdenticalTo(UI))
1756 return nullptr;
1757 }
1758 // Otherwise, we can replace *all* users with the new PHI we form.
1759 }
1760
1761 // Check to see whether the instruction can be folded into each phi operand.
1762 // If there is one operand that does not fold, remember the BB it is in.
1763 // If there is more than one or if *it* is a PHI, bail out.
1764 SmallVector<Value *> NewPhiValues;
1765 BasicBlock *NonSimplifiedBB = nullptr;
1766 Value *NonSimplifiedInVal = nullptr;
1767 for (unsigned i = 0; i != NumPHIValues; ++i) {
1768 Value *InVal = PN->getIncomingValue(i);
1769 BasicBlock *InBB = PN->getIncomingBlock(i);
1770
1771 if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InVal, InBB, DL, SQ)) {
1772 NewPhiValues.push_back(NewVal);
1773 continue;
1774 }
1775
1776 if (NonSimplifiedBB) return nullptr; // More than one non-simplified value.
1777
1778 NonSimplifiedBB = InBB;
1779 NonSimplifiedInVal = InVal;
1780 NewPhiValues.push_back(nullptr);
1781
1782 // If the InVal is an invoke at the end of the pred block, then we can't
1783 // insert a computation after it without breaking the edge.
1784 if (isa<InvokeInst>(InVal))
1785 if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB)
1786 return nullptr;
1787
1788 // If the incoming non-constant value is reachable from the phis block,
1789 // we'll push the operation across a loop backedge. This could result in
1790 // an infinite combine loop, and is generally non-profitable (especially
1791 // if the operation was originally outside the loop).
1792 if (isPotentiallyReachable(PN->getParent(), NonSimplifiedBB, nullptr, &DT,
1793 LI))
1794 return nullptr;
1795 }
1796
1797 // If there is exactly one non-simplified value, we can insert a copy of the
1798 // operation in that block. However, if this is a critical edge, we would be
1799 // inserting the computation on some other paths (e.g. inside a loop). Only
1800 // do this if the pred block is unconditionally branching into the phi block.
1801 // Also, make sure that the pred block is not dead code.
1802 if (NonSimplifiedBB != nullptr) {
1803 BranchInst *BI = dyn_cast<BranchInst>(NonSimplifiedBB->getTerminator());
1804 if (!BI || !BI->isUnconditional() ||
1805 !DT.isReachableFromEntry(NonSimplifiedBB))
1806 return nullptr;
1807 }
1808
1809 // Okay, we can do the transformation: create the new PHI node.
1810 PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
1811 InsertNewInstBefore(NewPN, PN->getIterator());
1812 NewPN->takeName(PN);
1813 NewPN->setDebugLoc(PN->getDebugLoc());
1814
1815 // If we are going to have to insert a new computation, do so right before the
1816 // predecessor's terminator.
1817 Instruction *Clone = nullptr;
1818 if (NonSimplifiedBB) {
1819 Clone = I.clone();
1820 for (Use &U : Clone->operands()) {
1821 if (U == PN)
1822 U = NonSimplifiedInVal;
1823 else
1824 U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
1825 }
1826 InsertNewInstBefore(Clone, NonSimplifiedBB->getTerminator()->getIterator());
1827 }
1828
1829 for (unsigned i = 0; i != NumPHIValues; ++i) {
1830 if (NewPhiValues[i])
1831 NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
1832 else
1833 NewPN->addIncoming(Clone, PN->getIncomingBlock(i));
1834 }
1835
1836 for (User *U : make_early_inc_range(PN->users())) {
1837 Instruction *User = cast<Instruction>(U);
1838 if (User == &I) continue;
1839 replaceInstUsesWith(*User, NewPN);
1840 eraseInstFromFunction(*User);
1841 }
1842
1843 replaceAllDbgUsesWith(const_cast<PHINode &>(*PN),
1844 const_cast<PHINode &>(*NewPN),
1845 const_cast<PHINode &>(*PN), DT);
1846 return replaceInstUsesWith(I, NewPN);
1847}
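// Illustrative IR example of foldOpIntoPhi (not part of the source file):
// assuming %p has a single use and %bb1 branches unconditionally into this
// block,
//   %p = phi i32 [ 0, %bb0 ], [ %x, %bb1 ]
//   %r = add i32 %p, 5
// becomes a phi of the folded values, with the add cloned into %bb1:
//   bb1:  %x.add = add i32 %x, 5
//   ...
//   %r = phi i32 [ 5, %bb0 ], [ %x.add, %bb1 ]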
1848
1849Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
1850 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
1851 // we are guarding against replicating the binop in >1 predecessor.
1852 // This could miss matching a phi with 2 constant incoming values.
1853 auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
1854 auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
1855 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
1856 Phi0->getNumOperands() != Phi1->getNumOperands())
1857 return nullptr;
1858
1859 // TODO: Remove the restriction for binop being in the same block as the phis.
1860 if (BO.getParent() != Phi0->getParent() ||
1861 BO.getParent() != Phi1->getParent())
1862 return nullptr;
1863
1864 // Fold if there is at least one specific constant value in phi0 or phi1's
1865 // incoming values that comes from the same block and this specific constant
1866 // value can be used to do optimization for specific binary operator.
1867 // For example:
1868 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
1869 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
1870 // %add = add i32 %phi0, %phi1
1871 // ==>
1872 // %add = phi i32 [%j, %bb0], [%i, %bb1]
1873 Constant *C = ConstantExpr::getBinOpIdentity(BO.getOpcode(), BO.getType(),
1874 /*AllowRHSConstant*/ false);
1875 if (C) {
1876 SmallVector<Value *, 4> NewIncomingValues;
1877 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
1878 auto &Phi0Use = std::get<0>(T);
1879 auto &Phi1Use = std::get<1>(T);
1880 if (Phi0->getIncomingBlock(Phi0Use) != Phi1->getIncomingBlock(Phi1Use))
1881 return false;
1882 Value *Phi0UseV = Phi0Use.get();
1883 Value *Phi1UseV = Phi1Use.get();
1884 if (Phi0UseV == C)
1885 NewIncomingValues.push_back(Phi1UseV);
1886 else if (Phi1UseV == C)
1887 NewIncomingValues.push_back(Phi0UseV);
1888 else
1889 return false;
1890 return true;
1891 };
1892
1893 if (all_of(zip(Phi0->operands(), Phi1->operands()),
1894 CanFoldIncomingValuePair)) {
1895 PHINode *NewPhi =
1896 PHINode::Create(Phi0->getType(), Phi0->getNumOperands());
1897 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
1898 "The number of collected incoming values should equal the number "
1899 "of the original PHINode operands!");
1900 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
1901 NewPhi->addIncoming(NewIncomingValues[I], Phi0->getIncomingBlock(I));
1902 return NewPhi;
1903 }
1904 }
1905
1906 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
1907 return nullptr;
1908
1909 // Match a pair of incoming constants for one of the predecessor blocks.
1910 BasicBlock *ConstBB, *OtherBB;
1911 Constant *C0, *C1;
1912 if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
1913 ConstBB = Phi0->getIncomingBlock(0);
1914 OtherBB = Phi0->getIncomingBlock(1);
1915 } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
1916 ConstBB = Phi0->getIncomingBlock(1);
1917 OtherBB = Phi0->getIncomingBlock(0);
1918 } else {
1919 return nullptr;
1920 }
1921 if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
1922 return nullptr;
1923
1924 // The block that we are hoisting to must reach here unconditionally.
1925 // Otherwise, we could be speculatively executing an expensive or
1926 // non-speculative op.
1927 auto *PredBlockBranch = dyn_cast<BranchInst>(OtherBB->getTerminator());
1928 if (!PredBlockBranch || PredBlockBranch->isConditional() ||
1929 !DT.isReachableFromEntry(OtherBB))
1930 return nullptr;
1931
1932 // TODO: This check could be tightened to only apply to binops (div/rem) that
1933 // are not safe to speculatively execute. But that could allow hoisting
1934 // potentially expensive instructions (fdiv for example).
1935 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
1936 if (BBIter->mayWriteToMemory())
1937 return nullptr;
1938
1939 // Fold constants for the predecessor block with constant incoming values.
1940 Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
1941 if (!NewC)
1942 return nullptr;
1943
1944 // Make a new binop in the predecessor block with the non-constant incoming
1945 // values.
1946 Builder.SetInsertPoint(PredBlockBranch);
1947 Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
1948 Phi0->getIncomingValueForBlock(OtherBB),
1949 Phi1->getIncomingValueForBlock(OtherBB));
1950 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
1951 NotFoldedNewBO->copyIRFlags(&BO);
1952
1953 // Replace the binop with a phi of the new values. The old phis are dead.
1954 PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
1955 NewPhi->addIncoming(NewBO, OtherBB);
1956 NewPhi->addIncoming(NewC, ConstBB);
1957 return NewPhi;
1958}
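// Illustrative IR example of the constant-pair case above (not part of the
// source file): assuming both phis feed only this add, %bb1 falls through
// unconditionally, and nothing before the add writes memory,
//   %phi0 = phi i32 [ 6, %bb0 ], [ %a, %bb1 ]
//   %phi1 = phi i32 [ 7, %bb0 ], [ %b, %bb1 ]
//   %r    = add i32 %phi0, %phi1
// becomes
//   bb1:  %new = add i32 %a, %b
//   ...
//   %r = phi i32 [ 13, %bb0 ], [ %new, %bb1 ]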
1959
1960Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
1961 if (!isa<Constant>(I.getOperand(1)))
1962 return nullptr;
1963
1964 if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
1965 if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
1966 return NewSel;
1967 } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
1968 if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
1969 return NewPhi;
1970 }
1971 return nullptr;
1972}
1973
1974static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
1975 // If this GEP has only 0 indices, it is the same pointer as
1976 // Src. If Src is not a trivial GEP too, don't combine
1977 // the indices.
1978 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
1979 !Src.hasOneUse())
1980 return false;
1981 return true;
1982}
1983
1984Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
1985 if (!isa<VectorType>(Inst.getType()))
1986 return nullptr;
1987
1988 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
1989 Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
1990 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
1991 cast<VectorType>(Inst.getType())->getElementCount());
1992 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
1993 cast<VectorType>(Inst.getType())->getElementCount());
1994
1995 // If both operands of the binop are vector concatenations, then perform the
1996 // narrow binop on each pair of the source operands followed by concatenation
1997 // of the results.
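 // Illustrative IR example (not part of the source file): if both operands
 // are one-use concatenating shuffles with the same mask,
 //   %l = shufflevector <2 x i32> %l0, <2 x i32> %l1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 //   %r = shufflevector <2 x i32> %r0, <2 x i32> %r1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 //   %v = add <4 x i32> %l, %r
 // the add is done on the narrow halves and the results re-concatenated:
 //   %v0 = add <2 x i32> %l0, %r0
 //   %v1 = add <2 x i32> %l1, %r1
 //   %v  = shufflevector <2 x i32> %v0, <2 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>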
1998 Value *L0, *L1, *R0, *R1;
1999 ArrayRef<int> Mask;
2000 if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
2001 match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
2002 LHS->hasOneUse() && RHS->hasOneUse() &&
2003 cast<ShuffleVectorInst>(LHS)->isConcat() &&
2004 cast<ShuffleVectorInst>(RHS)->isConcat()) {
2005 // This transform does not have the speculative execution constraint as
2006 // below because the shuffle is a concatenation. The new binops are
2007 // operating on exactly the same elements as the existing binop.
2008 // TODO: We could ease the mask requirement to allow different undef lanes,
2009 // but that requires an analysis of the binop-with-undef output value.
2010 Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
2011 if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
2012 BO->copyIRFlags(&Inst);
2013 Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
2014 if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
2015 BO->copyIRFlags(&Inst);
2016 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
2017 }
2018
2019 auto createBinOpReverse = [&](Value *X, Value *Y) {
2020 Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
2021 if (auto *BO = dyn_cast<BinaryOperator>(V))
2022 BO->copyIRFlags(&Inst);
2023 Module *M = Inst.getModule();
2024 Function *F = Intrinsic::getDeclaration(
2025 M, Intrinsic::experimental_vector_reverse, V->getType());
2026 return CallInst::Create(F, V);
2027 };
2028
2029 // NOTE: Reverse shuffles don't require the speculative execution protection
2030 // below because they don't affect which lanes take part in the computation.
2031
2032 Value *V1, *V2;
2033 if (match(LHS, m_VecReverse(m_Value(V1)))) {
2034 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2035 if (match(RHS, m_VecReverse(m_Value(V2))) &&
2036 (LHS->hasOneUse() || RHS->hasOneUse() ||
2037 (LHS == RHS && LHS->hasNUses(2))))
2038 return createBinOpReverse(V1, V2);
2039
2040 // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
2041 if (LHS->hasOneUse() && isSplatValue(RHS))
2042 return createBinOpReverse(V1, RHS);
2043 }
2044 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2045 else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
2046 return createBinOpReverse(LHS, V2);
2047
2048 // It may not be safe to reorder shuffles and things like div, urem, etc.
2049 // because we may trap when executing those ops on unknown vector elements.
2050 // See PR20059.
2051 if (!isSafeToSpeculativelyExecute(&Inst))
2052 return nullptr;
2053
2054 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
2055 Value *XY = Builder.CreateBinOp(Opcode, X, Y);
2056 if (auto *BO = dyn_cast<BinaryOperator>(XY))
2057 BO->copyIRFlags(&Inst);
2058 return new ShuffleVectorInst(XY, M);
2059 };
2060
2061 // If both arguments of the binary operation are shuffles that use the same
2062 // mask and shuffle within a single vector, move the shuffle after the binop.
2063 if (match(LHS, m_Shuffle(m_Value(V1), m_Poison(), m_Mask(Mask))) &&
2064 match(RHS, m_Shuffle(m_Value(V2), m_Poison(), m_SpecificMask(Mask))) &&
2065 V1->getType() == V2->getType() &&
2066 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
2067 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
2068 return createBinOpShuffle(V1, V2, Mask);
2069 }
2070
2071 // If both arguments of a commutative binop are select-shuffles that use the
2072 // same mask with commuted operands, the shuffles are unnecessary.
2073 if (Inst.isCommutative() &&
2074 match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
2075 match(RHS,
2076 m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
2077 auto *LShuf = cast<ShuffleVectorInst>(LHS);
2078 auto *RShuf = cast<ShuffleVectorInst>(RHS);
2079 // TODO: Allow shuffles that contain undefs in the mask?
2080 // That is legal, but it reduces undef knowledge.
2081 // TODO: Allow arbitrary shuffles by shuffling after binop?
2082 // That might be legal, but we have to deal with poison.
2083 if (LShuf->isSelect() &&
2084 !is_contained(LShuf->getShuffleMask(), PoisonMaskElem) &&
2085 RShuf->isSelect() &&
2086 !is_contained(RShuf->getShuffleMask(), PoisonMaskElem)) {
2087 // Example:
2088 // LHS = shuffle V1, V2, <0, 5, 6, 3>
2089 // RHS = shuffle V2, V1, <0, 5, 6, 3>
2090 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
2091 Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
2092 NewBO->copyIRFlags(&Inst);
2093 return NewBO;
2094 }
2095 }
2096
2097 // If one argument is a shuffle within one vector and the other is a constant,
2098 // try moving the shuffle after the binary operation. This canonicalization
2099 // intends to move shuffles closer to other shuffles and binops closer to
2100 // other binops, so they can be folded. It may also enable demanded elements
2101 // transforms.
2102 Constant *C;
2103 auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
2104 if (InstVTy &&
2105 match(&Inst, m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Poison(),
2106 m_Mask(Mask))),
2107 m_ImmConstant(C))) &&
2108 cast<FixedVectorType>(V1->getType())->getNumElements() <=
2109 InstVTy->getNumElements()) {
2110 assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
2111 "Shuffle should not change scalar type");
2112
2113 // Find constant NewC that has property:
2114 // shuffle(NewC, ShMask) = C
2115 // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
2116 // reorder is not possible. A 1-to-1 mapping is not required. Example:
2117 // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
2118 bool ConstOp1 = isa<Constant>(RHS);
2119 ArrayRef<int> ShMask = Mask;
2120 unsigned SrcVecNumElts =
2121 cast<FixedVectorType>(V1->getType())->getNumElements();
2122 PoisonValue *PoisonScalar = PoisonValue::get(C->getType()->getScalarType());
2123 SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, PoisonScalar);
2124 bool MayChange = true;
2125 unsigned NumElts = InstVTy->getNumElements();
2126 for (unsigned I = 0; I < NumElts; ++I) {
2127 Constant *CElt = C->getAggregateElement(I);
2128 if (ShMask[I] >= 0) {
2129 assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
2130 Constant *NewCElt = NewVecC[ShMask[I]];
2131 // Bail out if:
2132 // 1. The constant vector contains a constant expression.
2133 // 2. The shuffle needs an element of the constant vector that can't
2134 // be mapped to a new constant vector.
2135 // 3. This is a widening shuffle that copies elements of V1 into the
2136 // extended elements (extending with poison is allowed).
2137 if (!CElt || (!isa<PoisonValue>(NewCElt) && NewCElt != CElt) ||
2138 I >= SrcVecNumElts) {
2139 MayChange = false;
2140 break;
2141 }
2142 NewVecC[ShMask[I]] = CElt;
2143 }
2144 // If this is a widening shuffle, we must be able to extend with poison
2145 // elements. If the original binop does not produce a poison in the high
2146 // lanes, then this transform is not safe.
2147 // Similarly for poison lanes due to the shuffle mask, we can only
2148 // transform binops that preserve poison.
2149 // TODO: We could shuffle those non-poison constant values into the
2150 // result by using a constant vector (rather than a poison vector)
2151 // as operand 1 of the new binop, but that might be too aggressive
2152 // for target-independent shuffle creation.
2153 if (I >= SrcVecNumElts || ShMask[I] < 0) {
2154 Constant *MaybePoison =
2155 ConstOp1
2156 ? ConstantFoldBinaryOpOperands(Opcode, PoisonScalar, CElt, DL)
2157 : ConstantFoldBinaryOpOperands(Opcode, CElt, PoisonScalar, DL);
2158 if (!MaybePoison || !isa<PoisonValue>(MaybePoison)) {
2159 MayChange = false;
2160 break;
2161 }
2162 }
2163 }
2164 if (MayChange) {
2165 Constant *NewC = ConstantVector::get(NewVecC);
2166 // It may not be safe to execute a binop on a vector with poison elements
2167 // because the entire instruction can be folded to undef or create poison
2168 // that did not exist in the original code.
2169 // TODO: The shift case should not be necessary.
2170 if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
2171 NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
2172
2173 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
2174 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
2175 Value *NewLHS = ConstOp1 ? V1 : NewC;
2176 Value *NewRHS = ConstOp1 ? NewC : V1;
2177 return createBinOpShuffle(NewLHS, NewRHS, Mask);
2178 }
2179 }
2180
2181 // Try to reassociate to sink a splat shuffle after a binary operation.
2182 if (Inst.isAssociative() && Inst.isCommutative()) {
2183 // Canonicalize shuffle operand as LHS.
2184 if (isa<ShuffleVectorInst>(RHS))
2185 std::swap(LHS, RHS);
2186
2187 Value *X;
2188 ArrayRef<int> MaskC;
2189 int SplatIndex;
2190 Value *Y, *OtherOp;
2191 if (!match(LHS,
2192 m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
2193 !match(MaskC, m_SplatOrUndefMask(SplatIndex)) ||
2194 X->getType() != Inst.getType() ||
2195 !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
2196 return nullptr;
2197
2198 // FIXME: This may not be safe if the analysis allows undef elements. By
2199 // moving 'Y' before the splat shuffle, we are implicitly assuming
2200 // that it is not undef/poison at the splat index.
2201 if (isSplatValue(OtherOp, SplatIndex)) {
2202 std::swap(Y, OtherOp);
2203 } else if (!isSplatValue(Y, SplatIndex)) {
2204 return nullptr;
2205 }
2206
2207 // X and Y are splatted values, so perform the binary operation on those
2208 // values followed by a splat followed by the 2nd binary operation:
2209 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
2210 Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
2211 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
2212 Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
2213 Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
2214
2215 // Intersect FMF on both new binops. Other (poison-generating) flags are
2216 // dropped to be safe.
2217 if (isa<FPMathOperator>(R)) {
2218 R->copyFastMathFlags(&Inst);
2219 R->andIRFlags(RHS);
2220 }
2221 if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
2222 NewInstBO->copyIRFlags(R);
2223 return R;
2224 }
2225
2226 return nullptr;
2227}
2228
2229/// Try to narrow the width of a binop if at least 1 operand is an extend
2230/// of a value. This requires a potentially expensive known bits check to make
2231/// sure the narrow op does not overflow.
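/// Illustrative IR example (not part of the source file): when known bits
/// prove the narrow add cannot wrap (e.g. both i8 inputs have their high bit
/// clear),
///   %zx = zext i8 %x to i32
///   %zy = zext i8 %y to i32
///   %r  = add i32 %zx, %zy
/// becomes
///   %n = add nuw i8 %x, %y
///   %r = zext i8 %n to i32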
2232Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
2233 // We need at least one extended operand.
2234 Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
2235
2236 // If this is a sub, we swap the operands since we always want an extension
2237 // on the RHS. The LHS can be an extension or a constant.
2238 if (BO.getOpcode() == Instruction::Sub)
2239 std::swap(Op0, Op1);
2240
2241 Value *X;
2242 bool IsSext = match(Op0, m_SExt(m_Value(X)));
2243 if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
2244 return nullptr;
2245
2246 // If both operands are the same extension from the same source type and we
2247 // can eliminate at least one (hasOneUse), this might work.
2248 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
2249 Value *Y;
2250 if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
2251 cast<Operator>(Op1)->getOpcode() == CastOpc &&
2252 (Op0->hasOneUse() || Op1->hasOneUse()))) {
2253 // If that did not match, see if we have a suitable constant operand.
2254 // Truncating and extending must produce the same constant.
2255 Constant *WideC;
2256 if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
2257 return nullptr;
2258 Constant *NarrowC = getLosslessTrunc(WideC, X->getType(), CastOpc);
2259 if (!NarrowC)
2260 return nullptr;
2261 Y = NarrowC;
2262 }
2263
2264 // Swap back now that we found our operands.
2265 if (BO.getOpcode() == Instruction::Sub)
2266 std::swap(X, Y);
2267
2268 // Both operands have narrow versions. Last step: the math must not overflow
2269 // in the narrow width.
2270 if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
2271 return nullptr;
2272
2273 // bo (ext X), (ext Y) --> ext (bo X, Y)
2274 // bo (ext X), C --> ext (bo X, C')
2275 Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
2276 if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
2277 if (IsSext)
2278 NewBinOp->setHasNoSignedWrap();
2279 else
2280 NewBinOp->setHasNoUnsignedWrap();
2281 }
2282 return CastInst::Create(CastOpc, NarrowBO, BO.getType());
2283}
2284
2285static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
2286 // At least one GEP must be inbounds.
2287 if (!GEP1.isInBounds() && !GEP2.isInBounds())
2288 return false;
2289
2290 return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
2291 (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
2292}
2293
2294/// Thread a GEP operation with constant indices through the constant true/false
2295/// arms of a select.
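/// Illustrative IR example (not part of the source file), with hypothetical
/// globals @a and @b:
///   %p = select i1 %c, ptr @a, ptr @b
///   %g = getelementptr inbounds i32, ptr %p, i64 1
/// becomes
///   %g = select i1 %c, ptr getelementptr inbounds (i32, ptr @a, i64 1),
///                      ptr getelementptr inbounds (i32, ptr @b, i64 1)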
2296static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
2297 InstCombiner::BuilderTy &Builder) {
2298 if (!GEP.hasAllConstantIndices())
2299 return nullptr;
2300
2301 Instruction *Sel;
2302 Value *Cond;
2303 Constant *TrueC, *FalseC;
2304 if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
2305 !match(Sel,
2306 m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
2307 return nullptr;
2308
2309 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
2310 // Propagate 'inbounds' and metadata from existing instructions.
2311 // Note: using IRBuilder to create the constants for efficiency.
2312 SmallVector<Value *, 4> IndexC(GEP.indices());
2313 bool IsInBounds = GEP.isInBounds();
2314 Type *Ty = GEP.getSourceElementType();
2315 Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", IsInBounds);
2316 Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", IsInBounds);
2317 return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
2318}
2319
2320Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
2321 GEPOperator *Src) {
2322 // Combine Indices - If the source pointer to this getelementptr instruction
2323 // is a getelementptr instruction with matching element type, combine the
2324 // indices of the two getelementptr instructions into a single instruction.
2325 if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
2326 return nullptr;
2327
2328 // For constant GEPs, use a more general offset-based folding approach.
2329 Type *PtrTy = Src->getType()->getScalarType();
2330 if (GEP.hasAllConstantIndices() &&
2331 (Src->hasOneUse() || Src->hasAllConstantIndices())) {
2332 // Split Src into a variable part and a constant suffix.
2333 gep_type_iterator GTI = gep_type_begin(*Src);
2334 Type *BaseType = GTI.getIndexedType();
2335 bool IsFirstType = true;
2336 unsigned NumVarIndices = 0;
2337 for (auto Pair : enumerate(Src->indices())) {
2338 if (!isa<ConstantInt>(Pair.value())) {
2339 BaseType = GTI.getIndexedType();
2340 IsFirstType = false;
2341 NumVarIndices = Pair.index() + 1;
2342 }
2343 ++GTI;
2344 }
2345
2346 // Determine the offset for the constant suffix of Src.
2347 APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
2348 if (NumVarIndices != Src->getNumIndices()) {
2349 // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
2350 if (BaseType->isScalableTy())
2351 return nullptr;
2352
2353 SmallVector<Value *> ConstantIndices;
2354 if (!IsFirstType)
2355 ConstantIndices.push_back(
2356 Constant::getNullValue(Type::getInt32Ty(GEP.getContext())));
2357 append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices));
2358 Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
2359 }
2360
2361 // Add the offset for GEP (which is fully constant).
2362 if (!GEP.accumulateConstantOffset(DL, Offset))
2363 return nullptr;
2364
2365 APInt OffsetOld = Offset;
2366 // Convert the total offset back into indices.
2367 SmallVector<APInt> ConstIndices =
2368 DL.getGEPIndicesForOffset(BaseType, Offset);
2369 if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) {
2370 // If both GEP are constant-indexed, and cannot be merged in either way,
2371 // convert them to a GEP of i8.
2372 if (Src->hasAllConstantIndices())
2373 return replaceInstUsesWith(
2374 GEP, Builder.CreateGEP(
2375 Builder.getInt8Ty(), Src->getOperand(0),
2376 Builder.getInt(OffsetOld), "",
2377 isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
2378 return nullptr;
2379 }
2380
2381 bool IsInBounds = isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP));
2382 SmallVector<Value *> Indices;
2383 append_range(Indices, drop_end(Src->indices(),
2384 Src->getNumIndices() - NumVarIndices));
2385 for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) {
2386 Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
2387 // Even if the total offset is inbounds, we may end up representing it
2388 // by first performing a larger negative offset, and then a smaller
2389 // positive one. The large negative offset might go out of bounds. Only
2390 // preserve inbounds if all signs are the same.
2391 IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative();
2392 }
2393
2394 return replaceInstUsesWith(
2395 GEP, Builder.CreateGEP(Src->getSourceElementType(), Src->getOperand(0),
2396 Indices, "", IsInBounds));
2397 }
2398
2399 if (Src->getResultElementType() != GEP.getSourceElementType())
2400 return nullptr;
2401
2402 SmallVector<Value*, 8> Indices;
2403
2404 // Find out whether the last index in the source GEP is a sequential idx.
2405 bool EndsWithSequential = false;
2406 for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
2407 I != E; ++I)
2408 EndsWithSequential = I.isSequential();
2409
2410 // Can we combine the two pointer arithmetics offsets?
2411 if (EndsWithSequential) {
2412 // Replace: gep (gep %P, long B), long A, ...
2413 // With: T = long A+B; gep %P, T, ...
2414 Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
2415 Value *GO1 = GEP.getOperand(1);
2416
2417 // If they aren't the same type, then the input hasn't been processed
2418 // by the loop above yet (which canonicalizes sequential index types to
2419 // intptr_t). Just avoid transforming this until the input has been
2420 // normalized.
2421 if (SO1->getType() != GO1->getType())
2422 return nullptr;
2423
2424 Value *Sum =
2425 simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
2426 // Only do the combine when we are sure the cost after the
2427 // merge is never more than that before the merge.
2428 if (Sum == nullptr)
2429 return nullptr;
2430
2431 // Update the GEP in place if possible.
2432 if (Src->getNumOperands() == 2) {
2433 GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
2434 replaceOperand(GEP, 0, Src->getOperand(0));
2435 replaceOperand(GEP, 1, Sum);
2436 return &GEP;
2437 }
2438 Indices.append(Src->op_begin()+1, Src->op_end()-1);
2439 Indices.push_back(Sum);
2440 Indices.append(GEP.op_begin()+2, GEP.op_end());
2441 } else if (isa<Constant>(*GEP.idx_begin()) &&
2442 cast<Constant>(*GEP.idx_begin())->isNullValue() &&
2443 Src->getNumOperands() != 1) {
2444 // Otherwise we can do the fold if the first index of the GEP is a zero
2445 Indices.append(Src->op_begin()+1, Src->op_end());
2446 Indices.append(GEP.idx_begin()+1, GEP.idx_end());
2447 }
2448
2449 if (!Indices.empty())
2450 return replaceInstUsesWith(
2451 GEP, Builder.CreateGEP(
2452 Src->getSourceElementType(), Src->getOperand(0), Indices, "",
2453 isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
2454
2455 return nullptr;
2456}
2457
2458Value *InstCombinerImpl::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
2459 BuilderTy *Builder,
2460 bool &DoesConsume, unsigned Depth) {
2461 static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
2462 // ~(~(X)) -> X.
2463 Value *A, *B;
2464 if (match(V, m_Not(m_Value(A)))) {
2465 DoesConsume = true;
2466 return A;
2467 }
2468
2469 Constant *C;
2470 // Constants can be considered to be not'ed values.
2471 if (match(V, m_ImmConstant(C)))
2472 return ConstantExpr::getNot(C);
2473
2474 if (Depth++ >= MaxAnalysisRecursionDepth)
2475 return nullptr;
2476
2477 // The rest of the cases require that we invert all uses so don't bother
2478 // doing the analysis if we know we can't use the result.
2479 if (!WillInvertAllUses)
2480 return nullptr;
2481
2482 // Compares can be inverted if all of their uses are being modified to use
2483 // the ~V.
2484 if (auto *I = dyn_cast<CmpInst>(V)) {
2485 if (Builder != nullptr)
2486 return Builder->CreateCmp(I->getInversePredicate(), I->getOperand(0),
2487 I->getOperand(1));
2488 return NonNull;
2489 }
2490
2491 // If `V` is of the form `A + B` then `-1 - V` can be folded into
2492 // `(-1 - B) - A` if we are willing to invert all of the uses.
2493 if (match(V, m_Add(m_Value(A), m_Value(B)))) {
2494 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2495 DoesConsume, Depth))
2496 return Builder ? Builder->CreateSub(BV, A) : NonNull;
2497 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2498 DoesConsume, Depth))
2499 return Builder ? Builder->CreateSub(AV, B) : NonNull;
2500 return nullptr;
2501 }
2502
2503 // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
2504 // into `A ^ B` if we are willing to invert all of the uses.
2505 if (match(V, m_Xor(m_Value(A), m_Value(B)))) {
2506 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2507 DoesConsume, Depth))
2508 return Builder ? Builder->CreateXor(A, BV) : NonNull;
2509 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2510 DoesConsume, Depth))
2511 return Builder ? Builder->CreateXor(AV, B) : NonNull;
2512 return nullptr;
2513 }
2514
2515 // If `V` is of the form `B - A` then `-1 - V` can be folded into
2516 // `A + (-1 - B)` if we are willing to invert all of the uses.
2517 if (match(V, m_Sub(m_Value(A), m_Value(B)))) {
2518 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2519 DoesConsume, Depth))
2520 return Builder ? Builder->CreateAdd(AV, B) : NonNull;
2521 return nullptr;
2522 }
2523
2524 // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
2525 // into `A s>> B` if we are willing to invert all of the uses.
2526 if (match(V, m_AShr(m_Value(A), m_Value(B)))) {
2527 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2528 DoesConsume, Depth))
2529 return Builder ? Builder->CreateAShr(AV, B) : NonNull;
2530 return nullptr;
2531 }
2532
2533 Value *Cond;
2534 // LogicOps are special in that we canonicalize them at the cost of an
2535 // instruction.
2536 bool IsSelect = match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))) &&
2537 !shouldAvoidAbsorbingNotIntoSelect(*cast<SelectInst>(V));
2538 // Selects/min/max with invertible operands are freely invertible
2539 if (IsSelect || match(V, m_MaxOrMin(m_Value(A), m_Value(B)))) {
2540 bool LocalDoesConsume = DoesConsume;
2541 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder*/ nullptr,
2542 LocalDoesConsume, Depth))
2543 return nullptr;
2544 if (Value *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2545 LocalDoesConsume, Depth)) {
2546 DoesConsume = LocalDoesConsume;
2547 if (Builder != nullptr) {
2548 Value *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2549 DoesConsume, Depth);
2550 assert(NotB != nullptr &&
2551 "Unable to build inverted value for known freely invertable op");
2552 if (auto *II = dyn_cast<IntrinsicInst>(V))
2553 return Builder->CreateBinaryIntrinsic(
2554 getInverseMinMaxIntrinsic(II->getIntrinsicID()), NotA, NotB);
2555 return Builder->CreateSelect(Cond, NotA, NotB);
2556 }
2557 return NonNull;
2558 }
2559 }
2560
2561 if (PHINode *PN = dyn_cast<PHINode>(V)) {
2562 bool LocalDoesConsume = DoesConsume;
2563 SmallVector<std::pair<Value *, BasicBlock *>, 8> IncomingValues;
2564 for (Use &U : PN->operands()) {
2565 BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
2566 Value *NewIncomingVal = getFreelyInvertedImpl(
2567 U.get(), /*WillInvertAllUses=*/false,
2568 /*Builder=*/nullptr, LocalDoesConsume, MaxAnalysisRecursionDepth - 1);
2569 if (NewIncomingVal == nullptr)
2570 return nullptr;
2571 // Make sure that we can safely erase the original PHI node.
2572 if (NewIncomingVal == V)
2573 return nullptr;
2574 if (Builder != nullptr)
2575 IncomingValues.emplace_back(NewIncomingVal, IncomingBlock);
2576 }
2577
2578 DoesConsume = LocalDoesConsume;
2579 if (Builder != nullptr) {
2580 IRBuilderBase::InsertPointGuard Guard(*Builder);
2581 Builder->SetInsertPoint(PN);
2582 PHINode *NewPN =
2583 Builder->CreatePHI(PN->getType(), PN->getNumIncomingValues());
2584 for (auto [Val, Pred] : IncomingValues)
2585 NewPN->addIncoming(Val, Pred);
2586 return NewPN;
2587 }
2588 return NonNull;
2589 }
2590
2591 if (match(V, m_SExtLike(m_Value(A)))) {
2592 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2593 DoesConsume, Depth))
2594 return Builder ? Builder->CreateSExt(AV, V->getType()) : NonNull;
2595 return nullptr;
2596 }
2597
2598 if (match(V, m_Trunc(m_Value(A)))) {
2599 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2600 DoesConsume, Depth))
2601 return Builder ? Builder->CreateTrunc(AV, V->getType()) : NonNull;
2602 return nullptr;
2603 }
2604
2605 // De Morgan's Laws:
2606 // (~(A | B)) -> (~A & ~B)
2607 // (~(A & B)) -> (~A | ~B)
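 // Illustrative IR example (not part of the source file): both operands must
 // themselves be freely invertible (e.g. compares), so
 //   %c1 = icmp ult i32 %x, 10
 //   %c2 = icmp ugt i32 %y, 20
 //   %or = or i1 %c1, %c2
 // lets ~%or be materialized as
 //   %nc1 = icmp uge i32 %x, 10
 //   %nc2 = icmp ule i32 %y, 20
 //   %res = and i1 %nc1, %nc2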
2608 auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode,
2609 bool IsLogical, Value *A,
2610 Value *B) -> Value * {
2611 bool LocalDoesConsume = DoesConsume;
2612 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder=*/nullptr,
2613 LocalDoesConsume, Depth))
2614 return nullptr;
2615 if (auto *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2616 LocalDoesConsume, Depth)) {
2617 auto *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2618 LocalDoesConsume, Depth);
2619 DoesConsume = LocalDoesConsume;
2620 if (IsLogical)
2621 return Builder ? Builder->CreateLogicalOp(Opcode, NotA, NotB) : NonNull;
2622 return Builder ? Builder->CreateBinOp(Opcode, NotA, NotB) : NonNull;
2623 }
2624
2625 return nullptr;
2626 };
2627
2628 if (match(V, m_Or(m_Value(A), m_Value(B))))
2629 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A,
2630 B);
2631
2632 if (match(V, m_And(m_Value(A), m_Value(B))))
2633 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A,
2634 B);
2635
2636 if (match(V, m_LogicalOr(m_Value(A), m_Value(B))))
2637 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A,
2638 B);
2639
2640 if (match(V, m_LogicalAnd(m_Value(A), m_Value(B))))
2641 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A,
2642 B);
2643
2644 return nullptr;
2645}
2646
2647Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
2648 Value *PtrOp = GEP.getOperand(0);
2649 SmallVector<Value *, 8> Indices(GEP.indices());
2650 Type *GEPType = GEP.getType();
2651 Type *GEPEltType = GEP.getSourceElementType();
2652 bool IsGEPSrcEleScalable = GEPEltType->isScalableTy();
2653 if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
2654 SQ.getWithInstruction(&GEP)))
2655 return replaceInstUsesWith(GEP, V);
2656
2657 // For vector geps, use the generic demanded vector support.
2658 // Skip if GEP return type is scalable. The number of elements is unknown at
2659 // compile-time.
2660 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
2661 auto VWidth = GEPFVTy->getNumElements();
2662 APInt PoisonElts(VWidth, 0);
2663 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2664 if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
2665 PoisonElts)) {
2666 if (V != &GEP)
2667 return replaceInstUsesWith(GEP, V);
2668 return &GEP;
2669 }
2670
2671 // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
2672 // possible (decide on canonical form for pointer broadcast), 3) exploit
2673 // undef elements to decrease demanded bits
2674 }
2675
2676 // Eliminate unneeded casts for indices, and replace indices which displace
2677 // by multiples of a zero size type with zero.
2678 bool MadeChange = false;
2679
2680 // Index width may not be the same width as pointer width.
2681 // Data layout chooses the right type based on supported integer types.
2682 Type *NewScalarIndexTy =
2683 DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
2684
2685 gep_type_iterator GTI = gep_type_begin(GEP);
2686 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
2687 ++I, ++GTI) {
2688 // Skip indices into struct types.
2689 if (GTI.isStruct())
2690 continue;
2691
2692 Type *IndexTy = (*I)->getType();
2693 Type *NewIndexType =
2694 IndexTy->isVectorTy()
2695 ? VectorType::get(NewScalarIndexTy,
2696 cast<VectorType>(IndexTy)->getElementCount())
2697 : NewScalarIndexTy;
2698
2699 // If the element type has zero size then any index over it is equivalent
2700 // to an index of zero, so replace it with zero if it is not zero already.
2701 Type *EltTy = GTI.getIndexedType();
2702 if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
2703 if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
2704 *I = Constant::getNullValue(NewIndexType);
2705 MadeChange = true;
2706 }
2707
2708 if (IndexTy != NewIndexType) {
2709 // If we are using a wider index than needed for this platform, shrink
2710 // it to what we need. If narrower, sign-extend it to what we need.
2711 // This explicit cast can make subsequent optimizations more obvious.
2712 *I = Builder.CreateIntCast(*I, NewIndexType, true);
2713 MadeChange = true;
2714 }
2715 }
2716 if (MadeChange)
2717 return &GEP;
2718
2719 // Canonicalize constant GEPs to i8 type.
2720 if (!GEPEltType->isIntegerTy(8) && GEP.hasAllConstantIndices()) {
2721 APInt Offset(DL.getIndexTypeSizeInBits(GEPType), 0);
2722 if (GEP.accumulateConstantOffset(DL, Offset))
2723 return replaceInstUsesWith(
2724 GEP, Builder.CreatePtrAdd(PtrOp, Builder.getInt(Offset), "",
2725 GEP.isInBounds()));
2726 }
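 // Illustrative IR example of the canonicalization above (not part of the
 // source file), assuming a data layout where i32 is 4 bytes:
 //   %g = getelementptr inbounds i32, ptr %p, i64 3
 // becomes
 //   %g = getelementptr inbounds i8, ptr %p, i64 12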
2727
2728 // Check to see if the inputs to the PHI node are getelementptr instructions.
2729 if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
2730 auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
2731 if (!Op1)
2732 return nullptr;
2733
2734 // Don't fold a GEP into itself through a PHI node. This can only happen
2735 // through the back-edge of a loop. Folding a GEP into itself means that
2736 // the value of the previous iteration needs to be stored in the meantime,
2737 // thus requiring an additional register variable to be live, but not
2738 // actually achieving anything (the GEP still needs to be executed once per
2739 // loop iteration).
2740 if (Op1 == &GEP)
2741 return nullptr;
2742
2743 int DI = -1;
2744
2745 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
2746 auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
2747 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
2748 Op1->getSourceElementType() != Op2->getSourceElementType())
2749 return nullptr;
2750
2751 // As for Op1 above, don't try to fold a GEP into itself.
2752 if (Op2 == &GEP)
2753 return nullptr;
2754
2755 // Keep track of the type as we walk the GEP.
2756 Type *CurTy = nullptr;
2757
2758 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
2759 if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
2760 return nullptr;
2761
2762 if (Op1->getOperand(J) != Op2->getOperand(J)) {
2763 if (DI == -1) {
2764 // We have not seen any differences yet in the GEPs feeding the
2765 // PHI yet, so we record this one if it is allowed to be a
2766 // variable.
2767
2768 // The first two arguments can vary for any GEP, the rest have to be
2769 // static for struct slots
2770 if (J > 1) {
2771 assert(CurTy && "No current type?");
2772 if (CurTy->isStructTy())
2773 return nullptr;
2774 }
2775
2776 DI = J;
2777 } else {
2778 // The GEP is different by more than one input. While this could be
2779 // extended to support GEPs that vary by more than one variable it
2780 // doesn't make sense since it greatly increases the complexity and
2781 // would result in an R+R+R addressing mode which no backend
2782 // directly supports and would need to be broken into several
2783 // simpler instructions anyway.
2784 return nullptr;
2785 }
2786 }
2787
2788 // Sink down a layer of the type for the next iteration.
2789 if (J > 0) {
2790 if (J == 1) {
2791 CurTy = Op1->getSourceElementType();
2792 } else {
2793 CurTy =
2794 GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
2795 }
2796 }
2797 }
2798 }
2799
2800 // If not all GEPs are identical we'll have to create a new PHI node.
2801 // Check that the old PHI node has only one use so that it will get
2802 // removed.
2803 if (DI != -1 && !PN->hasOneUse())
2804 return nullptr;
2805
2806 auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
2807 if (DI == -1) {
2808 // All the GEPs feeding the PHI are identical. Clone one down into our
2809 // BB so that it can be merged with the current GEP.
2810 } else {
2811 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
2812 // into the current block so it can be merged, and create a new PHI to
2813 // set that index.
2814 PHINode *NewPN;
2815 {
2816 IRBuilderBase::InsertPointGuard Guard(Builder);
2817 Builder.SetInsertPoint(PN);
2818 NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
2819 PN->getNumOperands());
2820 }
2821
2822 for (auto &I : PN->operands())
2823 NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
2824 PN->getIncomingBlock(I));
2825
2826 NewGEP->setOperand(DI, NewPN);
2827 }
2828
2829 NewGEP->insertBefore(*GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
2830 return replaceOperand(GEP, 0, NewGEP);
2831 }
2832
2833 if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
2834 if (Instruction *I = visitGEPOfGEP(GEP, Src))
2835 return I;
2836
2837 // Skip if GEP source element type is scalable. The type alloc size is unknown
2838 // at compile-time.
2839 if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
2840 unsigned AS = GEP.getPointerAddressSpace();
2841 if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
2842 DL.getIndexSizeInBits(AS)) {
2843 uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
2844
2845 if (TyAllocSize == 1) {
2846 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
2847 // but only if the result pointer is only used as if it were an integer,
2848 // or both point to the same underlying object (otherwise provenance is
2849 // not necessarily retained).
2850 Value *X = GEP.getPointerOperand();
2851 Value *Y;
2852 if (match(GEP.getOperand(1),
2853 m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
2854 GEPType == Y->getType()) {
2855 bool HasSameUnderlyingObject =
2856 getUnderlyingObject(X) == getUnderlyingObject(Y);
2857 bool Changed = false;
2858 GEP.replaceUsesWithIf(Y, [&](Use &U) {
2859 bool ShouldReplace = HasSameUnderlyingObject ||
2860 isa<ICmpInst>(U.getUser()) ||
2861 isa<PtrToIntInst>(U.getUser());
2862 Changed |= ShouldReplace;
2863 return ShouldReplace;
2864 });
2865 return Changed ? &GEP : nullptr;
2866 }
2867 } else {
2868 // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
2869 Value *V;
2870 if ((has_single_bit(TyAllocSize) &&
2871 match(GEP.getOperand(1),
2872 m_Exact(m_Shr(m_Value(V),
2873 m_SpecificInt(countr_zero(TyAllocSize)))))) ||
2874 match(GEP.getOperand(1),
2875 m_Exact(m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) {
2876 GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
2877 Builder.getInt8Ty(), GEP.getPointerOperand(), V);
2878 NewGEP->setIsInBounds(GEP.isInBounds());
2879 return NewGEP;
2880 }
2881 }
2882 }
2883 }
2884 // We do not handle pointer-vector geps here.
2885 if (GEPType->isVectorTy())
2886 return nullptr;
2887
2888 if (GEP.getNumIndices() == 1) {
2889 // Try to replace ADD + GEP with GEP + GEP.
2890 Value *Idx1, *Idx2;
2891 if (match(GEP.getOperand(1),
2892 m_OneUse(m_Add(m_Value(Idx1), m_Value(Idx2))))) {
2893 // %idx = add i64 %idx1, %idx2
2894 // %gep = getelementptr i32, ptr %ptr, i64 %idx
2895 // as:
2896 // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
2897 // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
2898 auto *NewPtr = Builder.CreateGEP(GEP.getResultElementType(),
2899 GEP.getPointerOperand(), Idx1);
2900 return GetElementPtrInst::Create(GEP.getResultElementType(), NewPtr,
2901 Idx2);
2902 }
2903 ConstantInt *C;
2904 if (match(GEP.getOperand(1), m_OneUse(m_SExtLike(m_OneUse(m_NSWAdd(
2905 m_Value(Idx1), m_ConstantInt(C))))))) {
2906 // %add = add nsw i32 %idx1, idx2
2907 // %sidx = sext i32 %add to i64
2908 // %gep = getelementptr i32, ptr %ptr, i64 %sidx
2909 // as:
2910 // %newptr = getelementptr i32, ptr %ptr, i32 %idx1
2911 // %newgep = getelementptr i32, ptr %newptr, i32 idx2
2912 auto *NewPtr = Builder.CreateGEP(
2913 GEP.getResultElementType(), GEP.getPointerOperand(),
2914 Builder.CreateSExt(Idx1, GEP.getOperand(1)->getType()));
2915 return GetElementPtrInst::Create(
2916 GEP.getResultElementType(), NewPtr,
2917 Builder.CreateSExt(C, GEP.getOperand(1)->getType()));
2918 }
2919 }
2920
2921 if (!GEP.isInBounds()) {
2922 unsigned IdxWidth =
2923 DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
2924 APInt BasePtrOffset(IdxWidth, 0);
2925 Value *UnderlyingPtrOp =
2926 PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
2927 BasePtrOffset);
2928 bool CanBeNull, CanBeFreed;
2929 uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
2930 DL, CanBeNull, CanBeFreed);
2931 if (!CanBeNull && !CanBeFreed && DerefBytes != 0) {
2932 if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
2933 BasePtrOffset.isNonNegative()) {
2934 APInt AllocSize(IdxWidth, DerefBytes);
2935 if (BasePtrOffset.ule(AllocSize)) {
2936 return GetElementPtrInst::CreateInBounds(
2937 GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
2938 }
2939 }
2940 }
2941 }
2942
2943 if (Instruction *R = foldSelectGEP(GEP, Builder))
2944 return R;
2945
2946 return nullptr;
2947}
2948
2949static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
2950 Instruction *AI) {
2951 if (isa<ConstantPointerNull>(V))
2952 return true;
2953 if (auto *LI = dyn_cast<LoadInst>(V))
2954 return isa<GlobalVariable>(LI->getPointerOperand());
2955 // Two distinct allocations will never be equal.
2956 return isAllocLikeFn(V, &TLI) && V != AI;
2957}
2958
2959/// Given a call CB which uses an address UsedV, return true if we can prove the
2960/// call's only possible effect is storing to UsedV.
2961static bool isRemovableWrite(CallBase &CB, Value *UsedV,
2962 const TargetLibraryInfo &TLI) {
2963 if (!CB.use_empty())
2964 // TODO: add recursion if returned attribute is present
2965 return false;
2966
2967 if (CB.isTerminator())
2968 // TODO: remove implementation restriction
2969 return false;
2970
2971 if (!CB.willReturn() || !CB.doesNotThrow())
2972 return false;
2973
2974 // If the only possible side effect of the call is writing to the alloca,
2975 // and the result isn't used, we can safely remove any reads implied by the
2976 // call including those which might read the alloca itself.
2977 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
2978 return Dest && Dest->Ptr == UsedV;
2979}
2980
2981static bool isAllocSiteRemovable(Instruction *AI,
2982 SmallVectorImpl<WeakTrackingVH> &Users,
2983 const TargetLibraryInfo &TLI) {
2984 SmallVector<Instruction*, 4> Worklist;
2985 const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
2986 Worklist.push_back(AI);
2987
2988 do {
2989 Instruction *PI = Worklist.pop_back_val();
2990 for (User *U : PI->users()) {
2991 Instruction *I = cast<Instruction>(U);
2992 switch (I->getOpcode()) {
2993 default:
2994 // Give up the moment we see something we can't handle.
2995 return false;
2996
2997 case Instruction::AddrSpaceCast:
2998 case Instruction::BitCast:
2999 case Instruction::GetElementPtr:
3000 Users.emplace_back(I);
3001 Worklist.push_back(I);
3002 continue;
3003
3004 case Instruction::ICmp: {
3005 ICmpInst *ICI = cast<ICmpInst>(I);
3006 // We can fold eq/ne comparisons with null to false/true, respectively.
3007 // We also fold comparisons in some conditions provided the alloc has
3008 // not escaped (see isNeverEqualToUnescapedAlloc).
3009 if (!ICI->isEquality())
3010 return false;
3011 unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
3012 if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
3013 return false;
3014
3015 // Do not fold compares to aligned_alloc calls, as they may have to
3016 // return null in case the required alignment cannot be satisfied,
3017 // unless we can prove that both alignment and size are valid.
3018 auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
3019 // Check if alignment and size of a call to aligned_alloc is valid,
3020 // that is alignment is a power-of-2 and the size is a multiple of the
3021 // alignment.
3022 const APInt *Alignment;
3023 const APInt *Size;
3024 return match(CB->getArgOperand(0), m_APInt(Alignment)) &&
3025 match(CB->getArgOperand(1), m_APInt(Size)) &&
3026 Alignment->isPowerOf2() && Size->urem(*Alignment).isZero();
3027 };
3028 auto *CB = dyn_cast<CallBase>(AI);
3029 LibFunc TheLibFunc;
3030 if (CB && TLI.getLibFunc(*CB->getCalledFunction(), TheLibFunc) &&
3031 TLI.has(TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
3032 !AlignmentAndSizeKnownValid(CB))
3033 return false;
3034 Users.emplace_back(I);
3035 continue;
3036 }
3037
3038 case Instruction::Call:
3039 // Ignore no-op and store intrinsics.
3040 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
3041 switch (II->getIntrinsicID()) {
3042 default:
3043 return false;
3044
3045 case Intrinsic::memmove:
3046 case Intrinsic::memcpy:
3047 case Intrinsic::memset: {
3048 MemIntrinsic *MI = cast<MemIntrinsic>(II);
3049 if (MI->isVolatile() || MI->getRawDest() != PI)
3050 return false;
3051 [[fallthrough]];
3052 }
3053 case Intrinsic::assume:
3054 case Intrinsic::invariant_start:
3055 case Intrinsic::invariant_end:
3056 case Intrinsic::lifetime_start:
3057 case Intrinsic::lifetime_end:
3058 case Intrinsic::objectsize:
3059 Users.emplace_back(I);
3060 continue;
3061 case Intrinsic::launder_invariant_group:
3062 case Intrinsic::strip_invariant_group:
3063 Users.emplace_back(I);
3064 Worklist.push_back(I);
3065 continue;
3066 }
3067 }
3068
3069 if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
3070 Users.emplace_back(I);
3071 continue;
3072 }
3073
3074 if (getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
3075 getAllocationFamily(I, &TLI) == Family) {
3076 assert(Family);
3077 Users.emplace_back(I);
3078 continue;
3079 }
3080
3081 if (getReallocatedOperand(cast<CallBase>(I)) == PI &&
3082 getAllocationFamily(I, &TLI) == Family) {
3083 assert(Family);
3084 Users.emplace_back(I);
3085 Worklist.push_back(I);
3086 continue;
3087 }
3088
3089 return false;
3090
3091 case Instruction::Store: {
3092 StoreInst *SI = cast<StoreInst>(I);
3093 if (SI->isVolatile() || SI->getPointerOperand() != PI)
3094 return false;
3095 Users.emplace_back(I);
3096 continue;
3097 }
3098 }
3099 llvm_unreachable("missing a return?");
3100 }
3101 } while (!Worklist.empty());
3102 return true;
3103}
3104
3105Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
3106 assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
3107
3108 // If we have a malloc call which is only used in any amount of comparisons to
3109 // null and free calls, delete the calls and replace the comparisons with true
3110 // or false as appropriate.
3111
3112 // This is based on the principle that we can substitute our own allocation
3113 // function (which will never return null) rather than knowledge of the
3114 // specific function being called. In some sense this can change the permitted
3115 // outputs of a program (when we convert a malloc to an alloca, the fact that
3116 // the allocation is now on the stack is potentially visible, for example),
3117 // but we believe it does so in a permissible manner.
3118 SmallVector<WeakTrackingVH, 64> Users;
3119
3120 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
3121 // before each store.
3122 SmallVector<DbgVariableIntrinsic *, 8> DVIs;
3123 SmallVector<DPValue *, 8> DPVs;
3124 std::unique_ptr<DIBuilder> DIB;
3125 if (isa<AllocaInst>(MI)) {
3126 findDbgUsers(DVIs, &MI, &DPVs);
3127 DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
3128 }
3129
3130 if (isAllocSiteRemovable(&MI, Users, TLI)) {
3131 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
3132 // Lowering all @llvm.objectsize calls first because they may
3133 // use a bitcast/GEP of the alloca we are removing.
3134 if (!Users[i])
3135 continue;
3136
3137 Instruction *I = cast<Instruction>(&*Users[i]);
3138
3139 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
3140 if (II->getIntrinsicID() == Intrinsic::objectsize) {
3141 SmallVector<Instruction *> InsertedInstructions;
3142 Value *Result = lowerObjectSizeCall(
3143 II, DL, &TLI, AA, /*MustSucceed=*/true, &InsertedInstructions);
3144 for (Instruction *Inserted : InsertedInstructions)
3145 Worklist.add(Inserted);
3146 replaceInstUsesWith(*I, Result);
3148 Users[i] = nullptr; // Skip examining in the next loop.
3149 }
3150 }
3151 }
3152 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
3153 if (!Users[i])
3154 continue;
3155
3156 Instruction *I = cast<Instruction>(&*Users[i]);
3157
3158 if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
3160 ConstantInt::get(Type::getInt1Ty(C->getContext()),
3161 C->isFalseWhenEqual()));
3162 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
3163 for (auto *DVI : DVIs)
3164 if (DVI->isAddressOfVariable())
3165 ConvertDebugDeclareToDebugValue(DVI, SI, *DIB);
3166 for (auto *DPV : DPVs)
3167 if (DPV->isAddressOfVariable())
3168 ConvertDebugDeclareToDebugValue(DPV, SI, *DIB);
3169 } else {
3170 // Casts, GEP, or anything else: we're about to delete this instruction,
3171        // so it cannot have any valid uses.
3172 replaceInstUsesWith(*I, PoisonValue::get(I->getType()));
3173 }
3175 }
3176
3177 if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
3178 // Replace invoke with a NOP intrinsic to maintain the original CFG
3179 Module *M = II->getModule();
3180 Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
3181 InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
3182 std::nullopt, "", II->getParent());
3183 }
3184
3185 // Remove debug intrinsics which describe the value contained within the
3186 // alloca. In addition to removing dbg.{declare,addr} which simply point to
3187 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
3188 //
3189 // ```
3190 // define void @foo(i32 %0) {
3191 // %a = alloca i32 ; Deleted.
3192 // store i32 %0, i32* %a
3193 // dbg.value(i32 %0, "arg0") ; Not deleted.
3194 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
3195 // call void @trivially_inlinable_no_op(i32* %a)
3196 // ret void
3197 // }
3198 // ```
3199 //
3200 // This may not be required if we stop describing the contents of allocas
3201 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
3202 // the LowerDbgDeclare utility.
3203 //
3204 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
3205 // "arg0" dbg.value may be stale after the call. However, failing to remove
3206 // the DW_OP_deref dbg.value causes large gaps in location coverage.
3207 //
3208 // FIXME: the Assignment Tracking project has now likely made this
3209 // redundant (and it's sometimes harmful).
3210 for (auto *DVI : DVIs)
3211 if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
3212 DVI->eraseFromParent();
3213 for (auto *DPV : DPVs)
3214 if (DPV->isAddressOfVariable() || DPV->getExpression()->startsWithDeref())
3215 DPV->eraseFromParent();
3216
3217 return eraseInstFromFunction(MI);
3218 }
3219 return nullptr;
3220}
3221
3222/// Move the call to free before a NULL test.
3223///
3224/// Check if this free is accessed after its argument has been tested
3225/// against NULL (property 0).
3226/// If yes, it is legal to move this call into its predecessor block.
3227///
3228/// The move is performed only if the block containing the call to free
3229/// will be removed, i.e.:
3230/// 1. it has only one predecessor P, and P has two successors
3231/// 2. it contains the call, noops, and an unconditional branch
3232/// 3. its successor is the same as its predecessor's successor
3233///
3234/// Profitability is not a concern here; this function should
3235/// be called only if the caller knows this transformation would be
3236/// profitable (e.g., for code size).
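/// Illustrative sketch (added for exposition, not part of the original
/// source): the CFG shape this handles is
///   pred:
///     %cmp = icmp eq ptr %p, null
///     br i1 %cmp, label %cont, label %free.bb
///   free.bb:
///     call void @free(ptr %p)
///     br label %cont
/// and the call to free is hoisted into 'pred', letting SimplifyCFG delete
/// 'free.bb'.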
3238 const DataLayout &DL) {
3239 Value *Op = FI.getArgOperand(0);
3240 BasicBlock *FreeInstrBB = FI.getParent();
3241 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
3242
3243 // Validate part of constraint #1: Only one predecessor
3244  // FIXME: We can extend the number of predecessors, but in that case, we
3245 // would duplicate the call to free in each predecessor and it may
3246 // not be profitable even for code size.
3247 if (!PredBB)
3248 return nullptr;
3249
3250  // Validate constraint #2: Does this block contain only the call to
3251 // free, noops, and an unconditional branch?
3252 BasicBlock *SuccBB;
3253 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
3254 if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
3255 return nullptr;
3256
3257  // If there are only 2 instructions in the block at this point, they must be
3258  // the call to free and the unconditional branch.
3259 // If there are more than 2 instructions, check that they are noops
3260 // i.e., they won't hurt the performance of the generated code.
3261 if (FreeInstrBB->size() != 2) {
3262 for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
3263 if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
3264 continue;
3265 auto *Cast = dyn_cast<CastInst>(&Inst);
3266 if (!Cast || !Cast->isNoopCast(DL))
3267 return nullptr;
3268 }
3269 }
3270 // Validate the rest of constraint #1 by matching on the pred branch.
3271 Instruction *TI = PredBB->getTerminator();
3272 BasicBlock *TrueBB, *FalseBB;
3274 if (!match(TI, m_Br(m_ICmp(Pred,
3276 m_Specific(Op->stripPointerCasts())),
3277 m_Zero()),
3278 TrueBB, FalseBB)))
3279 return nullptr;
3280 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
3281 return nullptr;
3282
3283 // Validate constraint #3: Ensure the null case just falls through.
3284 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
3285 return nullptr;
3286 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
3287 "Broken CFG: missing edge from predecessor to successor");
3288
3289 // At this point, we know that everything in FreeInstrBB can be moved
3290 // before TI.
3291 for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
3292 if (&Instr == FreeInstrBBTerminator)
3293 break;
3294 Instr.moveBeforePreserving(TI);
3295 }
3296 assert(FreeInstrBB->size() == 1 &&
3297 "Only the branch instruction should remain");
3298
3299 // Now that we've moved the call to free before the NULL check, we have to
3300 // remove any attributes on its parameter that imply it's non-null, because
3301 // those attributes might have only been valid because of the NULL check, and
3302 // we can get miscompiles if we keep them. This is conservative if non-null is
3303 // also implied by something other than the NULL check, but it's guaranteed to
3304 // be correct, and the conservativeness won't matter in practice, since the
3305 // attributes are irrelevant for the call to free itself and the pointer
3306 // shouldn't be used after the call.
3307 AttributeList Attrs = FI.getAttributes();
3308 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
3309 Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
3310 if (Dereferenceable.isValid()) {
3311 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
3312 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
3313 Attribute::Dereferenceable);
3314 Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
3315 }
3316 FI.setAttributes(Attrs);
3317
3318 return &FI;
3319}
3320
3322 // free undef -> unreachable.
3323 if (isa<UndefValue>(Op)) {
3324 // Leave a marker since we can't modify the CFG here.
3326 return eraseInstFromFunction(FI);
3327 }
3328
3329 // If we have 'free null' delete the instruction. This can happen in stl code
3330 // when lots of inlining happens.
3331 if (isa<ConstantPointerNull>(Op))
3332 return eraseInstFromFunction(FI);
3333
3334 // If we had free(realloc(...)) with no intervening uses, then eliminate the
3335 // realloc() entirely.
3336 CallInst *CI = dyn_cast<CallInst>(Op);
3337 if (CI && CI->hasOneUse())
3338 if (Value *ReallocatedOp = getReallocatedOperand(CI))
3339 return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
3340
3341  // If we optimize for code size, try to move the call to free before the null
3342  // test so that SimplifyCFG can remove the empty block and dead code
3343  // elimination can remove the branch. I.e., this helps to turn something like:
3344 // if (foo) free(foo);
3345 // into
3346 // free(foo);
3347 //
3348 // Note that we can only do this for 'free' and not for any flavor of
3349 // 'operator delete'; there is no 'operator delete' symbol for which we are
3350 // permitted to invent a call, even if we're passing in a null pointer.
3351 if (MinimizeSize) {
3352 LibFunc Func;
3353 if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
3355 return I;
3356 }
3357
3358 return nullptr;
3359}
3360
3362 Value *RetVal = RI.getReturnValue();
3363 if (!RetVal || !AttributeFuncs::isNoFPClassCompatibleType(RetVal->getType()))
3364 return nullptr;
3365
3366 Function *F = RI.getFunction();
3367 FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass();
3368 if (ReturnClass == fcNone)
3369 return nullptr;
3370
3371 KnownFPClass KnownClass;
3372 Value *Simplified =
3373 SimplifyDemandedUseFPClass(RetVal, ~ReturnClass, KnownClass, 0, &RI);
3374 if (!Simplified)
3375 return nullptr;
3376
3377 return ReturnInst::Create(RI.getContext(), Simplified);
3378}
3379
3380// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
3382 // Try to remove the previous instruction if it must lead to unreachable.
3383 // This includes instructions like stores and "llvm.assume" that may not get
3384 // removed by simple dead code elimination.
3385 bool Changed = false;
3386 while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
3387 // While we theoretically can erase EH, that would result in a block that
3388    // used to start with an EH pad no longer starting with one, which is invalid.
3389 // To make it valid, we'd need to fixup predecessors to no longer refer to
3390 // this block, but that changes CFG, which is not allowed in InstCombine.
3391 if (Prev->isEHPad())
3392      break; // Cannot drop any more instructions. We're done here.
3393
3395      break; // Cannot drop any more instructions. We're done here.
3396 // Otherwise, this instruction can be freely erased,
3397 // even if it is not side-effect free.
3398
3399 // A value may still have uses before we process it here (for example, in
3400 // another unreachable block), so convert those to poison.
3401 replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
3402 eraseInstFromFunction(*Prev);
3403 Changed = true;
3404 }
3405 return Changed;
3406}
3407
3410 return nullptr;
3411}
3412
3414 assert(BI.isUnconditional() && "Only for unconditional branches.");
3415
3416 // If this store is the second-to-last instruction in the basic block
3417 // (excluding debug info and bitcasts of pointers) and if the block ends with
3418 // an unconditional branch, try to move the store to the successor block.
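  // Illustrative sketch (added for exposition, not part of the original
  // source):
  //   bb:
  //     store i32 %v, ptr %p
  //     br label %succ
  // The store may be merged into %succ by mergeStoreIntoSuccessor() when the
  // successor's other predecessor also stores to the same address.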
3419
3420 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
3421 auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
3422 return BBI->isDebugOrPseudoInst() ||
3423 (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
3424 };
3425
3426 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
3427 do {
3428 if (BBI != FirstInstr)
3429 --BBI;
3430 } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));
3431
3432 return dyn_cast<StoreInst>(BBI);
3433 };
3434
3435 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
3436 if (mergeStoreIntoSuccessor(*SI))
3437 return &BI;
3438
3439 return nullptr;
3440}
3441
3444 if (!DeadEdges.insert({From, To}).second)
3445 return;
3446
3447 // Replace phi node operands in successor with poison.
3448 for (PHINode &PN : To->phis())
3449 for (Use &U : PN.incoming_values())
3450 if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(U)) {
3451 replaceUse(U, PoisonValue::get(PN.getType()));
3452 addToWorklist(&PN);
3453 MadeIRChange = true;
3454 }
3455
3456 Worklist.push_back(To);
3457}
3458
3459// Under the assumption that I is unreachable, remove it and the following
3460// instructions. Changes are reported directly to MadeIRChange.
3463 BasicBlock *BB = I->getParent();
3464 for (Instruction &Inst : make_early_inc_range(
3465 make_range(std::next(BB->getTerminator()->getReverseIterator()),
3466 std::next(I->getReverseIterator())))) {
3467 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
3468 replaceInstUsesWith(Inst, PoisonValue::get(Inst.getType()));
3469 MadeIRChange = true;
3470 }
3471 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
3472 continue;
3473 // RemoveDIs: erase debug-info on this instruction manually.
3474 Inst.dropDbgRecords();
3476 MadeIRChange = true;
3477 }
3478
3479 SmallVector<Value *> Changed;
3480 if (handleUnreachableTerminator(BB->getTerminator(), Changed)) {
3481 MadeIRChange = true;
3482 for (Value *V : Changed)
3483 addToWorklist(cast<Instruction>(V));
3484 }
3485
3486 // Handle potentially dead successors.
3487 for (BasicBlock *Succ : successors(BB))
3488 addDeadEdge(BB, Succ, Worklist);
3489}
3490
3493 while (!Worklist.empty()) {
3494 BasicBlock *BB = Worklist.pop_back_val();
3495 if (!all_of(predecessors(BB), [&](BasicBlock *Pred) {
3496 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
3497 }))
3498 continue;
3499
3501 }
3502}
3503
3505 BasicBlock *LiveSucc) {
3507 for (BasicBlock *Succ : successors(BB)) {
3508 // The live successor isn't dead.
3509 if (Succ == LiveSucc)
3510 continue;
3511
3512 addDeadEdge(BB, Succ, Worklist);
3513 }
3514
3516}
3517
3519 if (BI.isUnconditional())
3521
3522 // Change br (not X), label True, label False to: br X, label False, True
3523 Value *Cond = BI.getCondition();
3524 Value *X;
3525 if (match(Cond, m_Not(m_Value(X))) && !isa<Constant>(X)) {
3526 // Swap Destinations and condition...
3527 BI.swapSuccessors();
3528 return replaceOperand(BI, 0, X);
3529 }
3530
3531 // Canonicalize logical-and-with-invert as logical-or-with-invert.
3532 // This is done by inverting the condition and swapping successors:
3533 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
3534 Value *Y;
3535 if (isa<SelectInst>(Cond) &&
3536 match(Cond,
3538 Value *NotX = Builder.CreateNot(X, "not." + X->getName());
3539 Value *Or = Builder.CreateLogicalOr(NotX, Y);
3540 BI.swapSuccessors();
3541 return replaceOperand(BI, 0, Or);
3542 }
3543
3544 // If the condition is irrelevant, remove the use so that other
3545 // transforms on the condition become more effective.
3546 if (!isa<ConstantInt>(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1))
3547 return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType()));
3548
3549 // Canonicalize, for example, fcmp_one -> fcmp_oeq.
3550 CmpInst::Predicate Pred;
3551 if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) &&
3552 !isCanonicalPredicate(Pred)) {
3553 // Swap destinations and condition.
3554 auto *Cmp = cast<CmpInst>(Cond);
3555 Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
3556 BI.swapSuccessors();
3557 Worklist.push(Cmp);
3558 return &BI;
3559 }
3560
3561 if (isa<UndefValue>(Cond)) {
3562 handlePotentiallyDeadSuccessors(BI.getParent(), /*LiveSucc*/ nullptr);
3563 return nullptr;
3564 }
3565 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
3567 BI.getSuccessor(!CI->getZExtValue()));
3568 return nullptr;
3569 }
3570
3571 DC.registerBranch(&BI);
3572 return nullptr;
3573}
3574
3576 Value *Cond = SI.getCondition();
3577 Value *Op0;
3578 ConstantInt *AddRHS;
3579 if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
3580 // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
3581 for (auto Case : SI.cases()) {
3582 Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
3583 assert(isa<ConstantInt>(NewCase) &&
3584 "Result of expression should be constant");
3585 Case.setValue(cast<ConstantInt>(NewCase));
3586 }
3587 return replaceOperand(SI, 0, Op0);
3588 }
3589
3590 ConstantInt *SubLHS;
3591 if (match(Cond, m_Sub(m_ConstantInt(SubLHS), m_Value(Op0)))) {
3592 // Change 'switch (1-X) case 1:' into 'switch (X) case 0'.
3593 for (auto Case : SI.cases()) {
3594 Constant *NewCase = ConstantExpr::getSub(SubLHS, Case.getCaseValue());
3595 assert(isa<ConstantInt>(NewCase) &&
3596 "Result of expression should be constant");
3597 Case.setValue(cast<ConstantInt>(NewCase));
3598 }
3599 return replaceOperand(SI, 0, Op0);
3600 }
3601
3602 uint64_t ShiftAmt;
3603 if (match(Cond, m_Shl(m_Value(Op0), m_ConstantInt(ShiftAmt))) &&
3604 ShiftAmt < Op0->getType()->getScalarSizeInBits() &&
3605 all_of(SI.cases(), [&](const auto &Case) {
3606 return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt;
3607 })) {
3608 // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'.
3609 OverflowingBinaryOperator *Shl = cast<OverflowingBinaryOperator>(Cond);
3610 if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() ||
3611 Shl->hasOneUse()) {
3612 Value *NewCond = Op0;
3613 if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) {
3614 // If the shift may wrap, we need to mask off the shifted bits.
3615 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
3616 NewCond = Builder.CreateAnd(
3617 Op0, APInt::getLowBitsSet(BitWidth, BitWidth - ShiftAmt));
3618 }
3619 for (auto Case : SI.cases()) {
3620 const APInt &CaseVal = Case.getCaseValue()->getValue();
3621 APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt)
3622 : CaseVal.lshr(ShiftAmt);
3623 Case.setValue(ConstantInt::get(SI.getContext(), ShiftedCase));
3624 }
3625 return replaceOperand(SI, 0, NewCond);
3626 }
3627 }
3628
3629 // Fold switch(zext/sext(X)) into switch(X) if possible.
3630 if (match(Cond, m_ZExtOrSExt(m_Value(Op0)))) {
3631 bool IsZExt = isa<ZExtInst>(Cond);
3632 Type *SrcTy = Op0->getType();
3633 unsigned NewWidth = SrcTy->getScalarSizeInBits();
3634
3635 if (all_of(SI.cases(), [&](const auto &Case) {
3636 const APInt &CaseVal = Case.getCaseValue()->getValue();
3637 return IsZExt ? CaseVal.isIntN(NewWidth)
3638 : CaseVal.isSignedIntN(NewWidth);
3639 })) {
3640 for (auto &Case : SI.cases()) {
3641 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
3642 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
3643 }
3644 return replaceOperand(SI, 0, Op0);
3645 }
3646 }
3647
3648 KnownBits Known = computeKnownBits(Cond, 0, &SI);
3649 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
3650 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
3651
3652 // Compute the number of leading bits we can ignore.
3653 // TODO: A better way to determine this would use ComputeNumSignBits().
3654 for (const auto &C : SI.cases()) {
3655 LeadingKnownZeros =
3656 std::min(LeadingKnownZeros, C.getCaseValue()->getValue().countl_zero());
3657 LeadingKnownOnes =
3658 std::min(LeadingKnownOnes, C.getCaseValue()->getValue().countl_one());
3659 }
3660
3661 unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
3662
3663 // Shrink the condition operand if the new type is smaller than the old type.
3664  // But do not shrink to a non-standard type, because the backend can't generate
3665 // good code for that yet.
3666 // TODO: We can make it aggressive again after fixing PR39569.
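  // Illustrative sketch (added for exposition, not part of the original
  // source): with a 64-bit condition whose upper 32 bits are known zero and
  // whose case values all fit in 32 bits,
  //   switch i64 %x, label %default [ i64 1, label %bb1 ]
  // becomes
  //   %trunc = trunc i64 %x to i32
  //   switch i32 %trunc, label %default [ i32 1, label %bb1 ]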
3667 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
3668 shouldChangeType(Known.getBitWidth(), NewWidth)) {
3669 IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
3671 Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
3672
3673 for (auto Case : SI.cases()) {
3674 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
3675 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
3676 }
3677 return replaceOperand(SI, 0, NewCond);
3678 }
3679
3680 if (isa<UndefValue>(Cond)) {
3681 handlePotentiallyDeadSuccessors(SI.getParent(), /*LiveSucc*/ nullptr);
3682 return nullptr;
3683 }
3684 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
3685 handlePotentiallyDeadSuccessors(SI.getParent(),
3686 SI.findCaseValue(CI)->getCaseSuccessor());
3687 return nullptr;
3688 }
3689
3690 return nullptr;
3691}
3692
3694InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
3695 auto *WO = dyn_cast<WithOverflowInst>(EV.getAggregateOperand());
3696 if (!WO)
3697 return nullptr;
3698
3699 Intrinsic::ID OvID = WO->getIntrinsicID();
3700 const APInt *C = nullptr;
3701 if (match(WO->getRHS(), m_APIntAllowUndef(C))) {
3702 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
3703 OvID == Intrinsic::umul_with_overflow)) {
3704 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
3705 if (C->isAllOnes())
3706 return BinaryOperator::CreateNeg(WO->getLHS());
3707 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
3708 if (C->isPowerOf2()) {
3709 return BinaryOperator::CreateShl(
3710 WO->getLHS(),
3711 ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
3712 }
3713 }
3714 }
3715
3716 // We're extracting from an overflow intrinsic. See if we're the only user.
3717 // That allows us to simplify multiple result intrinsics to simpler things
3718 // that just get one value.
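  // Illustrative sketch (added for exposition, not part of the original
  // source):
  //   %r = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
  //   %v = extractvalue { i32, i1 } %r, 0   ; sole use of %r
  // becomes
  //   %v = add i32 %a, %b
  // (the plain binary operator carries no overflow flags).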
3719 if (!WO->hasOneUse())
3720 return nullptr;
3721
3722 // Check if we're grabbing only the result of a 'with overflow' intrinsic
3723 // and replace it with a traditional binary instruction.
3724 if (*EV.idx_begin() == 0) {
3725 Instruction::BinaryOps BinOp = WO->getBinaryOp();
3726 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
3727 // Replace the old instruction's uses with poison.
3728 replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
3730 return BinaryOperator::Create(BinOp, LHS, RHS);
3731 }
3732
3733 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
3734
3735 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
3736 if (OvID == Intrinsic::usub_with_overflow)
3737 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
3738
3739 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
3740  // +1 is not representable in a signed 1-bit value.
3741 if (OvID == Intrinsic::smul_with_overflow &&
3742 WO->getLHS()->getType()->isIntOrIntVectorTy(1))
3743 return BinaryOperator::CreateAnd(WO->getLHS(), WO->getRHS());
3744
3745 // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1
3746 if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) {
3747 unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits();
3748 // Only handle even bitwidths for performance reasons.
3749 if (BitWidth % 2 == 0)
3750 return new ICmpInst(
3751 ICmpInst::ICMP_UGT, WO->getLHS(),
3752 ConstantInt::get(WO->getLHS()->getType(),
3754 }
3755
3756 // If only the overflow result is used, and the right hand side is a
3757 // constant (or constant splat), we can remove the intrinsic by directly
3758 // checking for overflow.
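  // Illustrative sketch (added for exposition, not part of the original
  // source):
  //   %r = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 100)
  //   %ov = extractvalue { i8, i1 } %r, 1
  // becomes
  //   %ov = icmp sgt i8 %x, 27
  // since signed i8 addition of 100 overflows exactly when %x > 27.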
3759 if (C) {
3760 // Compute the no-wrap range for LHS given RHS=C, then construct an
3761 // equivalent icmp, potentially using an offset.
3763 WO->getBinaryOp(), *C, WO->getNoWrapKind());
3764
3765 CmpInst::Predicate Pred;
3766 APInt NewRHSC, Offset;
3767 NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
3768 auto *OpTy = WO->getRHS()->getType();
3769 auto *NewLHS = WO->getLHS();
3770 if (Offset != 0)
3771 NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
3772 return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
3773 ConstantInt::get(OpTy, NewRHSC));
3774 }
3775
3776 return nullptr;
3777}
3778
3780 Value *Agg = EV.getAggregateOperand();
3781
3782 if (!EV.hasIndices())
3783 return replaceInstUsesWith(EV, Agg);
3784
3785 if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
3786 SQ.getWithInstruction(&EV)))
3787 return replaceInstUsesWith(EV, V);
3788
3789 if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
3790    // We're extracting from an insertvalue instruction; compare the indices.
3791 const unsigned *exti, *exte, *insi, *inse;
3792 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
3793 exte = EV.idx_end(), inse = IV->idx_end();
3794 exti != exte && insi != inse;
3795 ++exti, ++insi) {
3796 if (*insi != *exti)
3797 // The insert and extract both reference distinctly different elements.
3798 // This means the extract is not influenced by the insert, and we can
3799 // replace the aggregate operand of the extract with the aggregate
3800 // operand of the insert. i.e., replace
3801 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3802 // %E = extractvalue { i32, { i32 } } %I, 0
3803 // with
3804 // %E = extractvalue { i32, { i32 } } %A, 0
3805 return ExtractValueInst::Create(IV->getAggregateOperand(),
3806 EV.getIndices());
3807 }
3808 if (exti == exte && insi == inse)
3809 // Both iterators are at the end: Index lists are identical. Replace
3810 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3811 // %C = extractvalue { i32, { i32 } } %B, 1, 0
3812 // with "i32 42"
3813 return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
3814 if (exti == exte) {
3815 // The extract list is a prefix of the insert list. i.e. replace
3816 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3817 // %E = extractvalue { i32, { i32 } } %I, 1
3818 // with
3819 // %X = extractvalue { i32, { i32 } } %A, 1
3820 // %E = insertvalue { i32 } %X, i32 42, 0
3821 // by switching the order of the insert and extract (though the
3822 // insertvalue should be left in, since it may have other uses).
3823 Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
3824 EV.getIndices());
3825 return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
3826 ArrayRef(insi, inse));
3827 }
3828 if (insi == inse)
3829 // The insert list is a prefix of the extract list
3830 // We can simply remove the common indices from the extract and make it
3831 // operate on the inserted value instead of the insertvalue result.
3832 // i.e., replace
3833 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3834 // %E = extractvalue { i32, { i32 } } %I, 1, 0
3835 // with
3836      //   %E = extractvalue { i32 } { i32 42 }, 0
3837 return ExtractValueInst::Create(IV->getInsertedValueOperand(),
3838 ArrayRef(exti, exte));
3839 }
3840
3841 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
3842 return R;
3843
3844 if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
3845    // Bail out if the aggregate contains a scalable vector type.
3846 if (auto *STy = dyn_cast<StructType>(Agg->getType());
3847 STy && STy->containsScalableVectorType())
3848 return nullptr;
3849
3850 // If the (non-volatile) load only has one use, we can rewrite this to a
3851 // load from a GEP. This reduces the size of the load. If a load is used
3852 // only by extractvalue instructions then this either must have been
3853 // optimized before, or it is a struct with padding, in which case we
3854 // don't want to do the transformation as it loses padding knowledge.
3855 if (L->isSimple() && L->hasOneUse()) {
3856 // extractvalue has integer indices, getelementptr has Value*s. Convert.
3857 SmallVector<Value*, 4> Indices;
3858 // Prefix an i32 0 since we need the first element.
3859 Indices.push_back(Builder.getInt32(0));
3860 for (unsigned Idx : EV.indices())
3861 Indices.push_back(Builder.getInt32(Idx));
3862
3863 // We need to insert these at the location of the old load, not at that of
3864 // the extractvalue.
3866 Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
3867 L->getPointerOperand(), Indices);
3869      // Whatever aliasing information we had for the original load must also
3870 // hold for the smaller load, so propagate the annotations.
3871 NL->setAAMetadata(L->getAAMetadata());
3872 // Returning the load directly will cause the main loop to insert it in
3873 // the wrong spot, so use replaceInstUsesWith().
3874 return replaceInstUsesWith(EV, NL);
3875 }
3876 }
3877
3878 if (auto *PN = dyn_cast<PHINode>(Agg))
3879 if (Instruction *Res = foldOpIntoPhi(EV, PN))
3880 return Res;
3881
3882 // Canonicalize extract (select Cond, TV, FV)
3883 // -> select cond, (extract TV), (extract FV)
3884 if (auto *SI = dyn_cast<SelectInst>(Agg))
3885 if (Instruction *R = FoldOpIntoSelect(EV, SI, /*FoldWithMultiUse=*/true))
3886 return R;
3887
3888 // We could simplify extracts from other values. Note that nested extracts may
3889 // already be simplified implicitly by the above: extract (extract (insert) )
3890 // will be translated into extract ( insert ( extract ) ) first and then just
3891 // the value inserted, if appropriate. Similarly for extracts from single-use
3892 // loads: extract (extract (load)) will be translated to extract (load (gep))
3893 // and if again single-use then via load (gep (gep)) to load (gep).
3894 // However, double extracts from e.g. function arguments or return values
3895 // aren't handled yet.
3896 return nullptr;
3897}
3898
3899/// Return 'true' if the given typeinfo will match anything.
3900static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
3901 switch (Personality) {
3905    // The GCC C EH and Rust personalities only exist to support cleanups, so
3906 // it's not clear what the semantics of catch clauses are.
3907 return false;
3909 return false;
3911 // While __gnat_all_others_value will match any Ada exception, it doesn't
3912 // match foreign exceptions (or didn't, before gcc-4.7).
3913 return false;
3923 return TypeInfo->isNullValue();
3924 }
3925 llvm_unreachable("invalid enum");
3926}
3927
3928static bool shorter_filter(const Value *LHS, const Value *RHS) {
3929 return
3930 cast<ArrayType>(LHS->getType())->getNumElements()
3931 <
3932 cast<ArrayType>(RHS->getType())->getNumElements();
3933}
3934
3936 // The logic here should be correct for any real-world personality function.
3937 // However if that turns out not to be true, the offending logic can always
3938 // be conditioned on the personality function, like the catch-all logic is.
3939 EHPersonality Personality =
3940 classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
3941
3942  // Simplify the list of clauses, e.g. by removing repeated catch clauses
3943 // (these are often created by inlining).
3944 bool MakeNewInstruction = false; // If true, recreate using the following:
3945 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
3946 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
3947
3948 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
3949 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
3950 bool isLastClause = i + 1 == e;
3951 if (LI.isCatch(i)) {
3952 // A catch clause.
3953 Constant *CatchClause = LI.getClause(i);
3954 Constant *TypeInfo = CatchClause->stripPointerCasts();
3955
3956 // If we already saw this clause, there is no point in having a second
3957 // copy of it.
3958 if (AlreadyCaught.insert(TypeInfo).second) {
3959 // This catch clause was not already seen.
3960 NewClauses.push_back(CatchClause);
3961 } else {
3962 // Repeated catch clause - drop the redundant copy.
3963 MakeNewInstruction = true;
3964 }
3965
3966 // If this is a catch-all then there is no point in keeping any following
3967 // clauses or marking the landingpad as having a cleanup.
3968 if (isCatchAll(Personality, TypeInfo)) {
3969 if (!isLastClause)
3970 MakeNewInstruction = true;
3971 CleanupFlag = false;
3972 break;
3973 }
3974 } else {
3975 // A filter clause. If any of the filter elements were already caught
3976 // then they can be dropped from the filter. It is tempting to try to
3977 // exploit the filter further by saying that any typeinfo that does not
3978 // occur in the filter can't be caught later (and thus can be dropped).
3979 // However this would be wrong, since typeinfos can match without being
3980 // equal (for example if one represents a C++ class, and the other some
3981 // class derived from it).
3982 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
3983 Constant *FilterClause = LI.getClause(i);
3984 ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
3985 unsigned NumTypeInfos = FilterType->getNumElements();
3986
3987 // An empty filter catches everything, so there is no point in keeping any
3988 // following clauses or marking the landingpad as having a cleanup. By
3989 // dealing with this case here the following code is made a bit simpler.
3990 if (!NumTypeInfos) {
3991 NewClauses.push_back(FilterClause);
3992 if (!isLastClause)
3993 MakeNewInstruction = true;
3994 CleanupFlag = false;
3995 break;
3996 }
3997
3998 bool MakeNewFilter = false; // If true, make a new filter.
3999 SmallVector<Constant *, 16> NewFilterElts; // New elements.
4000 if (isa<ConstantAggregateZero>(FilterClause)) {
4001 // Not an empty filter - it contains at least one null typeinfo.
4002 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
4003 Constant *TypeInfo =
4005 // If this typeinfo is a catch-all then the filter can never match.
4006 if (isCatchAll(Personality, TypeInfo)) {
4007 // Throw the filter away.
4008 MakeNewInstruction = true;
4009 continue;
4010 }
4011
4012 // There is no point in having multiple copies of this typeinfo, so
4013 // discard all but the first copy if there is more than one.
4014 NewFilterElts.push_back(TypeInfo);
4015 if (NumTypeInfos > 1)
4016 MakeNewFilter = true;
4017 } else {
4018 ConstantArray *Filter = cast<ConstantArray>(FilterClause);
4019 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
4020 NewFilterElts.reserve(NumTypeInfos);
4021
4022 // Remove any filter elements that were already caught or that already
4023 // occurred in the filter. While there, see if any of the elements are
4024 // catch-alls. If so, the filter can be discarded.
4025 bool SawCatchAll = false;
4026 for (unsigned j = 0; j != NumTypeInfos; ++j) {
4027 Constant *Elt = Filter->getOperand(j);
4028 Constant *TypeInfo = Elt->stripPointerCasts();
4029 if (isCatchAll(Personality, TypeInfo)) {
4030 // This element is a catch-all. Bail out, noting this fact.
4031 SawCatchAll = true;
4032 break;
4033 }
4034
4035 // Even if we've seen a type in a catch clause, we don't want to
4036 // remove it from the filter. An unexpected type handler may be
4037 // set up for a call site which throws an exception of the same
4038 // type caught. In order for the exception thrown by the unexpected
4039 // handler to propagate correctly, the filter must be correctly
4040 // described for the call site.
4041 //
4042 // Example:
4043 //
4044 // void unexpected() { throw 1;}
4045 // void foo() throw (int) {
4046 // std::set_unexpected(unexpected);
4047 // try {
4048 // throw 2.0;
4049 // } catch (int i) {}
4050 // }
4051
4052 // There is no point in having multiple copies of the same typeinfo in
4053 // a filter, so only add it if we didn't already.
4054 if (SeenInFilter.insert(TypeInfo).second)
4055 NewFilterElts.push_back(cast<Constant>(Elt));
4056 }
4057 // A filter containing a catch-all cannot match anything by definition.
4058 if (SawCatchAll) {
4059 // Throw the filter away.
4060 MakeNewInstruction = true;
4061 continue;
4062 }
4063
4064 // If we dropped something from the filter, make a new one.
4065 if (NewFilterElts.size() < NumTypeInfos)
4066 MakeNewFilter = true;
4067 }
4068 if (MakeNewFilter) {
4069 FilterType = ArrayType::get(FilterType->getElementType(),
4070 NewFilterElts.size());
4071 FilterClause = ConstantArray::get(FilterType, NewFilterElts);
4072 MakeNewInstruction = true;
4073 }
4074
4075 NewClauses.push_back(FilterClause);
4076
4077 // If the new filter is empty then it will catch everything so there is
4078 // no point in keeping any following clauses or marking the landingpad
4079 // as having a cleanup. The case of the original filter being empty was
4080 // already handled above.
4081 if (MakeNewFilter && !NewFilterElts.size()) {
4082 assert(MakeNewInstruction && "New filter but not a new instruction!");
4083 CleanupFlag = false;
4084 break;
4085 }
4086 }
4087 }
4088
4089 // If several filters occur in a row then reorder them so that the shortest
4090 // filters come first (those with the smallest number of elements). This is
4091 // advantageous because shorter filters are more likely to match, speeding up
4092 // unwinding, but mostly because it increases the effectiveness of the other
4093 // filter optimizations below.
4094 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
4095 unsigned j;
4096 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
4097 for (j = i; j != e; ++j)
4098 if (!isa<ArrayType>(NewClauses[j]->getType()))
4099 break;
4100
4101 // Check whether the filters are already sorted by length. We need to know
4102 // if sorting them is actually going to do anything so that we only make a
4103 // new landingpad instruction if it does.
4104 for (unsigned k = i; k + 1 < j; ++k)
4105 if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
4106 // Not sorted, so sort the filters now. Doing an unstable sort would be
4107 // correct too but reordering filters pointlessly might confuse users.
4108 std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
4110 MakeNewInstruction = true;
4111 break;
4112 }
4113
4114 // Look for the next batch of filters.
4115 i = j + 1;
4116 }
4117
4118 // If typeinfos matched if and only if equal, then the elements of a filter L
4119 // that occurs later than a filter F could be replaced by the intersection of
4120 // the elements of F and L. In reality two typeinfos can match without being
4121 // equal (for example if one represents a C++ class, and the other some class
4122 // derived from it) so it would be wrong to perform this transform in general.
4123 // However the transform is correct and useful if F is a subset of L. In that
4124 // case L can be replaced by F, and thus removed altogether since repeating a
4125 // filter is pointless. So here we look at all pairs of filters F and L where
4126 // L follows F in the list of clauses, and remove L if every element of F is
4127 // an element of L. This can occur when inlining C++ functions with exception
4128 // specifications.
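  // Illustrative sketch (added for exposition, not part of the original
  // source): given the clause list
  //   filter [1 x ptr] [ptr @typeid1]                 ; F
  //   filter [2 x ptr] [ptr @typeid1, ptr @typeid2]   ; L
  // every element of F is also in L, so L is redundant and is removed.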
4129 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
4130 // Examine each filter in turn.
4131 Value *Filter = NewClauses[i];
4132 ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
4133 if (!FTy)
4134 // Not a filter - skip it.
4135 continue;
4136 unsigned FElts = FTy->getNumElements();
4137 // Examine each filter following this one. Doing this backwards means that
4138 // we don't have to worry about filters disappearing under us when removed.
4139 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
4140 Value *LFilter = NewClauses[j];
4141 ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
4142 if (!LTy)
4143 // Not a filter - skip it.
4144 continue;
4145 // If Filter is a subset of LFilter, i.e. every element of Filter is also
4146 // an element of LFilter, then discard LFilter.
4147 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
4148 // If Filter is empty then it is a subset of LFilter.
4149 if (!FElts) {
4150 // Discard LFilter.
4151 NewClauses.erase(J);
4152 MakeNewInstruction = true;
4153 // Move on to the next filter.
4154 continue;
4155 }
4156 unsigned LElts = LTy->getNumElements();
4157 // If Filter is longer than LFilter then it cannot be a subset of it.
4158 if (FElts > LElts)
4159 // Move on to the next filter.
4160 continue;
4161 // At this point we know that LFilter has at least one element.
4162 if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
4163 // Filter is a subset of LFilter iff Filter contains only zeros (as we
4164 // already know that Filter is not longer than LFilter).
4165 if (isa<ConstantAggregateZero>(Filter)) {
4166 assert(FElts <= LElts && "Should have handled this case earlier!");
4167 // Discard LFilter.
4168 NewClauses.erase(J);
4169 MakeNewInstruction = true;
4170 }
4171 // Move on to the next filter.
4172 continue;
4173 }
4174 ConstantArray *LArray = cast<ConstantArray>(LFilter);
4175 if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
4176 // Since Filter is non-empty and contains only zeros, it is a subset of
4177 // LFilter iff LFilter contains a zero.
4178 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
4179 for (unsigned l = 0; l != LElts; ++l)
4180 if (LArray->getOperand(l)->isNullValue()) {
4181 // LFilter contains a zero - discard it.
4182 NewClauses.erase(J);
4183 MakeNewInstruction = true;
4184 break;
4185 }
4186 // Move on to the next filter.
4187 continue;
4188 }
4189 // At this point we know that both filters are ConstantArrays. Loop over
4190 // operands to see whether every element of Filter is also an element of
4191 // LFilter. Since filters tend to be short this is probably faster than
4192 // using a method that scales nicely.
4193 ConstantArray *FArray = cast<ConstantArray>(Filter);
4194 bool AllFound = true;
4195 for (unsigned f = 0; f != FElts; ++f) {
4196 Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
4197 AllFound = false;
4198 for (unsigned l = 0; l != LElts; ++l) {
4199 Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
4200 if (LTypeInfo == FTypeInfo) {
4201 AllFound = true;
4202 break;
4203 }
4204 }
4205 if (!AllFound)
4206 break;
4207 }
4208 if (AllFound) {
4209 // Discard LFilter.
4210 NewClauses.erase(J);
4211 MakeNewInstruction = true;
4212 }
4213 // Move on to the next filter.
4214 }
4215 }
4216
4217 // If we changed any of the clauses, replace the old landingpad instruction
4218 // with a new one.
4219 if (MakeNewInstruction) {
4220 LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
4221 NewClauses.size());
4222 for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
4223 NLI->addClause(NewClauses[i]);
4224 // A landing pad with no clauses must have the cleanup flag set. It is
4225 // theoretically possible, though highly unlikely, that we eliminated all
4226 // clauses. If so, force the cleanup flag to true.
4227 if (NewClauses.empty())
4228 CleanupFlag = true;
4229 NLI->setCleanup(CleanupFlag);
4230 return NLI;
4231 }
4232
4233 // Even if none of the clauses changed, we may nonetheless have understood
4234 // that the cleanup flag is pointless. Clear it if so.
4235 if (LI.isCleanup() != CleanupFlag) {
4236 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
4237 LI.setCleanup(CleanupFlag);
4238 return &LI;
4239 }
4240
4241 return nullptr;
4242}
4243
4244Value *
4246 // Try to push freeze through instructions that propagate but don't produce
4247  // poison as far as possible. If the operand of the freeze satisfies three
4248  // conditions: 1) it has one use, 2) it does not produce poison, and 3) all
4249  // but one of its operands are guaranteed non-poison, then push the freeze
4250  // through to the one operand that is not guaranteed non-poison. The actual
4251  // transform is as follows.
4252  // Op1 = ...                        ; Op1 can be poison
4253  // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and all of its
4254  //                                  ; operands other than Op1 are guaranteed non-poison
4255 // ... = Freeze(Op0)
4256 // =>
4257 // Op1 = ...
4258 // Op1.fr = Freeze(Op1)
4259 // ... = Inst(Op1.fr, NonPoisonOps...)
4260 auto *OrigOp = OrigFI.getOperand(0);
4261 auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
4262
4263 // While we could change the other users of OrigOp to use freeze(OrigOp), that
4264 // potentially reduces their optimization potential, so let's only do this iff
4265 // the OrigOp is only used by the freeze.
4266 if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
4267 return nullptr;
4268
4269 // We can't push the freeze through an instruction which can itself create
4270 // poison. If the only source of new poison is flags, we can simply
4271 // strip them (since we know the only use is the freeze and nothing can
4272 // benefit from them.)
4273 if (canCreateUndefOrPoison(cast<Operator>(OrigOp),
4274 /*ConsiderFlagsAndMetadata*/ false))
4275 return nullptr;
4276
4277 // If operand is guaranteed not to be poison, there is no need to add freeze
4278 // to the operand. So we first find the operand that is not guaranteed to be
4279 // poison.
4280 Use *MaybePoisonOperand = nullptr;
4281 for (Use &U : OrigOpInst->operands()) {
4282 if (isa<MetadataAsValue>(U.get()) ||
4284 continue;
4285 if (!MaybePoisonOperand)
4286 MaybePoisonOperand = &U;
4287 else
4288 return nullptr;
4289 }
4290
4291 OrigOpInst->dropPoisonGeneratingFlagsAndMetadata();
4292
4293 // If all operands are guaranteed to be non-poison, we can drop freeze.
4294 if (!MaybePoisonOperand)
4295 return OrigOp;
4296
4297 Builder.SetInsertPoint(OrigOpInst);
4298 auto *FrozenMaybePoisonOperand = Builder.CreateFreeze(
4299 MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr");
4300
4301 replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand);
4302 return OrigOp;
4303}
4304
4306 PHINode *PN) {
4307 // Detect whether this is a recurrence with a start value and some number of
4308 // backedge values. We'll check whether we can push the freeze through the
4309 // backedge values (possibly dropping poison flags along the way) until we
4310 // reach the phi again. In that case, we can move the freeze to the start
4311 // value.
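  // Illustrative sketch (added for exposition, not part of the original
  // source):
  //   %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
  //   %iv.next = add nuw i32 %iv, 1
  //   %fr = freeze i32 %iv
  // becomes
  //   %start.fr = freeze i32 %start
  //   %iv = phi i32 [ %start.fr, %entry ], [ %iv.next, %loop ]
  //   %iv.next = add i32 %iv, 1        ; poison-generating flag dropped
  // and the freeze itself is replaced by %iv.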
4312 Use *StartU = nullptr;
4314 for (Use &U : PN->incoming_values()) {
4315 if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
4316 // Add backedge value to worklist.
4317 Worklist.push_back(U.get());
4318 continue;
4319 }
4320
4321 // Don't bother handling multiple start values.
4322 if (StartU)
4323 return nullptr;
4324 StartU = &U;
4325 }
4326
4327 if (!StartU || Worklist.empty())
4328 return nullptr; // Not a recurrence.
4329
4330 Value *StartV = StartU->get();
4331 BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
4332 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
4333 // We can't insert freeze if the start value is the result of the
4334 // terminator (e.g. an invoke).
4335 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
4336 return nullptr;
4337
4340 while (!Worklist.empty()) {
4341 Value *V = Worklist.pop_back_val();
4342 if (!Visited.insert(V).second)
4343 continue;
4344
4345 if (Visited.size() > 32)
4346 return nullptr; // Limit the total number of values we inspect.
4347
4348 // Assume that PN is non-poison, because it will be after the transform.
4349 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
4350 continue;
4351
4352 Instruction *I = dyn_cast<Instruction>(V);
4353 if (!I || canCreateUndefOrPoison(cast<Operator>(I),
4354 /*ConsiderFlagsAndMetadata*/ false))
4355 return nullptr;
4356
4357 DropFlags.push_back(I);
4358 append_range(Worklist, I->operands());
4359 }
4360
4361 for (Instruction *I : DropFlags)
4362 I->dropPoisonGeneratingFlagsAndMetadata();
4363
4364 if (StartNeedsFreeze) {
4366 Value *FrozenStartV = Builder.CreateFreeze(StartV,
4367 StartV->getName() + ".fr");
4368 replaceUse(*StartU, FrozenStartV);
4369 }
4370 return replaceInstUsesWith(FI, PN);
4371}
4372
4374 Value *Op = FI.getOperand(0);
4375
4376 if (isa<Constant>(Op) || Op->hasOneUse())
4377 return false;
4378
4379 // Move the freeze directly after the definition of its operand, so that
4380 // it dominates the maximum number of uses. Note that it may not dominate
4381 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
4382 // the normal/default destination. This is why the domination check in the
4383 // replacement below is still necessary.
4384 BasicBlock::iterator MoveBefore;
4385 if (isa<Argument>(Op)) {
4386 MoveBefore =
4388 } else {
4389 auto MoveBeforeOpt = cast<Instruction>(Op)->getInsertionPointAfterDef();
4390 if (!MoveBeforeOpt)
4391 return false;
4392 MoveBefore = *MoveBeforeOpt;
4393 }
4394
4395 // Don't move to the position of a debug intrinsic.
4396 if (isa<DbgInfoIntrinsic>(MoveBefore))
4397 MoveBefore = MoveBefore->getNextNonDebugInstruction()->getIterator();
4398 // Re-point iterator to come after any debug-info records, if we're
4399 // running in "RemoveDIs" mode
4400 MoveBefore.setHeadBit(false);
4401
4402 bool Changed = false;
4403 if (&FI != &*MoveBefore) {
4404 FI.moveBefore(*MoveBefore->getParent(), MoveBefore);
4405 Changed = true;
4406 }
4407
4408 Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
4409 bool Dominates = DT.dominates(&FI, U);
4410 Changed |= Dominates;
4411 return Dominates;
4412 });
4413
4414 return Changed;
4415}
4416
4417// Check if any direct or bitcast user of this value is a shuffle instruction.
4419 for (auto *U : V->users()) {
4420 if (isa<ShuffleVectorInst>(U))
4421 return true;
4422 else if (match(U, m_BitCast(m_Specific(V))) && isUsedWithinShuffleVector(U))
4423 return true;
4424 }
4425 return false;
4426}
4427
4429 Value *Op0 = I.getOperand(0);
4430
4432 return replaceInstUsesWith(I, V);
4433
4434 // freeze (phi const, x) --> phi const, (freeze x)
4435 if (auto *PN = dyn_cast<PHINode>(Op0)) {
4436 if (Instruction *NV = foldOpIntoPhi(I, PN))
4437 return NV;
4438 if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
4439 return NV;
4440 }
4441
4443 return replaceInstUsesWith(I, NI);
4444
4445 // If I is freeze(undef), check its uses and fold it to a fixed constant.
4446 // - or: pick -1
4447 // - select's condition: if the true value is constant, choose it by making
4448 // the condition true.
4449 // - default: pick 0
4450 //
4451 // Note that this transform is intentionally done here rather than
4452 // via an analysis in InstSimplify or at individual user sites. That is
4453 // because we must produce the same value for all uses of the freeze -
4454 // it's the reason "freeze" exists!
4455 //
4456 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
4457 // duplicating logic for binops at least.
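  // Illustrative sketch (added for exposition, not part of the original
  // source):
  //   %f = freeze i32 undef
  //   %o = or i32 %f, %x
  // picks -1 for %f (the absorbing element of 'or'), whereas a freeze used
  // both by an 'or' and by some other kind of instruction falls back to 0.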
4458 auto getUndefReplacement = [&I](Type *Ty) {
4459 Constant *BestValue = nullptr;
4460 Constant *NullValue = Constant::getNullValue(Ty);
4461 for (const auto *U : I.users()) {
4462 Constant *C = NullValue;
4463 if (match(U, m_Or(m_Value(), m_Value())))
4465 else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
4466 C = ConstantInt::getTrue(Ty);
4467
4468 if (!BestValue)
4469 BestValue = C;
4470 else if (BestValue != C)
4471 BestValue = NullValue;
4472 }
4473 assert(BestValue && "Must have at least one use");
4474 return BestValue;
4475 };
4476
4477 if (match(Op0, m_Undef())) {
4478 // Don't fold freeze(undef/poison) if it's used as a vector operand in
4479 // a shuffle. This may improve codegen for shuffles that allow
4480 // unspecified inputs.
4482 return nullptr;
4483 return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
4484 }
4485
4486 Constant *C;
4487 if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) {
4488 Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType());
4490 }
4491
4492 // Replace uses of Op with freeze(Op).
4493 if (freezeOtherUses(I))
4494 return &I;
4495
4496 return nullptr;
4497}
4498
4499/// Check for case where the call writes to an otherwise dead alloca. This
4500/// shows up for unused out-params in idiomatic C/C++ code. Note that this
4501/// helper *only* analyzes the write; doesn't check any other legality aspect.
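/// Illustrative sketch (added for exposition, not part of the original
/// source), in C-like pseudocode with a hypothetical callee:
///   int unused;
///   get_pair(&wanted, &unused);   // writes through &unused, never read back
/// The write into 'unused' is the only access to that dead local, so it does
/// not block sinking the call.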
4503 auto *CB = dyn_cast<CallBase>(I);
4504 if (!CB)
4505 // TODO: handle e.g. store to alloca here - only worth doing if we extend
4506 // to allow reload along used path as described below. Otherwise, this
4507 // is simply a store to a dead allocation which will be removed.
4508 return false;
4509 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
4510 if (!Dest)
4511 return false;
4512 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
4513 if (!AI)
4514 // TODO: allow malloc?
4515 return false;
4516 // TODO: allow memory access dominated by move point? Note that since AI
4517 // could have a reference to itself captured by the call, we would need to
4518 // account for cycles in doing so.
4519 SmallVector<const User *> AllocaUsers;
4521 auto pushUsers = [&](const Instruction &I) {
4522 for (const User *U : I.users()) {
4523 if (Visited.insert(U).second)
4524 AllocaUsers.push_back(U);
4525 }
4526 };
4527 pushUsers(*AI);
4528 while (!AllocaUsers.empty()) {
4529 auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
4530 if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
4531 isa<AddrSpaceCastInst>(UserI)) {
4532 pushUsers(*UserI);
4533 continue;
4534 }
4535 if (UserI == CB)
4536 continue;
4537 // TODO: support lifetime.start/end here
4538 return false;
4539 }
4540 return true;
4541}
4542
4543/// Try to move the specified instruction from its current block into the
4544/// beginning of DestBlock, which can only happen if it's safe to move the
4545/// instruction past all of the instructions between it and the end of its
4546/// block.
4548 BasicBlock *DestBlock) {
4549 BasicBlock *SrcBlock = I->getParent();
4550
4551 // Cannot move control-flow-involving, volatile loads, vaarg, etc.
4552 if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
4553 I->isTerminator())
4554 return false;
4555
4556 // Do not sink static or dynamic alloca instructions. Static allocas must
4557 // remain in the entry block, and dynamic allocas must not be sunk in between
4558 // a stacksave / stackrestore pair, which would incorrectly shorten its
4559 // lifetime.
4560 if (isa<AllocaInst>(I))
4561 return false;
4562
4563 // Do not sink into catchswitch blocks.
4564 if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
4565 return false;
4566
4567 // Do not sink convergent call instructions.
4568 if (auto *CI = dyn_cast<CallInst>(I)) {
4569 if (CI->isConvergent())
4570 return false;
4571 }
4572
4573  // Unless we can prove that the memory write isn't visible except on the
4574 // path we're sinking to, we must bail.
4575 if (I->mayWriteToMemory()) {
4576 if (!SoleWriteToDeadLocal(I, TLI))
4577 return false;
4578 }
4579
4580 // We can only sink load instructions if there is nothing between the load and
4581 // the end of block that could change the value.
4582 if (I->mayReadFromMemory()) {
4583 // We don't want to do any sophisticated alias analysis, so we only check
4584 // the instructions after I in I's parent block if we try to sink to its
4585 // successor block.
4586 if (DestBlock->getUniquePredecessor() != I->getParent())
4587 return false;
4588 for (BasicBlock::iterator Scan = std::next(I->getIterator()),
4589 E = I->getParent()->end();
4590 Scan != E; ++Scan)
4591 if (Scan->mayWriteToMemory())
4592 return false;
4593 }
4594
4595 I->dropDroppableUses([&](const Use *U) {
4596 auto *I = dyn_cast<Instruction>(U->getUser());
4597 if (I && I->getParent() != DestBlock) {
4598 Worklist.add(I);
4599 return true;
4600 }
4601 return false;
4602 });
4603 /// FIXME: We could remove droppable uses that are not dominated by
4604 /// the new position.
4605
4606 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
4607 I->moveBefore(*DestBlock, InsertPos);
4608 ++NumSunkInst;
4609
4610 // Also sink all related debug uses from the source basic block. Otherwise we
4611 // get debug use before the def. Attempt to salvage debug uses first, to
4612  // maximise the range over which variables have a location. If we cannot salvage, then
4613 // mark the location undef: we know it was supposed to receive a new location
4614 // here, but that computation has been sunk.
4617 findDbgUsers(DbgUsers, I, &DPValues);
4618 if (!DbgUsers.empty())
4619 tryToSinkInstructionDbgValues(I, InsertPos, SrcBlock, DestBlock, DbgUsers);
4620 if (!DPValues.empty())
4621 tryToSinkInstructionDPValues(I, InsertPos, SrcBlock, DestBlock, DPValues);
4622
4623 // PS: there are numerous flaws with this behaviour, not least that right now
4624 // assignments can be re-ordered past other assignments to the same variable
4625  // if they use different Values. Creating more undef assignments can never be
4626  // undone. And salvaging all users outside of this block can unnecessarily
4627 // alter the lifetime of the live-value that the variable refers to.
4628 // Some of these things can be resolved by tolerating debug use-before-defs in
4629 // LLVM-IR, however it depends on the instruction-referencing CodeGen backend
4630 // being used for more architectures.
4631
4632 return true;
4633}
4634
4636 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
4638 // For all debug values in the destination block, the sunk instruction
4639 // will still be available, so they do not need to be dropped.
4641 for (auto &DbgUser : DbgUsers)
4642 if (DbgUser->getParent() != DestBlock)
4643 DbgUsersToSalvage.push_back(DbgUser);
4644
4645 // Process the sinking DbgUsersToSalvage in reverse order, as we only want
4646 // to clone the last appearing debug intrinsic for each given variable.
4648 for (DbgVariableIntrinsic *DVI : DbgUsersToSalvage)
4649 if (DVI->getParent() == SrcBlock)
4650 DbgUsersToSink.push_back(DVI);
4651 llvm::sort(DbgUsersToSink,
4652 [](auto *A, auto *B) { return B->comesBefore(A); });
4653
4654 SmallVector<DbgVariableIntrinsic *, 2> DIIClones;
4655 SmallSet<DebugVariable, 4> SunkVariables;
4656 for (auto *User : DbgUsersToSink) {
4657 // A dbg.declare instruction should not be cloned, since there can only be
4658 // one per variable fragment. It should be left in the original place
4659 // because the sunk instruction is not an alloca (otherwise we could not be
4660 // here).
4661 if (isa<DbgDeclareInst>(User))
4662 continue;
4663
4664 DebugVariable DbgUserVariable =
4665 DebugVariable(User->getVariable(), User->getExpression(),
4666 User->getDebugLoc()->getInlinedAt());
4667
4668 if (!SunkVariables.insert(DbgUserVariable).second)
4669 continue;
4670
4671 // Leave dbg.assign intrinsics in their original positions; there should
4672 // be no need to insert a clone.
4673 if (isa<DbgAssignIntrinsic>(User))
4674 continue;
4675
4676 DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
4677 if (isa<DbgDeclareInst>(User) && isa<CastInst>(I))
4678 DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0));
4679 LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
4680 }
4681
4682 // Perform salvaging without the clones, then sink the clones.
4683 if (!DIIClones.empty()) {
4684 salvageDebugInfoForDbgValues(*I, DbgUsersToSalvage, {});
4685 // The clones are in reverse order of original appearance, reverse again to
4686 // maintain the original order.
4687 for (auto &DIIClone : llvm::reverse(DIIClones)) {
4688 DIIClone->insertBefore(&*InsertPos);
4689 LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
4690 }
4691 }
4692}
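// Illustrative example (not part of this file): if
//   %sum = add i32 %a, %b
// is sunk and a dbg.value in another block refers to %sum, salvaging rewrites
// that debug use to describe the value in terms of the add's operands via a
// DIExpression (for example, an operand plus a constant); when no such rewrite
// is possible the location is set to undef, as described above.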
4693
4694 void InstCombinerImpl::tryToSinkInstructionDPValues(
4695 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
4696 BasicBlock *DestBlock, SmallVectorImpl<DPValue *> &DPValues) {
4697 // Implementation of tryToSinkInstructionDbgValues, but for the DPValue
4698 // records of variable assignments rather than dbg.value intrinsics.
4699
4700 // Fetch all DPValues not already in the destination.
4701 SmallVector<DPValue *, 2> DPValuesToSalvage;
4702 for (auto &DPV : DPValues)
4703 if (DPV->getParent() != DestBlock)
4704 DPValuesToSalvage.push_back(DPV);
4705
4706 // Fetch a second collection: the DPValues in the source block that we're
4707 // going to sink.
4708 SmallVector<DPValue *> DPValuesToSink;
4709 for (DPValue *DPV : DPValuesToSalvage)
4710 if (DPV->getParent() == SrcBlock)
4711 DPValuesToSink.push_back(DPV);
4712
4713 // Sort DPValues according to their position in the block. This is a partial
4714 // order: DPValues attached to different instructions will be ordered by the
4715 // instruction order, but DPValues attached to the same instruction won't
4716 // have an order.
4717 auto Order = [](DPValue *A, DPValue *B) -> bool {
4718 return B->getInstruction()->comesBefore(A->getInstruction());
4719 };
4720 llvm::stable_sort(DPValuesToSink, Order);
4721
4722 // If there are two assignments to the same variable attached to the same
4723 // instruction, the ordering between the two assignments is important. Scan
4724 // for this (rare) case and establish which is the last assignment.
4725 using InstVarPair = std::pair<const Instruction *, DebugVariable>;
4726 SmallDenseMap<InstVarPair, DPValue *> FilterOutMap;
4727 if (DPValuesToSink.size() > 1) {
4728 SmallDenseMap<InstVarPair, unsigned> CountMap;
4729 // Count how many assignments to each variable there are per instruction.
4730 for (DPValue *DPV : DPValuesToSink) {
4731 DebugVariable DbgUserVariable =
4732 DebugVariable(DPV->getVariable(), DPV->getExpression(),
4733 DPV->getDebugLoc()->getInlinedAt());
4734 CountMap[std::make_pair(DPV->getInstruction(), DbgUserVariable)] += 1;
4735 }
4736
4737 // If there are any instructions with two assignments, add them to the
4738 // FilterOutMap to record that they need extra filtering.
4739 SmallPtrSet<const Instruction *, 4> DupSet;
4740 for (auto It : CountMap) {
4741 if (It.second > 1) {
4742 FilterOutMap[It.first] = nullptr;
4743 DupSet.insert(It.first.first);
4744 }
4745 }
4746
4747 // For all instruction/variable pairs needing extra filtering, find the
4748 // latest assignment.
4749 for (const Instruction *Inst : DupSet) {
4750 for (DPValue &DPV :
4751 llvm::reverse(filterDbgVars(Inst->getDbgRecordRange()))) {
4752 DebugVariable DbgUserVariable =
4753 DebugVariable(DPV.getVariable(), DPV.getExpression(),
4754 DPV.getDebugLoc()->getInlinedAt());
4755 auto FilterIt =
4756 FilterOutMap.find(std::make_pair(Inst, DbgUserVariable));
4757 if (FilterIt == FilterOutMap.end())
4758 continue;
4759 if (FilterIt->second != nullptr)
4760 continue;
4761 FilterIt->second = &DPV;
4762 }
4763 }
4764 }
4765
4766 // Perform cloning of the DPValues that we plan on sinking, filtering out
4767 // any duplicate assignments identified above.
4768 SmallVector<DPValue *, 2> DPVClones;
4769 SmallSet<DebugVariable, 4> SunkVariables;
4770 for (DPValue *DPV : DPValuesToSink) {
4771 if (DPV->Type == DPValue::LocationType::Declare)
4772 continue;
4773
4774 DebugVariable DbgUserVariable =
4775 DebugVariable(DPV->getVariable(), DPV->getExpression(),
4776 DPV->getDebugLoc()->getInlinedAt());
4777
4778 // For any variable where there were multiple assignments in the same place,
4779 // ignore all but the last assignment.
4780 if (!FilterOutMap.empty()) {
4781 InstVarPair IVP = std::make_pair(DPV->getInstruction(), DbgUserVariable);
4782 auto It = FilterOutMap.find(IVP);
4783
4784 // Filter out.
4785 if (It != FilterOutMap.end() && It->second != DPV)
4786 continue;
4787 }
4788
4789 if (!SunkVariables.insert(DbgUserVariable).second)
4790 continue;
4791
4792 if (DPV->isDbgAssign())
4793 continue;
4794
4795 DPVClones.emplace_back(DPV->clone());
4796 LLVM_DEBUG(dbgs() << "CLONE: " << *DPVClones.back() << '\n');
4797 }
4798
4799 // Perform salvaging without the clones, then sink the clones.
4800 if (DPVClones.empty())
4801 return;
4802
4803 salvageDebugInfoForDbgValues(*I, {}, DPValuesToSalvage);
4804
4805 // The clones are in reverse order of original appearance. Assert that the
4806 // head bit is set on the iterator as we _should_ have received it via
4807 // getFirstInsertionPt. Inserting like this will reverse the clone order as
4808 // we'll repeatedly insert at the head, such as:
4809 // DPV-3 (third insertion goes here)
4810 // DPV-2 (second insertion goes here)
4811 // DPV-1 (first insertion goes here)
4812 // Any-Prior-DPVs
4813 // InsertPtInst
4814 assert(InsertPos.getHeadBit());
4815 for (DPValue *DPVClone : DPVClones) {
4816 InsertPos->getParent()->insertDbgRecordBefore(DPVClone, InsertPos);
4817 LLVM_DEBUG(dbgs() << "SINK: " << *DPVClone << '\n');
4818 }
4819}
4820
4821 bool InstCombinerImpl::run() {
4822 while (!Worklist.isEmpty()) {
4823 // Walk deferred instructions in reverse order, and push them to the
4824 // worklist, which means they'll end up popped from the worklist in-order.
4825 while (Instruction *I = Worklist.popDeferred()) {
4826 // Check to see if we can DCE the instruction. We do this already here to
4827 // reduce the number of uses and thus allow other folds to trigger.
4828 // Note that eraseInstFromFunction() may push additional instructions on
4829 // the deferred worklist, so this will DCE whole instruction chains.
4830 if (isInstructionTriviallyDead(I, &TLI)) {
4831 eraseInstFromFunction(*I);
4832 ++NumDeadInst;
4833 continue;
4834 }
4835
4836 Worklist.push(I);
4837 }
4838
4839 Instruction *I = Worklist.removeOne();
4840 if (I == nullptr) continue; // skip null values.
4841
4842 // Check to see if we can DCE the instruction.
4843 if (isInstructionTriviallyDead(I, &TLI)) {
4844 eraseInstFromFunction(*I);
4845 ++NumDeadInst;
4846 continue;
4847 }
4848
4849 if (!DebugCounter::shouldExecute(VisitCounter))
4850 continue;
4851
4852 // See if we can trivially sink this instruction to its user if we can
4853 // prove that the successor is not executed more frequently than our block.
4854 // Return the UserBlock if successful.
4855 auto getOptionalSinkBlockForInst =
4856 [this](Instruction *I) -> std::optional<BasicBlock *> {
4857 if (!EnableCodeSinking)
4858 return std::nullopt;
4859
4860 BasicBlock *BB = I->getParent();
4861 BasicBlock *UserParent = nullptr;
4862 unsigned NumUsers = 0;
4863
4864 for (auto *U : I->users()) {
4865 if (U->isDroppable())
4866 continue;
4867 if (NumUsers > MaxSinkNumUsers)
4868 return std::nullopt;
4869
4870 Instruction *UserInst = cast<Instruction>(U);
4871 // Special handling for Phi nodes - get the block the use occurs in.
4872 if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
4873 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
4874 if (PN->getIncomingValue(i) == I) {
4875 // Bail out if we have uses in different blocks. We don't do any
4876 // sophisticated analysis (i.e. finding the NearestCommonDominator of
4877 // these use blocks).
4878 if (UserParent && UserParent != PN->getIncomingBlock(i))
4879 return std::nullopt;
4880 UserParent = PN->getIncomingBlock(i);
4881 }
4882 }
4883 assert(UserParent && "expected to find user block!");
4884 } else {
4885 if (UserParent && UserParent != UserInst->getParent())
4886 return std::nullopt;
4887 UserParent = UserInst->getParent();
4888 }
4889
4890 // Make sure these checks are done only once; naturally, we do them the
4891 // first time we get the user parent, which saves compile time.
4892 if (NumUsers == 0) {
4893 // Try sinking to another block. If that block is unreachable, then do
4894 // not bother. SimplifyCFG should handle it.
4895 if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
4896 return std::nullopt;
4897
4898 auto *Term = UserParent->getTerminator();
4899 // See if the user is one of our successors that has only one
4900 // predecessor, so that we don't have to split the critical edge.
4901 // Another option where we can sink is a block that ends with a
4902 // terminator that does not pass control to another block (such as
4903 // return or unreachable or resume). In this case:
4904 // - I dominates the User (by SSA form);
4905 // - the User will be executed at most once.
4906 // So sinking I down to User is always profitable or neutral.
4907 if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
4908 return std::nullopt;
4909
4910 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
4911 }
4912
4913 NumUsers++;
4914 }
4915
4916 // No users, or only droppable users.
4917 if (!UserParent)
4918 return std::nullopt;
4919
4920 return UserParent;
4921 };
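// Illustrative example (not part of this file):
//   bb:
//     %v = add i32 %a, %b
//     br i1 %c, label %user, label %other
//   user:              ; single predecessor %bb, holds the only use of %v
//     %w = mul i32 %v, 2
// Here getOptionalSinkBlockForInst returns %user, so the add ends up executed
// only on the path where its result is actually needed.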
4922
4923 auto OptBB = getOptionalSinkBlockForInst(I);
4924 if (OptBB) {
4925 auto *UserParent = *OptBB;
4926 // Okay, the CFG is simple enough, try to sink this instruction.
4927 if (tryToSinkInstruction(I, UserParent)) {
4928 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
4929 MadeIRChange = true;
4930 // We'll add uses of the sunk instruction below, but since
4931 // sinking can expose opportunities for its *operands*, add
4932 // them to the worklist.
4933 for (Use &U : I->operands())
4934 if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
4935 Worklist.push(OpI);
4936 }
4937 }
4938
4939 // Now that we have an instruction, try combining it to simplify it.
4940 Builder.SetInsertPoint(I);
4941 Builder.CollectMetadataToCopy(
4942 I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
4943
4944#ifndef NDEBUG
4945 std::string OrigI;
4946#endif
4947 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
4948 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
4949
4950 if (Instruction *Result = visit(*I)) {
4951 ++NumCombined;
4952 // Should we replace the old instruction with a new one?
4953 if (Result != I) {
4954 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
4955 << " New = " << *Result << '\n');
4956
4957 Result->copyMetadata(*I,
4958 {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
4959 // Everything uses the new instruction now.
4960 I->replaceAllUsesWith(Result);
4961
4962 // Move the name to the new instruction first.
4963 Result->takeName(I);
4964
4965 // Insert the new instruction into the basic block...
4966 BasicBlock *InstParent = I->getParent();
4967 BasicBlock::iterator InsertPos = I->getIterator();
4968
4969 // Are we replacing a PHI with something that isn't a PHI, or vice versa?
4970 if (isa<PHINode>(Result) != isa<PHINode>(I)) {
4971 // We need to fix up the insertion point.
4972 if (isa<PHINode>(I)) // PHI -> Non-PHI
4973 InsertPos = InstParent->getFirstInsertionPt();
4974 else // Non-PHI -> PHI
4975 InsertPos = InstParent->getFirstNonPHIIt();
4976 }
4977
4978 Result->insertInto(InstParent, InsertPos);
4979
4980 // Push the new instruction and any users onto the worklist.
4981 Worklist.pushUsersToWorkList(*Result);
4982 Worklist.push(Result);
4983
4984 eraseInstFromFunction(*I);
4985 } else {
4986 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
4987 << " New = " << *I << '\n');
4988
4989 // If the instruction was modified, it's possible that it is now dead.
4990 // If so, remove it.
4991 if (isInstructionTriviallyDead(I, &TLI)) {
4992 eraseInstFromFunction(*I);
4993 } else {
4994 Worklist.pushUsersToWorkList(*I);
4995 Worklist.push(I);
4996 }
4997 }
4998 MadeIRChange = true;
4999 }
5000 }
5001
5002 Worklist.zap();
5003 return MadeIRChange;
5004}
5005
5006// Track the scopes used by !alias.scope and !noalias. In a function, a
5007// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
5008// by both sets. If not, the declaration of the scope can be safely omitted.
5009// The MDNode of the scope can be omitted as well for the instructions that are
5010// part of this function. We do not do that at this point, as this might become
5011// too time consuming to do.
5012 class AliasScopeTracker {
5013 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
5014 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
5015
5016public:
5017 void analyse(Instruction *I) {
5018 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
5019 if (!I->hasMetadataOtherThanDebugLoc())
5020 return;
5021
5022 auto Track = [](Metadata *ScopeList, auto &Container) {
5023 const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
5024 if (!MDScopeList || !Container.insert(MDScopeList).second)
5025 return;
5026 for (const auto &MDOperand : MDScopeList->operands())
5027 if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
5028 Container.insert(MDScope);
5029 };
5030
5031 Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
5032 Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
5033 }
5034
5035 bool isNoAliasScopeDeclDead(Instruction *Inst) {
5036 NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
5037 if (!Decl)
5038 return false;
5039
5040 assert(Decl->use_empty() &&
5041 "llvm.experimental.noalias.scope.decl in use ?");
5042 const MDNode *MDSL = Decl->getScopeList();
5043 assert(MDSL->getNumOperands() == 1 &&
5044 "llvm.experimental.noalias.scope should refer to a single scope");
5045 auto &MDOperand = MDSL->getOperand(0);
5046 if (auto *MD = dyn_cast<MDNode>(MDOperand))
5047 return !UsedAliasScopesAndLists.contains(MD) ||
5048 !UsedNoAliasScopesAndLists.contains(MD);
5049
5050 // Not an MDNode? Throw it away.
5051 return true;
5052 }
5053};
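// Illustrative example (not part of this file): a declaration such as
//   call void @llvm.experimental.noalias.scope.decl(metadata !2)
// is reported dead by isNoAliasScopeDeclDead unless some instruction in the
// function names scope !2 in an !alias.scope list *and* some instruction
// names it in a !noalias list; a scope used by only one of the two kinds of
// metadata cannot affect any alias query.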
5054
5055/// Populate the IC worklist from a function, by walking it in reverse
5056/// post-order and adding all reachable code to the worklist.
5057///
5058/// This has a couple of tricks to make the code faster and more powerful. In
5059/// particular, we constant fold and DCE instructions as we go, to avoid adding
5060/// them to the worklist (this significantly speeds up instcombine on code where
5061/// many instructions are dead or constant). Additionally, if we find a branch
5062/// whose condition is a known constant, we only visit the reachable successors.
5063 bool InstCombinerImpl::prepareWorklist(
5064 Function &F, ReversePostOrderTraversal<BasicBlock *> &RPOT) {
5065 bool MadeIRChange = false;
5066 SmallPtrSet<BasicBlock *, 32> LiveBlocks;
5067 SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
5068 DenseMap<Constant *, Constant *> FoldedConstants;
5069 AliasScopeTracker SeenAliasScopes;
5070
5071 auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
5072 for (BasicBlock *Succ : successors(BB))
5073 if (Succ != LiveSucc && DeadEdges.insert({BB, Succ}).second)
5074 for (PHINode &PN : Succ->phis())
5075 for (Use &U : PN.incoming_values())
5076 if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
5077 U.set(PoisonValue::get(PN.getType()));
5078 MadeIRChange = true;
5079 }
5080 };
5081
5082 for (BasicBlock *BB : RPOT) {
5083 if (!BB->isEntryBlock() && all_of(predecessors(BB), [&](BasicBlock *Pred) {
5084 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
5085 })) {
5086 HandleOnlyLiveSuccessor(BB, nullptr);
5087 continue;
5088 }
5089 LiveBlocks.insert(BB);
5090
5091 for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
5092 // ConstantProp instruction if trivially constant.
5093 if (!Inst.use_empty() &&
5094 (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
5095 if (Constant *C = ConstantFoldInstruction(&Inst, DL, &TLI)) {
5096 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
5097 << '\n');
5098 Inst.replaceAllUsesWith(C);
5099 ++NumConstProp;
5100 if (isInstructionTriviallyDead(&Inst, &TLI))
5101 Inst.eraseFromParent();
5102 MadeIRChange = true;
5103 continue;
5104 }
5105
5106 // See if we can constant fold its operands.
5107 for (Use &U : Inst.operands()) {
5108 if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
5109 continue;
5110
5111 auto *C = cast<Constant>(U);
5112 Constant *&FoldRes = FoldedConstants[C];
5113 if (!FoldRes)
5114 FoldRes = ConstantFoldConstant(C, DL, &TLI);
5115
5116 if (FoldRes != C) {
5117 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
5118 << "\n Old = " << *C
5119 << "\n New = " << *FoldRes << '\n');
5120 U = FoldRes;
5121 MadeIRChange = true;
5122 }
5123 }
5124
5125 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
5126 // these call instructions consumes a non-trivial amount of time and
5127 // provides no value for the optimization.
5128 if (!Inst.isDebugOrPseudoInst()) {
5129 InstrsForInstructionWorklist.push_back(&Inst);
5130 SeenAliasScopes.analyse(&Inst);
5131 }
5132 }
5133
5134 // If this is a branch or switch on a constant, mark only the single
5135 // live successor. Otherwise assume all successors are live.
5136 Instruction *TI = BB->getTerminator();
5137 if (BranchInst *BI = dyn_cast<BranchInst>(TI); BI && BI->isConditional()) {
5138 if (isa<UndefValue>(BI->getCondition())) {
5139 // Branch on undef is UB.
5140 HandleOnlyLiveSuccessor(BB, nullptr);
5141 continue;
5142 }
5143 if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
5144 bool CondVal = Cond->getZExtValue();
5145 HandleOnlyLiveSuccessor(BB, BI->getSuccessor(!CondVal));
5146 continue;
5147 }
5148 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
5149 if (isa<UndefValue>(SI->getCondition())) {
5150 // Switch on undef is UB.
5151 HandleOnlyLiveSuccessor(BB, nullptr);
5152 continue;
5153 }
5154 if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
5155 HandleOnlyLiveSuccessor(BB,
5156 SI->findCaseValue(Cond)->getCaseSuccessor());
5157 continue;
5158 }
5159 }
5160 }
5161
5162 // Remove instructions inside unreachable blocks. This prevents the
5163 // instcombine code from having to deal with some bad special cases, and
5164 // reduces use counts of instructions.
5165 for (BasicBlock &BB : F) {
5166 if (LiveBlocks.count(&BB))
5167 continue;
5168
5169 unsigned NumDeadInstInBB;
5170 unsigned NumDeadDbgInstInBB;
5171 std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
5172 removeAllNonTerminatorAndEHPadInstructions(&BB);
5173
5174 MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
5175 NumDeadInst += NumDeadInstInBB;
5176 }
5177
5178 // Once we've found all of the instructions to add to instcombine's worklist,
5179 // add them in reverse order. This way instcombine will visit from the top
5180 // of the function down. This jives well with the way that it adds all uses
5181 // of instructions to the worklist after doing a transformation, thus avoiding
5182 // some N^2 behavior in pathological cases.
5183 Worklist.reserve(InstrsForInstructionWorklist.size());
5184 for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
5185 // DCE instruction if trivially dead. As we iterate in reverse program
5186 // order here, we will clean up whole chains of dead instructions.
5187 if (isInstructionTriviallyDead(Inst, &TLI) ||
5188 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
5189 ++NumDeadInst;
5190 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
5191 salvageDebugInfo(*Inst);
5192 Inst->eraseFromParent();
5193 MadeIRChange = true;
5194 continue;
5195 }
5196
5197 Worklist.push(Inst);
5198 }
5199
5200 return MadeIRChange;
5201}
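// Illustrative example (not part of this file): for
//   br i1 true, label %live, label %dead
// HandleOnlyLiveSuccessor records the edge to %dead as dead and replaces the
// corresponding PHI incoming values in %dead with poison; %dead is then
// skipped (unless it is reachable some other way), so instructions reachable
// only through it never enter the worklist.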
5202
5203 static bool combineInstructionsOverFunction(
5204 Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
5205 AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
5206 DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
5207 ProfileSummaryInfo *PSI, LoopInfo *LI, const InstCombineOptions &Opts) {
5208 auto &DL = F.getParent()->getDataLayout();
5209
5210 /// Builder - This is an IRBuilder that automatically inserts new
5211 /// instructions into the worklist when they are created.
5212 IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
5213 F.getContext(), TargetFolder(DL),
5214 IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
5215 Worklist.add(I);
5216 if (auto *Assume = dyn_cast<AssumeInst>(I))
5217 AC.registerAssumption(Assume);
5218 }));
5219
5220 ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front());
5221
5222 // Lower dbg.declare intrinsics; otherwise their value may be clobbered
5223 // by the instcombiner.
5224 bool MadeIRChange = false;
5225 if (ShouldLowerDbgDeclare)
5226 MadeIRChange = LowerDbgDeclare(F);
5227
5228 // Iterate while there is work to do.
5229 unsigned Iteration = 0;
5230 while (true) {
5231 ++Iteration;
5232
5233 if (Iteration > Opts.MaxIterations && !Opts.VerifyFixpoint) {
5234 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
5235 << " on " << F.getName()
5236 << " reached; stopping without verifying fixpoint\n");
5237 break;
5238 }
5239
5240 ++NumWorklistIterations;
5241 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
5242 << F.getName() << "\n");
5243
5244 InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
5245 ORE, BFI, PSI, DL, LI);
5246 IC.MaxArraySizeForCombine = MaxArraySize;
5247 bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT);
5248 MadeChangeInThisIteration |= IC.run();
5249 if (!MadeChangeInThisIteration)
5250 break;
5251
5252 MadeIRChange = true;
5253 if (Iteration > Opts.MaxIterations) {
5255 "Instruction Combining did not reach a fixpoint after " +
5256 Twine(Opts.MaxIterations) + " iterations",
5257 /*GenCrashDiag=*/false);
5258 }
5259 }
5260
5261 if (Iteration == 1)
5262 ++NumOneIteration;
5263 else if (Iteration == 2)
5264 ++NumTwoIterations;
5265 else if (Iteration == 3)
5266 ++NumThreeIterations;
5267 else
5268 ++NumFourOrMoreIterations;
5269
5270 return MadeIRChange;
5271}
5272
5273 InstCombinePass::InstCombinePass(InstCombineOptions Opts) : Options(Opts) {}
5274
5275 void InstCombinePass::printPipeline(
5276 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
5277 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
5278 OS, MapClassName2PassName);
5279 OS << '<';
5280 OS << "max-iterations=" << Options.MaxIterations << ";";
5281 OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info;";
5282 OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
5283 OS << '>';
5284}
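// Illustrative usage (not part of this file): the pipeline text printed above
// round-trips through the new pass manager's pipeline parser, e.g.
//   opt -passes='instcombine<max-iterations=2;no-verify-fixpoint>' in.ll -S
// (option spellings here assume the forms emitted by printPipeline above).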
5285
5286 PreservedAnalyses InstCombinePass::run(Function &F,
5287 FunctionAnalysisManager &AM) {
5288 auto &AC = AM.getResult<AssumptionAnalysis>(F);
5289 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
5290 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
5291 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
5292 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
5293
5294 // TODO: Only use LoopInfo when the option is set. This requires that the
5295 // callers in the pass pipeline explicitly set the option.
5296 auto *LI = AM.getCachedResult<LoopAnalysis>(F);
5297 if (!LI && Options.UseLoopInfo)
5298 LI = &AM.getResult<LoopAnalysis>(F);
5299
5300 auto *AA = &AM.getResult<AAManager>(F);
5301 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
5302 ProfileSummaryInfo *PSI =
5303 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
5304 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
5305 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
5306
5307 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
5308 BFI, PSI, LI, Options))
5309 // No changes, all analyses are preserved.
5310 return PreservedAnalyses::all();
5311
5312 // Mark all the analyses that instcombine updates as preserved.
5313 PreservedAnalyses PA;
5314 PA.preserveSet<CFGAnalyses>();
5315 return PA;
5316}
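// Editor's sketch (not part of this file): a minimal way a client could run
// this pass on one function through the new pass manager. The types and member
// functions below are standard LLVM APIs; only the free function name
// runInstCombineOnFunction is invented for the example.
//
//   #include "llvm/Passes/PassBuilder.h"
//   #include "llvm/Transforms/InstCombine/InstCombine.h"
//
//   void runInstCombineOnFunction(llvm::Function &F) {
//     llvm::PassBuilder PB;
//     llvm::LoopAnalysisManager LAM;
//     llvm::FunctionAnalysisManager FAM;
//     llvm::CGSCCAnalysisManager CGAM;
//     llvm::ModuleAnalysisManager MAM;
//     // Register the analyses each manager needs and wire up the proxies.
//     PB.registerModuleAnalyses(MAM);
//     PB.registerCGSCCAnalyses(CGAM);
//     PB.registerFunctionAnalyses(FAM);
//     PB.registerLoopAnalyses(LAM);
//     PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
//
//     llvm::FunctionPassManager FPM;
//     FPM.addPass(llvm::InstCombinePass());
//     FPM.run(F, FAM);
//   }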
5317
5318 void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
5319 AU.setPreservesCFG();
5332}
5333
5335 if (skipFunction(F))
5336 return false;
5337
5338 // Required analyses.
5339 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
5340 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
5341 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
5342 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
5343 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5344 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
5345
5346 // Optional analyses.
5347 auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
5348 auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
5349 ProfileSummaryInfo *PSI =
5350 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
5351 BlockFrequencyInfo *BFI =
5352 (PSI && PSI->hasProfileSummary()) ?
5353 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
5354 nullptr;
5355
5356 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
5357 BFI, PSI, LI, InstCombineOptions());
5358}
5359
5360 char InstructionCombiningPass::ID = 0;
5361
5362 InstructionCombiningPass::InstructionCombiningPass() : FunctionPass(ID) {
5363 initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
5364}
5365
5367 "Combine redundant instructions", false, false)
5379
5380// Initialization Routines
5383}
5384
5385 FunctionPass *llvm::createInstructionCombiningPass() {
5386 return new InstructionCombiningPass();
5387}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements a class to represent arbitrary precision integral constant values and operations...
Expand Atomic instructions
static const Function * getParent(const Value *V)
This is the interface for LLVM's primary stateless and local alias analysis.
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:182
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
#define NL
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
Hexagon Vector Combine
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
Definition: IVUsers.cpp:48
This file provides internal interfaces used to implement the InstCombine.
This file provides the primary interface to the instcombine pass.
static bool isUsedWithinShuffleVector(Value *V)
static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI, Instruction *AI)
static bool shorter_filter(const Value *LHS, const Value *RHS)
static Instruction * foldSelectGEP(GetElementPtrInst &GEP, InstCombiner::BuilderTy &Builder)
Thread a GEP operation with constant indices through the constant true/false arms of a select.
static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src)
static cl::opt< unsigned > MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine"))
static cl::opt< unsigned > ShouldLowerDbgDeclare("instcombine-lower-dbg-declare", cl::Hidden, cl::init(true))
static bool hasNoSignedWrap(BinaryOperator &I)
static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, InstCombinerImpl &IC)
Combine constant operands of associative operations either before or after a cast to eliminate one of...
static Value * simplifyInstructionWithPHI(Instruction &I, PHINode *PN, Value *InValue, BasicBlock *InBB, const DataLayout &DL, const SimplifyQuery SQ)
static void ClearSubclassDataAfterReassociation(BinaryOperator &I)
Conservatively clears subclassOptionalData after a reassociation or commutation.
static bool isAllocSiteRemovable(Instruction *AI, SmallVectorImpl< WeakTrackingVH > &Users, const TargetLibraryInfo &TLI)
static Value * getIdentityValue(Instruction::BinaryOps Opcode, Value *V)
This function returns identity value for given opcode, which can be used to factor patterns like (X *...
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
static std::optional< std::pair< Value *, Value * > > matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS)
static Value * foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, Value *NewOp, InstCombiner &IC)
static Instruction * tryToMoveFreeBeforeNullTest(CallInst &FI, const DataLayout &DL)
Move the call to free before a NULL test.
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp)
Return whether "(X LOp Y) ROp Z" is always equal to "(X ROp Z) LOp (Y ROp Z)".
static bool combineInstructionsOverFunction(Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, LoopInfo *LI, const InstCombineOptions &Opts)
static Value * tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ, InstCombiner::BuilderTy &Builder, Instruction::BinaryOps InnerOpcode, Value *A, Value *B, Value *C, Value *D)
This tries to simplify binary operations by factorizing out common terms (e.
static bool isRemovableWrite(CallBase &CB, Value *UsedV, const TargetLibraryInfo &TLI)
Given a call CB which uses an address UsedV, return true if we can prove the call's only possible eff...
static Instruction::BinaryOps getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, Value *&LHS, Value *&RHS, BinaryOperator *OtherOp)
This function predicates factorization using distributive laws.
static bool hasNoUnsignedWrap(BinaryOperator &I)
static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI)
Check for case where the call writes to an otherwise dead alloca.
static cl::opt< unsigned > MaxSinkNumUsers("instcombine-max-sink-users", cl::init(32), cl::desc("Maximum number of undroppable users for instruction sinking"))
static Constant * constantFoldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, bool IsTrueArm)
static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo)
Return 'true' if the given typeinfo will match anything.
static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2)
static cl::opt< bool > EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), cl::init(true))
static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C)
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file contains the declarations for metadata subclasses.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static bool IsSelect(MachineInstr &MI)
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static unsigned getScalarSizeInBits(Type *Ty)
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
This pass exposes codegen information to IR-level passes.
This defines the Use class.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
bool isNoAliasScopeDeclDead(Instruction *Inst)
void analyse(Instruction *I)
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:401
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1934
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition: APInt.h:805
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:312
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1128
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1947
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:519
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:500
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
Class to represent array types.
Definition: DerivedTypes.h:371
uint64_t getNumElements() const
Definition: DerivedTypes.h:383
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:647
Type * getElementType() const
Definition: DerivedTypes.h:384
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
uint64_t getDereferenceableBytes() const
Returns the number of dereferenceable bytes from the dereferenceable attribute.
Definition: Attributes.cpp:390
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:193
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:429
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:498
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:396
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:234
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
Definition: BasicBlock.cpp:354
const Instruction & front() const
Definition: BasicBlock.h:452
bool isEntryBlock() const
Return true if this is the entry block of the containing function.
Definition: BasicBlock.cpp:551
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:439
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:447
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:205
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:164
const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
Definition: BasicBlock.cpp:410
size_t size() const
Definition: BasicBlock.h:450
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:220
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a binary instruction, given the opcode and the two operands.
BinaryOps getOpcode() const
Definition: InstrTypes.h:491
static BinaryOperator * CreateNeg(Value *Op, const Twine &Name, BasicBlock::iterator InsertBefore)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition: InstrTypes.h:364
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:70
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1455
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1703
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1784
bool doesNotThrow() const
Determine if the call cannot unwind.
Definition: InstrTypes.h:2229
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1648
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1780
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr, BasicBlock::iterator InsertBefore)
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:965
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:988
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:990
@ ICMP_EQ
equal
Definition: InstrTypes.h:986
@ ICMP_NE
not equal
Definition: InstrTypes.h:987
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:1128
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1090
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1066
ConstantArray - Constant Array Declarations.
Definition: Constants.h:422
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition: Constants.h:765
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2544
static Constant * getNot(Constant *C)
Definition: Constants.cpp:2531
static Constant * getICmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced=false)
get* - Return some common constants without having to specify the full Instruction::OPCODE identifier...
Definition: Constants.cpp:2404
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2562
static Constant * getAdd(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2537
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2598
static Constant * getNeg(Constant *C, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2525
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:849
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:856
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:863
This class represents a range of values.
Definition: ConstantRange.h:47
bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
static ConstantRange makeExactNoWrapRegion(Instruction::BinaryOps BinOp, const APInt &Other, unsigned NoWrapKind)
Produce the range that contains X if and only if "X BinOp Other" does not wrap.
Constant Vector Declarations.
Definition: Constants.h:506
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1398
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:400
static Constant * replaceUndefsWith(Constant *C, Constant *Replacement)
Try to replace undefined constant C or undefined elements in C with Replacement.
Definition: Constants.cpp:767
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
const Constant * stripPointerCasts() const
Definition: Constant.h:213
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:432
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Record of a variable value-assignment, aka a non instruction representation of the dbg....
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
SmallVector< APInt > getGEPIndicesForOffset(Type *&ElemTy, APInt &Offset) const
Get GEP indices to access Offset inside ElemTy.
Definition: DataLayout.cpp:998
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:260
unsigned getIndexTypeSizeInBits(Type *Ty) const
Layout size of the index used in GEP calculation.
Definition: DataLayout.cpp:774
IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
Definition: DataLayout.cpp:905
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:420
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:672
int64_t getIndexedOffsetInType(Type *ElemTy, ArrayRef< Value * > Indices) const
Returns the offset from the beginning of the type for the specified indices.
Definition: DataLayout.cpp:920
This is the common base class for debug info intrinsics for variables.
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:72
Identifies a unique instance of a variable.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool empty() const
Definition: DenseMap.h:98
iterator end()
Definition: DenseMap.h:84
void registerBranch(BranchInst *BI)
Add a branch condition to the cache.
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This instruction extracts a struct member or array element value from an aggregate value.
ArrayRef< unsigned > getIndices() const
iterator_range< idx_iterator > indices() const
idx_iterator idx_end() const
static ExtractValueInst * Create(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &NameStr, BasicBlock::iterator InsertBefore)
idx_iterator idx_begin() const
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:178
const BasicBlock & getEntryBlock() const
Definition: Function.h:782
static bool isTargetIntrinsic(Intrinsic::ID IID)
isTargetIntrinsic - Returns true if IID is an intrinsic specific to a certain target.
Definition: Function.cpp:873
bool isInBounds() const
Test whether this is an inbounds GEP, as defined by LangRef.html.
Definition: Operator.h:420
bool hasAllZeroIndices() const
Return true if all of the indices of this GEP are zeros.
Definition: Operator.h:479
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:973
static Type * getTypeAtIndex(Type *Ty, Value *Idx)
Return the type of the element at the given index of an indexable type.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr, BasicBlock::iterator InsertBefore)
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Create an "inbounds" getelementptr.
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
Legacy wrapper pass to provide the GlobalsAAResult object.
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2006
Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
Definition: IRBuilder.cpp:921
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1977
Value * CreateLogicalOp(Instruction::BinaryOps Opc, Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1682
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2499
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2022
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2518
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:305
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1875
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:227
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:480
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2349
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2380
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1748
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1338
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1789
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2477
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1469
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1321
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1660
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2179
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1450
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1513
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1865
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2334
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1676
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:510
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition: IRBuilder.h:496
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition: IRBuilder.h:76
This instruction inserts a struct field of array element value into an aggregate value.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr, BasicBlock::iterator InsertBefore)
InstCombinePass(InstCombineOptions Opts={})
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I)
Tries to simplify binops of select and cast of the select condition.
Instruction * foldBinOpIntoSelectOrPhi(BinaryOperator &I)
This is a convenience wrapper function for the above two functions.
bool SimplifyAssociativeOrCommutative(BinaryOperator &I)
Performs a few simplifications for operators which are associative or commutative.
Instruction * visitGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src)
Value * foldUsingDistributiveLaws(BinaryOperator &I)
Tries to simplify binary operations which some other binary operation distributes over.
Instruction * foldBinOpShiftWithShift(BinaryOperator &I)
Instruction * visitUnreachableInst(UnreachableInst &I)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
void handleUnreachableFrom(Instruction *I, SmallVectorImpl< BasicBlock * > &Worklist)
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
Instruction * visitFreeze(FreezeInst &I)
void tryToSinkInstructionDPValues(Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, BasicBlock *DestBlock, SmallVectorImpl< DPValue * > &DPUsers)
void handlePotentiallyDeadBlocks(SmallVectorImpl< BasicBlock * > &Worklist)
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitExtractValueInst(ExtractValueInst &EV)
void handlePotentiallyDeadSuccessors(BasicBlock *BB, BasicBlock *LiveSucc)
Instruction * visitUnconditionalBranchInst(BranchInst &BI)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * visitLandingPadInst(LandingPadInst &LI)
bool prepareWorklist(Function &F, ReversePostOrderTraversal< BasicBlock * > &RPOT)
Perform early cleanup and prepare the InstCombine worklist.
Instruction * visitReturnInst(ReturnInst &RI)
Instruction * visitSwitchInst(SwitchInst &SI)
Instruction * foldBinopWithPhiOperands(BinaryOperator &BO)
For a binary operator with 2 phi operands, try to hoist the binary operation before the phi.
Constant * getLosslessTrunc(Constant *C, Type *TruncTy, unsigned ExtOp)
Value * SimplifyDemandedUseFPClass(Value *V, FPClassTest DemandedMask, KnownFPClass &Known, unsigned Depth, Instruction *CxtI)
Attempts to replace V with a simpler value based on the demanded floating-point classes.
bool mergeStoreIntoSuccessor(StoreInst &SI)
Try to transform: if () { *P = v1; } else { *P = v2 } or: *P = v1; if () { *P = v2; } into a phi node...
Instruction * tryFoldInstWithCtpopWithNot(Instruction *I)
void tryToSinkInstructionDbgValues(Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, BasicBlock *DestBlock, SmallVectorImpl< DbgVariableIntrinsic * > &DbgUsers)
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Value * pushFreezeToPreventPoisonFromPropagating(FreezeInst &FI)
bool run()
Run the combiner over the entire worklist until it is empty.
Instruction * foldVectorBinop(BinaryOperator &Inst)
Canonicalize the position of binops relative to shufflevector.
bool removeInstructionsBeforeUnreachable(Instruction &I)
Value * SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS, Value *RHS)
void addDeadEdge(BasicBlock *From, BasicBlock *To, SmallVectorImpl< BasicBlock * > &Worklist)
Instruction * visitAllocSite(Instruction &FI)
Instruction * visitGetElementPtrInst(GetElementPtrInst &GEP)
Instruction * visitBranchInst(BranchInst &BI)
Value * tryFactorizationFolds(BinaryOperator &I)
This tries to simplify binary operations by factorizing out common terms (e.
Instruction * foldFreezeIntoRecurrence(FreezeInst &I, PHINode *PN)
bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock)
Try to move the specified instruction from its current block into the beginning of DestBlock,...
bool freezeOtherUses(FreezeInst &FI)
void freelyInvertAllUsersOf(Value *V, Value *IgnoredUser=nullptr)
Freely adapt every user of V as-if V was changed to !V.
The core instruction combiner logic.
Definition: InstCombiner.h:47
SimplifyQuery SQ
Definition: InstCombiner.h:76
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:340
static bool isCanonicalPredicate(CmpInst::Predicate Pred)
Predicate canonicalization reduces the number of patterns that need to be matched by other transforms...
Definition: InstCombiner.h:156
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
Definition: InstCombiner.h:231
static unsigned getComplexity(Value *V)
Assign a complexity or rank value to LLVM Values.
Definition: InstCombiner.h:138
TargetLibraryInfo & TLI
Definition: InstCombiner.h:73
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Definition: InstCombiner.h:365
AAResults * AA
Definition: InstCombiner.h:69
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:385
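A minimal usage sketch (illustrative only; NewV stands for a hypothetical, already-computed replacement value inside a visit handler for instruction I):
  // Rewrite every use of I to NewV; the affected users are pushed back
  // onto the worklist so follow-on folds get a chance to fire.
  return replaceInstUsesWith(I, NewV);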
uint64_t MaxArraySizeForCombine
Maximum size of array considered when transforming.
Definition: InstCombiner.h:55
static bool shouldAvoidAbsorbingNotIntoSelect(const SelectInst &SI)
Definition: InstCombiner.h:190
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
Definition: InstCombiner.h:417
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Definition: InstCombiner.h:64
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
Definition: InstCombiner.h:374
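Illustrative sketch, assuming LHS and RHS are values already in hand inside a visitor for instruction I:
  // Build a helper 'and' and place it at I's position; unlike plain insertion,
  // InsertNewInstWith also copies I's debug location onto the new instruction.
  auto *NewAnd = BinaryOperator::CreateAnd(LHS, RHS);
  InsertNewInstWith(NewAnd, I.getIterator());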
const DataLayout & DL
Definition: InstCombiner.h:75
unsigned ComputeNumSignBits(const Value *Op, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombiner.h:451
DomConditionCache DC
Definition: InstCombiner.h:80
const bool MinimizeSize
Definition: InstCombiner.h:67
std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
void addToWorklist(Instruction *I)
Definition: InstCombiner.h:335
Value * getFreelyInvertedImpl(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume, unsigned Depth)
Return nonnull value if V is free to invert under the condition of WillInvertAllUses.
std::optional< Value * > targetSimplifyDemandedVectorEltsIntrinsic(IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition: InstCombiner.h:409
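A hedged sketch of the usual pattern inside a visitor (assumes operand 1 of I has the same type as I itself):
  // Swap operand 1 for zero and return I to signal that it changed;
  // the displaced operand is queued in case it is now dead.
  return replaceOperand(I, 1, Constant::getNullValue(I.getType()));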
DominatorTree & DT
Definition: InstCombiner.h:74
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
Definition: InstCombiner.h:283
SmallDenseSet< std::pair< BasicBlock *, BasicBlock * >, 8 > DeadEdges
Edges that are known to never be taken.
Definition: InstCombiner.h:89
std::optional< Value * > targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Instruction *CxtI) const
Definition: InstCombiner.h:430
BuilderTy & Builder
Definition: InstCombiner.h:60
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
Definition: InstCombiner.h:212
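Sketch of how isFreeToInvert and getFreelyInverted are typically paired (Op is a hypothetical operand; Builder is the member declared above):
  bool Consumes = false;
  if (isFreeToInvert(Op, /*WillInvertAllUses=*/Op->hasOneUse(), Consumes)) {
    // Builds (or reuses) ~Op through Builder instead of emitting 'xor Op, -1'.
    Value *NotOp = getFreelyInverted(Op, Op->hasOneUse(), &Builder, Consumes);
    // ... re-express the user of Op in terms of NotOp ...
  }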
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
The legacy pass manager's instcombine pass.
Definition: InstCombine.h:71
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...

bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
void pushUsersToWorkList(Instruction &I)
When an instruction is simplified, add all users of the instruction to the work lists because they mi...
void add(Instruction *I)
Add instruction to the worklist.
void push(Instruction *I)
Push the instruction onto the worklist stack.
void zap()
Check that the worklist is empty and nuke the backing store for the map.
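A standalone sketch of the worklist API above (InstCombine wires this up internally; shown only to illustrate the listed calls):
  InstructionWorklist Worklist;
  Worklist.push(&I);               // schedule I itself
  Worklist.pushUsersToWorkList(I); // and everything that consumes it
  // ... pop and simplify instructions until the worklist is empty ...
  Worklist.zap();                  // assert emptiness and release the backing storage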
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
Definition: Instruction.h:300
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:453
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:80
bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
const BasicBlock * getParent() const
Definition: Instruction.h:151
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:84
bool isTerminator() const
Definition: Instruction.h:254
void dropUBImplyingAttrsAndMetadata()
Drop any attributes or metadata that can cause immediate undefined behavior.
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
bool willReturn() const LLVM_READONLY
Return true if the instruction will return (unwinding is considered as a form of returning control fl...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:251
bool isBitwiseLogicOp() const
Return true if this is and/or/xor.
Definition: Instruction.h:305
bool isShift() const
Definition: Instruction.h:258
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:450
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isIntDivRem() const
Definition: Instruction.h:257
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, BasicBlock::iterator InsertBefore)
The landingpad instruction holds all of the information necessary to generate correct exception handl...
void addClause(Constant *ClauseVal)
Add a catch or filter clause to the landing pad.
void setCleanup(bool V)
Indicate that this landingpad instruction is a cleanup.
static LandingPadInst * Create(Type *RetTy, unsigned NumReservedClauses, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedClauses is a hint for the number of incoming clauses that this landingpad w...
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
An instruction for reading from memory.
Definition: Instructions.h:184
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1434
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:889
This is the common base class for memset/memcpy/memmove.
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
Root of the metadata hierarchy.
Definition: Metadata.h:62
This class represents min/max intrinsics.
Value * getLHS() const
Value * getRHS() const
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MDNode * getScopeList() const
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:783
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition: Operator.h:75
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition: Operator.h:108
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition: Operator.h:102
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
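Sketch of building a two-input phi with the PHINode entries above (MergeBB, PredA, PredB, V0 and V1 are hypothetical names):
  PHINode *PN = PHINode::Create(V0->getType(), /*NumReservedValues=*/2, "merged",
                                MergeBB->begin());
  PN->addIncoming(V0, PredA);
  PN->addIncoming(V1, PredB);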
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:37
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
Definition: Constants.h:1398
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:144
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition: Registry.h:44
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
static ReturnInst * Create(LLVMContext &C, Value *retVal, BasicBlock::iterator InsertBefore)
This class represents a cast from signed integer to floating point.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr, BasicBlock::iterator InsertBefore, Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
size_type size() const
Definition: SmallPtrSet.h:94
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
typename SuperClass::iterator iterator
Definition: SmallVector.h:590
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Multiway switch.
TargetFolder - Create constants with target dependent folding.
Definition: TargetFolder.h:34
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Can be used to implement target-specific instruction combining.
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Query the target whether the specified address space cast from FromAS to ToAS is valid.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:249
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
This class represents a cast unsigned integer to floating point.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:736
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
iterator_range< user_iterator > users()
Definition: Value.h:421
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:693
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition: Value.cpp:851
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:676
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
constexpr bool isZero() const
Definition: TypeSize.h:156
An efficient, type-erasing, non-owning reference to a callable.
reverse_self_iterator getReverseIterator()
Definition: ilist_node.h:112
self_iterator getIterator()
Definition: ilist_node.h:109
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isNoFPClassCompatibleType(Type *Ty)
Returns true if this is a type legal for the 'nofpclass' attribute.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1451
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:477
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
Definition: PatternMatch.h:155
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(APInt V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:903
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
apint_match m_APIntAllowUndef(const APInt *&Res)
Match APInt while allowing undefs in splat vector constants.
Definition: PatternMatch.h:300
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:160
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
br_match m_UnconditionalBr(BasicBlock *&Succ)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
BinOpPred_match< LHS, RHS, is_idiv_op > m_IDiv(const LHS &L, const RHS &R)
Matches integer division operations.
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:765
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:821
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
constantexpr_match m_ConstantExpr()
Match a constant expression or a constant that contains a constant expression.
Definition: PatternMatch.h:181
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
Definition: PatternMatch.h:509
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:163
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
CmpClass_match< LHS, RHS, FCmpInst, FCmpInst::Predicate > m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R)
CastOperator_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:800
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
CastInst_match< OpTy, UIToFPInst > m_UIToFP(const OpTy &Op)
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:294
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
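The matchers above compose into declarative patterns; a typical sketch that binds both operands of an 'add X, C' (constant operands are already canonicalized to the right-hand side):
  Value *X;
  const APInt *C;
  if (match(&I, m_Add(m_Value(X), m_APInt(C)))) {
    // X is bound to the variable operand, *C to the constant (scalar or splat).
  }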
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CastInst_match< OpTy, SIToFPInst > m_SIToFP(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
Exact_match< T > m_Exact(const T &SubPattern)
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
cstfp_pred_ty< is_non_zero_fp > m_NonZeroFP()
Match a floating-point non-zero.
Definition: PatternMatch.h:740
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > > > m_MaxOrMin(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:561
CastOperator_match< OpTy, Instruction::PtrToInt > m_PtrToInt(const OpTy &Op)
Matches PtrToInt.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:234
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void salvageDebugInfoForDbgValues(Instruction &I, ArrayRef< DbgVariableIntrinsic * > Insns, ArrayRef< DPValue * > DPInsns)
Implementation of salvageDebugInfo, applying only to instructions in Insns, rather than all debug use...
Definition: Local.cpp:2225
Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
@ Offset
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:862
bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if the given value is known to be non-zero when defined.
void stable_sort(R &&Range)
Definition: STLExtras.h:2004
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
Value * simplifyFreezeInst(Value *Op, const SimplifyQuery &Q)
Given an operand for a Freeze, see if we can fold the result.
FunctionPass * createInstructionCombiningPass()
std::pair< unsigned, unsigned > removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB)
Remove all instructions from a basic block other than its terminator and any present EH pad instructi...
Definition: Local.cpp:2783
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2415
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1581
auto successors(const MachineBasicBlock *BB)
bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
std::optional< StringRef > getAllocationFamily(const Value *I, const TargetLibraryInfo *TLI)
If a function is part of an allocation family (e.g.
Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Value * simplifyInstructionWithOperands(Instruction *I, ArrayRef< Value * > NewOps, const SimplifyQuery &Q)
Like simplifyInstruction but the operands of I are replaced with NewOps.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2082
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:665
gep_type_iterator gep_type_end(const User *GEP)
Value * getReallocatedOperand(const CallBase *CB)
If this is a call to a realloc function, return the reallocated operand.
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc,...
bool handleUnreachableTerminator(Instruction *I, SmallVectorImpl< Value * > &PoisonedValues)
If a terminator in an unreachable basic block has an operand of type Instruction, transform it into p...
Definition: Local.cpp:2765
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:146
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:399
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
Value * emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions=false)
Given a getelementptr instruction/constantexpr, emit the code necessary to compute the offset from th...
Definition: Local.cpp:22
constexpr unsigned MaxAnalysisRecursionDepth
Definition: ValueTracking.h:47
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1656
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool LowerDbgDeclare(Function &F)
Lowers llvm.dbg.declare intrinsics into appropriate set of llvm.dbg.value intrinsics.
Definition: Local.cpp:1905
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
void ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, StoreInst *SI, DIBuilder &Builder)
Dbg Intrinsic utilities.
Definition: Local.cpp:1684
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
Value * simplifyExtractValueInst(Value *Agg, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an ExtractValueInst, fold the result or return null.
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:2693
constexpr int PoisonMaskElem
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
void findDbgUsers(SmallVectorImpl< DbgVariableIntrinsic * > &DbgInsts, Value *V, SmallVectorImpl< DPValue * > *DPValues=nullptr)
Finds the debug info intrinsics describing a value.
Definition: DebugInfo.cpp:143
Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ Or
Bitwise or logical OR of integers.
DWARFExpression::Operation Op
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this if a call to a free function, return the freed operand.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
gep_type_iterator gep_type_begin(const User *GEP)
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1888
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition: STLExtras.h:2034
Value * simplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef< Value * > Indices, bool InBounds, const SimplifyQuery &Q)
Given operands for a GetElementPtrInst, fold the result or return null.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DPValue types only and downcast.
void initializeInstCombine(PassRegistry &)
Initialize all passes linked into the InstCombine library.
void initializeInstructionCombiningPassPass(PassRegistry &)
std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Definition: CFG.cpp:231
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition: KnownBits.h:247
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
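Sketch combining KnownBits with the InstCombiner::computeKnownBits member listed earlier (V and I are hypothetical; the gate shown is illustrative, not an actual fold):
  KnownBits Known(V->getType()->getScalarSizeInBits());
  computeKnownBits(V, Known, /*Depth=*/0, &I);
  if (Known.countMinLeadingZeros() >= Known.getBitWidth() / 2) {
    // The top half of V is known to be zero at I.
  }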
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:91
const DataLayout & DL
Definition: SimplifyQuery.h:61
SimplifyQuery getWithInstruction(const Instruction *I) const
Definition: SimplifyQuery.h:96
SimplifyQuery getWithoutUndef() const
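These SimplifyQuery helpers feed InstSimplify; the customary preamble of a binary-operator visitor looks roughly like this sketch (using simplifyBinOp, SQ and replaceInstUsesWith from the entries above):
  if (Value *V = simplifyBinOp(I.getOpcode(), I.getOperand(0), I.getOperand(1),
                               SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);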