1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
32//
33//===----------------------------------------------------------------------===//
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APInt.h"
37#include "llvm/ADT/ArrayRef.h"
38#include "llvm/ADT/DenseMap.h"
41#include "llvm/ADT/Statistic.h"
46#include "llvm/Analysis/CFG.h"
61#include "llvm/IR/BasicBlock.h"
62#include "llvm/IR/CFG.h"
63#include "llvm/IR/Constant.h"
64#include "llvm/IR/Constants.h"
65#include "llvm/IR/DIBuilder.h"
66#include "llvm/IR/DataLayout.h"
67#include "llvm/IR/DebugInfo.h"
69#include "llvm/IR/Dominators.h"
71#include "llvm/IR/Function.h"
73#include "llvm/IR/IRBuilder.h"
74#include "llvm/IR/InstrTypes.h"
75#include "llvm/IR/Instruction.h"
78#include "llvm/IR/Intrinsics.h"
79#include "llvm/IR/Metadata.h"
80#include "llvm/IR/Operator.h"
81#include "llvm/IR/PassManager.h"
83#include "llvm/IR/Type.h"
84#include "llvm/IR/Use.h"
85#include "llvm/IR/User.h"
86#include "llvm/IR/Value.h"
87#include "llvm/IR/ValueHandle.h"
92#include "llvm/Support/Debug.h"
100#include <algorithm>
101#include <cassert>
102#include <cstdint>
103#include <memory>
104#include <optional>
105#include <string>
106#include <utility>
107
108#define DEBUG_TYPE "instcombine"
110#include <optional>
111
112using namespace llvm;
113using namespace llvm::PatternMatch;
114
115STATISTIC(NumWorklistIterations,
116 "Number of instruction combining iterations performed");
117STATISTIC(NumOneIteration, "Number of functions with one iteration");
118STATISTIC(NumTwoIterations, "Number of functions with two iterations");
119STATISTIC(NumThreeIterations, "Number of functions with three iterations");
120STATISTIC(NumFourOrMoreIterations,
121 "Number of functions with four or more iterations");
122
123STATISTIC(NumCombined , "Number of insts combined");
124STATISTIC(NumConstProp, "Number of constant folds");
125STATISTIC(NumDeadInst , "Number of dead inst eliminated");
126STATISTIC(NumSunkInst , "Number of instructions sunk");
127STATISTIC(NumExpand, "Number of expansions");
128STATISTIC(NumFactor , "Number of factorizations");
129STATISTIC(NumReassoc , "Number of reassociations");
130DEBUG_COUNTER(VisitCounter, "instcombine-visit",
131 "Controls which instructions are visited");
132
133static cl::opt<bool>
134EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
135 cl::init(true));
136
138 "instcombine-max-sink-users", cl::init(32),
139 cl::desc("Maximum number of undroppable users for instruction sinking"));
140
142MaxArraySize("instcombine-maxarray-size", cl::init(1024),
143 cl::desc("Maximum array size considered when doing a combine"));
144
145// FIXME: Remove this flag when it is no longer necessary to convert
146// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
147// increases variable availability at the cost of accuracy. Variables that
148// cannot be promoted by mem2reg or SROA will be described as living in memory
149// for their entire lifetime. However, passes like DSE and instcombine can
150// delete stores to the alloca, leading to misleading and inaccurate debug
151// information. This flag can be removed when those passes are fixed.
152static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
153 cl::Hidden, cl::init(true));
154
155std::optional<Instruction *>
157 // Handle target specific intrinsics
159 return TTI.instCombineIntrinsic(*this, II);
160 }
161 return std::nullopt;
162}
163
165 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
166 bool &KnownBitsComputed) {
167 // Handle target specific intrinsics
169 return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
170 KnownBitsComputed);
171 }
172 return std::nullopt;
173}
174
176 IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2,
177 APInt &UndefElts3,
178 std::function<void(Instruction *, unsigned, APInt, APInt &)>
179 SimplifyAndSetOp) {
180 // Handle target specific intrinsics
183 *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
184 SimplifyAndSetOp);
185 }
186 return std::nullopt;
187}
188
189bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
190 return TTI.isValidAddrSpaceCast(FromAS, ToAS);
191}
192
193Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
195}
196
197/// Legal integers and common types are considered desirable. This is used to
198/// avoid creating instructions with types that may not be supported well by
199/// the backend.
200/// NOTE: This treats i8, i16 and i32 specially because they are common
201/// types in frontend languages.
202bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
203 switch (BitWidth) {
204 case 8:
205 case 16:
206 case 32:
207 return true;
208 default:
209 return DL.isLegalInteger(BitWidth);
210 }
211}
212
213/// Return true if it is desirable to convert an integer computation from a
214/// given bit width to a new bit width.
215/// We don't want to convert from a legal or desirable type (like i8) to an
216/// illegal type or from a smaller to a larger illegal type. A width of '1'
217/// is always treated as a desirable type because i1 is a fundamental type in
218/// IR, and there are many specialized optimizations for i1 types.
219/// Common/desirable widths are equally treated as legal to convert to, in
220/// order to open up more combining opportunities.
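///
/// An illustrative sketch, assuming a typical 64-bit DataLayout where i8, i16,
/// i32 and i64 are legal: shouldChangeType(33, 32) returns true (shrinking to
/// a desirable width), while shouldChangeType(32, 33) and
/// shouldChangeType(64, 128) return false (widening to an illegal type).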
221bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
222 unsigned ToWidth) const {
223 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
224 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
225
226 // Convert to desirable widths even if they are not legal types.
227 // Only shrink types, to prevent infinite loops.
228 if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
229 return true;
230
231 // If this is a legal or desirable integer 'from' type, and the result would be
232 // an illegal type, don't do the transformation.
233 if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal)
234 return false;
235
236 // Otherwise, if both are illegal, do not increase the size of the result. We
237 // do allow things like i160 -> i64, but not i64 -> i160.
238 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
239 return false;
240
241 return true;
242}
243
244/// Return true if it is desirable to convert a computation from 'From' to 'To'.
245/// We don't want to convert from a legal to an illegal type or from a smaller
246/// to a larger illegal type. i1 is always treated as a legal type because it is
247/// a fundamental type in IR, and there are many specialized optimizations for
248/// i1 types.
249bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
250 // TODO: This could be extended to allow vectors. Datalayout changes might be
251 // needed to properly support that.
252 if (!From->isIntegerTy() || !To->isIntegerTy())
253 return false;
254
255 unsigned FromWidth = From->getPrimitiveSizeInBits();
256 unsigned ToWidth = To->getPrimitiveSizeInBits();
257 return shouldChangeType(FromWidth, ToWidth);
258}
259
260// Return true if No Signed Wrap should be maintained for I.
261// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
262// where both B and C should be ConstantInts, results in a constant that does
263// not overflow. This function only handles the Add and Sub opcodes. For
264// all other opcodes, the function conservatively returns false.
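//
// An illustrative sketch with hypothetical values: when reassociating
//   %t = add nsw i8 %X, 100   ; B = 100
//   %r = add nsw i8 %t, 27    ; C = 27
// the folded constant 100 + 27 = 127 does not overflow i8, so nsw may be kept;
// with C = 28 the sum would be 128, which overflows, so nsw must be dropped.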
266 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
267 if (!OBO || !OBO->hasNoSignedWrap())
268 return false;
269
270 // We reason about Add and Sub only.
271 Instruction::BinaryOps Opcode = I.getOpcode();
272 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
273 return false;
274
275 const APInt *BVal, *CVal;
276 if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
277 return false;
278
279 bool Overflow = false;
280 if (Opcode == Instruction::Add)
281 (void)BVal->sadd_ov(*CVal, Overflow);
282 else
283 (void)BVal->ssub_ov(*CVal, Overflow);
284
285 return !Overflow;
286}
287
289 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
290 return OBO && OBO->hasNoUnsignedWrap();
291}
292
294 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
295 return OBO && OBO->hasNoSignedWrap();
296}
297
298/// Conservatively clears subclassOptionalData after a reassociation or
299/// commutation. We preserve fast-math flags when applicable, since they remain
300/// valid after these transforms.
302 FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
303 if (!FPMO) {
304 I.clearSubclassOptionalData();
305 return;
306 }
307
308 FastMathFlags FMF = I.getFastMathFlags();
309 I.clearSubclassOptionalData();
310 I.setFastMathFlags(FMF);
311}
312
313/// Combine constant operands of associative operations either before or after a
314/// cast to eliminate one of the associative operations:
315/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
316/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
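///
/// An illustrative sketch with hypothetical values:
///   %a = and i8 %x, 6
///   %z = zext i8 %a to i32
///   %r = and i32 %z, 12
/// into:
///   %z = zext i8 %x to i32
///   %r = and i32 %z, 4      ; 12 & zext(6) == 4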
318 InstCombinerImpl &IC) {
319 auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
320 if (!Cast || !Cast->hasOneUse())
321 return false;
322
323 // TODO: Enhance logic for other casts and remove this check.
324 auto CastOpcode = Cast->getOpcode();
325 if (CastOpcode != Instruction::ZExt)
326 return false;
327
328 // TODO: Enhance logic for other BinOps and remove this check.
329 if (!BinOp1->isBitwiseLogicOp())
330 return false;
331
332 auto AssocOpcode = BinOp1->getOpcode();
333 auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
334 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
335 return false;
336
337 Constant *C1, *C2;
338 if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
339 !match(BinOp2->getOperand(1), m_Constant(C2)))
340 return false;
341
342 // TODO: This assumes a zext cast.
343 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
344 // to the destination type might lose bits.
345
346 // Fold the constants together in the destination type:
347 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
348 const DataLayout &DL = IC.getDataLayout();
349 Type *DestTy = C1->getType();
350 Constant *CastC2 = ConstantFoldCastOperand(CastOpcode, C2, DestTy, DL);
351 if (!CastC2)
352 return false;
353 Constant *FoldedC = ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, DL);
354 if (!FoldedC)
355 return false;
356
357 IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
358 IC.replaceOperand(*BinOp1, 1, FoldedC);
359 return true;
360}
361
362// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
363// inttoptr ( ptrtoint (x) ) --> x
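//
// An illustrative sketch (this only applies when, as checked below, the
// integer type is as wide as the pointer and the address spaces match):
//   %i = ptrtoint ptr %p to i64
//   %q = inttoptr i64 %i to ptr
// simplifies %q to %p.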
364Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
365 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
366 if (IntToPtr && DL.getTypeSizeInBits(IntToPtr->getDestTy()) ==
367 DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
368 auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
369 Type *CastTy = IntToPtr->getDestTy();
370 if (PtrToInt &&
371 CastTy->getPointerAddressSpace() ==
372 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
373 DL.getTypeSizeInBits(PtrToInt->getSrcTy()) ==
374 DL.getTypeSizeInBits(PtrToInt->getDestTy()))
375 return PtrToInt->getOperand(0);
376 }
377 return nullptr;
378}
379
380/// This performs a few simplifications for operators that are associative or
381/// commutative:
382///
383/// Commutative operators:
384///
385/// 1. Order operands such that they are listed from right (least complex) to
386/// left (most complex). This puts constants before unary operators before
387/// binary operators.
388///
389/// Associative operators:
390///
391/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
392/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
393///
394/// Associative and commutative operators:
395///
396/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
397/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
398/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
399/// if C1 and C2 are constants.
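///
/// For example, rule 6 turns (illustrative, hypothetical values):
///   %a = and i32 %X, 12
///   %b = and i32 %Y, 10
///   %r = and i32 %a, %b
/// into:
///   %t = and i32 %X, %Y
///   %r = and i32 %t, 8      ; 12 & 10 == 8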
401 Instruction::BinaryOps Opcode = I.getOpcode();
402 bool Changed = false;
403
404 do {
405 // Order operands such that they are listed from right (least complex) to
406 // left (most complex). This puts constants before unary operators before
407 // binary operators.
408 if (I.isCommutative() && getComplexity(I.getOperand(0)) <
409 getComplexity(I.getOperand(1)))
410 Changed = !I.swapOperands();
411
412 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
413 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
414
415 if (I.isAssociative()) {
416 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
417 if (Op0 && Op0->getOpcode() == Opcode) {
418 Value *A = Op0->getOperand(0);
419 Value *B = Op0->getOperand(1);
420 Value *C = I.getOperand(1);
421
422 // Does "B op C" simplify?
423 if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
424 // It simplifies to V. Form "A op V".
425 replaceOperand(I, 0, A);
426 replaceOperand(I, 1, V);
427 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
428 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
429
430 // Conservatively clear all optional flags since they may not be
431 // preserved by the reassociation. Reset nsw/nuw based on the above
432 // analysis.
434
435 // Note: this is only valid because SimplifyBinOp doesn't look at
436 // the operands to Op0.
437 if (IsNUW)
438 I.setHasNoUnsignedWrap(true);
439
440 if (IsNSW)
441 I.setHasNoSignedWrap(true);
442
443 Changed = true;
444 ++NumReassoc;
445 continue;
446 }
447 }
448
449 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
450 if (Op1 && Op1->getOpcode() == Opcode) {
451 Value *A = I.getOperand(0);
452 Value *B = Op1->getOperand(0);
453 Value *C = Op1->getOperand(1);
454
455 // Does "A op B" simplify?
456 if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
457 // It simplifies to V. Form "V op C".
458 replaceOperand(I, 0, V);
459 replaceOperand(I, 1, C);
460 // Conservatively clear the optional flags, since they may not be
461 // preserved by the reassociation.
463 Changed = true;
464 ++NumReassoc;
465 continue;
466 }
467 }
468 }
469
470 if (I.isAssociative() && I.isCommutative()) {
471 if (simplifyAssocCastAssoc(&I, *this)) {
472 Changed = true;
473 ++NumReassoc;
474 continue;
475 }
476
477 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
478 if (Op0 && Op0->getOpcode() == Opcode) {
479 Value *A = Op0->getOperand(0);
480 Value *B = Op0->getOperand(1);
481 Value *C = I.getOperand(1);
482
483 // Does "C op A" simplify?
484 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
485 // It simplifies to V. Form "V op B".
486 replaceOperand(I, 0, V);
487 replaceOperand(I, 1, B);
488 // Conservatively clear the optional flags, since they may not be
489 // preserved by the reassociation.
491 Changed = true;
492 ++NumReassoc;
493 continue;
494 }
495 }
496
497 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
498 if (Op1 && Op1->getOpcode() == Opcode) {
499 Value *A = I.getOperand(0);
500 Value *B = Op1->getOperand(0);
501 Value *C = Op1->getOperand(1);
502
503 // Does "C op A" simplify?
504 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
505 // It simplifies to V. Form "B op V".
506 replaceOperand(I, 0, B);
507 replaceOperand(I, 1, V);
508 // Conservatively clear the optional flags, since they may not be
509 // preserved by the reassociation.
511 Changed = true;
512 ++NumReassoc;
513 continue;
514 }
515 }
516
517 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
518 // if C1 and C2 are constants.
519 Value *A, *B;
520 Constant *C1, *C2, *CRes;
521 if (Op0 && Op1 &&
522 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
523 match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
524 match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
525 (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
526 bool IsNUW = hasNoUnsignedWrap(I) &&
527 hasNoUnsignedWrap(*Op0) &&
528 hasNoUnsignedWrap(*Op1);
529 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
530 BinaryOperator::CreateNUW(Opcode, A, B) :
531 BinaryOperator::Create(Opcode, A, B);
532
533 if (isa<FPMathOperator>(NewBO)) {
534 FastMathFlags Flags = I.getFastMathFlags() &
535 Op0->getFastMathFlags() &
536 Op1->getFastMathFlags();
537 NewBO->setFastMathFlags(Flags);
538 }
539 InsertNewInstWith(NewBO, I.getIterator());
540 NewBO->takeName(Op1);
541 replaceOperand(I, 0, NewBO);
542 replaceOperand(I, 1, CRes);
543 // Conservatively clear the optional flags, since they may not be
544 // preserved by the reassociation.
546 if (IsNUW)
547 I.setHasNoUnsignedWrap(true);
548
549 Changed = true;
550 continue;
551 }
552 }
553
554 // No further simplifications.
555 return Changed;
556 } while (true);
557}
558
559/// Return whether "X LOp (Y ROp Z)" is always equal to
560/// "(X LOp Y) ROp (X LOp Z)".
563 // X & (Y | Z) <--> (X & Y) | (X & Z)
564 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
565 if (LOp == Instruction::And)
566 return ROp == Instruction::Or || ROp == Instruction::Xor;
567
568 // X | (Y & Z) <--> (X | Y) & (X | Z)
569 if (LOp == Instruction::Or)
570 return ROp == Instruction::And;
571
572 // X * (Y + Z) <--> (X * Y) + (X * Z)
573 // X * (Y - Z) <--> (X * Y) - (X * Z)
574 if (LOp == Instruction::Mul)
575 return ROp == Instruction::Add || ROp == Instruction::Sub;
576
577 return false;
578}
579
580/// Return whether "(X LOp Y) ROp Z" is always equal to
581/// "(X ROp Z) LOp (Y ROp Z)".
585 return leftDistributesOverRight(ROp, LOp);
586
587 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
589
590 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
591 // but this requires knowing that the addition does not overflow and other
592 // such subtleties.
593}
594
595/// This function returns the identity value for the given opcode, which can be
596/// used to factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
598 if (isa<Constant>(V))
599 return nullptr;
600
601 return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
602}
603
604/// This function predicates factorization using distributive laws. By default,
605/// it just returns the 'Op' inputs. But for special-cases like
606/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
607/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
608/// allow more factorization opportunities.
611 Value *&LHS, Value *&RHS) {
612 assert(Op && "Expected a binary operator");
613 LHS = Op->getOperand(0);
614 RHS = Op->getOperand(1);
615 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
616 Constant *C;
617 if (match(Op, m_Shl(m_Value(), m_Constant(C)))) {
618 // X << C --> X * (1 << C)
619 RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C);
620 return Instruction::Mul;
621 }
622 // TODO: We can add other conversions e.g. shr => div etc.
623 }
624 return Op->getOpcode();
625}
626
627/// This tries to simplify binary operations by factorizing out common terms
628/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
631 Instruction::BinaryOps InnerOpcode, Value *A,
632 Value *B, Value *C, Value *D) {
633 assert(A && B && C && D && "All values must be provided");
634
635 Value *V = nullptr;
636 Value *RetVal = nullptr;
637 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
638 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
639
640 // Does "X op' Y" always equal "Y op' X"?
641 bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
642
643 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
644 if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
645 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
646 // commutative case, "(A op' B) op (C op' A)"?
647 if (A == C || (InnerCommutative && A == D)) {
648 if (A != C)
649 std::swap(C, D);
650 // Consider forming "A op' (B op D)".
651 // If "B op D" simplifies then it can be formed with no cost.
652 V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
653
654 // If "B op D" doesn't simplify then only go on if one of the existing
655 // operations "A op' B" and "C op' D" will be zapped as no longer used.
656 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
657 V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
658 if (V)
659 RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
660 }
661 }
662
663 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
664 if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
665 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
666 // commutative case, "(A op' B) op (B op' D)"?
667 if (B == D || (InnerCommutative && B == C)) {
668 if (B != D)
669 std::swap(C, D);
670 // Consider forming "(A op C) op' B".
671 // If "A op C" simplifies then it can be formed with no cost.
672 V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
673
674 // If "A op C" doesn't simplify then only go on if one of the existing
675 // operations "A op' B" and "C op' D" will be zapped as no longer used.
676 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
677 V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
678 if (V)
679 RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
680 }
681 }
682
683 if (!RetVal)
684 return nullptr;
685
686 ++NumFactor;
687 RetVal->takeName(&I);
688
689 // Try to add no-overflow flags to the final value.
690 if (isa<OverflowingBinaryOperator>(RetVal)) {
691 bool HasNSW = false;
692 bool HasNUW = false;
693 if (isa<OverflowingBinaryOperator>(&I)) {
694 HasNSW = I.hasNoSignedWrap();
695 HasNUW = I.hasNoUnsignedWrap();
696 }
697 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
698 HasNSW &= LOBO->hasNoSignedWrap();
699 HasNUW &= LOBO->hasNoUnsignedWrap();
700 }
701
702 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
703 HasNSW &= ROBO->hasNoSignedWrap();
704 HasNUW &= ROBO->hasNoUnsignedWrap();
705 }
706
707 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
708 // We can propagate 'nsw' if we know that
709 // %Y = mul nsw i16 %X, C
710 // %Z = add nsw i16 %Y, %X
711 // =>
712 // %Z = mul nsw i16 %X, C+1
713 //
714 // iff C+1 isn't INT_MIN
715 const APInt *CInt;
716 if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
717 cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);
718
719 // nuw can be propagated with any constant or nuw value.
720 cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
721 }
722 }
723 return RetVal;
724}
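// An illustrative IR sketch of the factorization performed above, with
// hypothetical values:
//   %m1 = mul i32 %A, %B
//   %m2 = mul i32 %A, %C
//   %r  = add i32 %m1, %m2
// into:
//   %bc = add i32 %B, %C
//   %r  = mul i32 %A, %bc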
725
726// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
727// IFF
728// 1) the logic_shifts match
729// 2) BinOp1 is `and`, or
730// both binops distribute over the shift and either BinOp2 is `and` or
731// (logic_shift (inv_logic_shift C1, C), C) == C1
732//
733// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
734//
735// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
736// IFF
737// 1) the logic_shifts match
738// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
739//
740// -> (BinOp (logic_shift (BinOp X, Y)), Mask)
741//
742// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
743// IFF
744// 1) Binop1 is bitwise logical operator `and`, `or` or `xor`
745// 2) Binop2 is `not`
746//
747// -> (arithmetic_shift Binop1((not X), Y), Amt)
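//
// An illustrative sketch of the second pattern above, with BinOp1 == BinOp2 ==
// `add` (which requires `shl`) and hypothetical values:
//   %sx = shl i32 %X, 4
//   %sy = shl i32 %Y, 4
//   %t  = add i32 %sx, 42    ; Mask = 42
//   %r  = add i32 %t, %sy
// into:
//   %xy = add i32 %X, %Y
//   %sh = shl i32 %xy, 4
//   %r  = add i32 %sh, 42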
748
750 auto IsValidBinOpc = [](unsigned Opc) {
751 switch (Opc) {
752 default:
753 return false;
754 case Instruction::And:
755 case Instruction::Or:
756 case Instruction::Xor:
757 case Instruction::Add:
758 // Skip Sub as we only match constant masks which will canonicalize to use
759 // add.
760 return true;
761 }
762 };
763
764 // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
765 // constraints.
766 auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
767 unsigned ShOpc) {
768 assert(ShOpc != Instruction::AShr);
769 return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
770 ShOpc == Instruction::Shl;
771 };
772
773 auto GetInvShift = [](unsigned ShOpc) {
774 assert(ShOpc != Instruction::AShr);
775 return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
776 };
777
778 auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
779 unsigned ShOpc, Constant *CMask,
780 Constant *CShift) {
781 // If the BinOp1 is `and` we don't need to check the mask.
782 if (BinOpc1 == Instruction::And)
783 return true;
784
785 // For all other possible transfers we need complete distributable
786 // binop/shift (anything but `add` + `lshr`).
787 if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
788 return false;
789
790 // If BinOp2 is `and`, any mask works (this only really helps for non-splat
791 // vecs, otherwise the mask will be simplified and the following check will
792 // handle it).
793 if (BinOpc2 == Instruction::And)
794 return true;
795
796 // Otherwise, need mask that meets the below requirement.
797 // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
798 return ConstantExpr::get(
799 ShOpc, ConstantExpr::get(GetInvShift(ShOpc), CMask, CShift),
800 CShift) == CMask;
801 };
802
803 auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
804 Constant *CMask, *CShift;
805 Value *X, *Y, *ShiftedX, *Mask, *Shift;
806 if (!match(I.getOperand(ShOpnum),
807 m_OneUse(m_Shift(m_Value(Y), m_Value(Shift)))))
808 return nullptr;
809 if (!match(I.getOperand(1 - ShOpnum),
810 m_BinOp(m_Value(ShiftedX), m_Value(Mask))))
811 return nullptr;
812
813 if (!match(ShiftedX, m_OneUse(m_Shift(m_Value(X), m_Specific(Shift)))))
814 return nullptr;
815
816 // Make sure we are matching instruction shifts and not ConstantExpr
817 auto *IY = dyn_cast<Instruction>(I.getOperand(ShOpnum));
818 auto *IX = dyn_cast<Instruction>(ShiftedX);
819 if (!IY || !IX)
820 return nullptr;
821
822 // LHS and RHS need same shift opcode
823 unsigned ShOpc = IY->getOpcode();
824 if (ShOpc != IX->getOpcode())
825 return nullptr;
826
827 // Make sure binop is real instruction and not ConstantExpr
828 auto *BO2 = dyn_cast<Instruction>(I.getOperand(1 - ShOpnum));
829 if (!BO2)
830 return nullptr;
831
832 unsigned BinOpc = BO2->getOpcode();
833 // Make sure we have valid binops.
834 if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
835 return nullptr;
836
837 if (ShOpc == Instruction::AShr) {
838 if (Instruction::isBitwiseLogicOp(I.getOpcode()) &&
839 BinOpc == Instruction::Xor && match(Mask, m_AllOnes())) {
840 Value *NotX = Builder.CreateNot(X);
841 Value *NewBinOp = Builder.CreateBinOp(I.getOpcode(), Y, NotX);
843 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp, Shift);
844 }
845
846 return nullptr;
847 }
848
849 // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just
850 // distribute to drop the shift regardless of the constants.
851 if (BinOpc == I.getOpcode() &&
852 IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
853 Value *NewBinOp2 = Builder.CreateBinOp(I.getOpcode(), X, Y);
854 Value *NewBinOp1 = Builder.CreateBinOp(
855 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp2, Shift);
856 return BinaryOperator::Create(I.getOpcode(), NewBinOp1, Mask);
857 }
858
859 // Otherwise we can only distribute by constant shifting the mask, so
860 // ensure we have constants.
861 if (!match(Shift, m_ImmConstant(CShift)))
862 return nullptr;
863 if (!match(Mask, m_ImmConstant(CMask)))
864 return nullptr;
865
866 // Check if we can distribute the binops.
867 if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
868 return nullptr;
869
870 Constant *NewCMask = ConstantExpr::get(GetInvShift(ShOpc), CMask, CShift);
871 Value *NewBinOp2 = Builder.CreateBinOp(
872 static_cast<Instruction::BinaryOps>(BinOpc), X, NewCMask);
873 Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2);
874 return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(ShOpc),
875 NewBinOp1, CShift);
876 };
877
878 if (Instruction *R = MatchBinOp(0))
879 return R;
880 return MatchBinOp(1);
881}
882
883// (Binop (zext C), (select C, T, F))
884// -> (select C, (binop 1, T), (binop 0, F))
885//
886// (Binop (sext C), (select C, T, F))
887// -> (select C, (binop -1, T), (binop 0, F))
888//
889// Attempt to simplify binary operations into a select with folded args, when
890// one operand of the binop is a select instruction and the other operand is a
891// zext/sext extension, whose value is the select condition.
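//
// An illustrative sketch of the zext form, with hypothetical values:
//   %z = zext i1 %c to i32
//   %s = select i1 %c, i32 %t, i32 %f
//   %r = add i32 %z, %s
// into:
//   %t1 = add i32 %t, 1
//   %r  = select i1 %c, i32 %t1, i32 %f   ; the false arm (add %f, 0) folds to %f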
894 // TODO: this simplification may be extended to any speculatable instruction,
895 // not just binops, and would possibly be handled better in FoldOpIntoSelect.
896 Instruction::BinaryOps Opc = I.getOpcode();
897 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
898 Value *A, *CondVal, *TrueVal, *FalseVal;
899 Value *CastOp;
900
901 auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
902 return match(CastOp, m_ZExtOrSExt(m_Value(A))) &&
903 A->getType()->getScalarSizeInBits() == 1 &&
904 match(SelectOp, m_Select(m_Value(CondVal), m_Value(TrueVal),
905 m_Value(FalseVal)));
906 };
907
908 // Make sure one side of the binop is a select instruction, and the other is a
909 // zero/sign extension operating on an i1.
910 if (MatchSelectAndCast(LHS, RHS))
911 CastOp = LHS;
912 else if (MatchSelectAndCast(RHS, LHS))
913 CastOp = RHS;
914 else
915 return nullptr;
916
917 auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
918 bool IsCastOpRHS = (CastOp == RHS);
919 bool IsZExt = isa<ZExtOperator>(CastOp);
920 Constant *C;
921
922 if (IsTrueArm) {
923 C = Constant::getNullValue(V->getType());
924 } else if (IsZExt) {
925 unsigned BitWidth = V->getType()->getScalarSizeInBits();
926 C = Constant::getIntegerValue(V->getType(), APInt(BitWidth, 1));
927 } else {
928 C = Constant::getAllOnesValue(V->getType());
929 }
930
931 return IsCastOpRHS ? Builder.CreateBinOp(Opc, V, C)
932 : Builder.CreateBinOp(Opc, C, V);
933 };
934
935 // If the value used in the zext/sext is the select condition, or the negation
936 // of the select condition, the binop can be simplified.
937 if (CondVal == A) {
938 Value *NewTrueVal = NewFoldedConst(false, TrueVal);
939 return SelectInst::Create(CondVal, NewTrueVal,
940 NewFoldedConst(true, FalseVal));
941 }
942
943 if (match(A, m_Not(m_Specific(CondVal)))) {
944 Value *NewTrueVal = NewFoldedConst(true, TrueVal);
945 return SelectInst::Create(CondVal, NewTrueVal,
946 NewFoldedConst(false, FalseVal));
947 }
948
949 return nullptr;
950}
951
953 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
954 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
955 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
956 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
957 Value *A, *B, *C, *D;
958 Instruction::BinaryOps LHSOpcode, RHSOpcode;
959
960 if (Op0)
961 LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
962 if (Op1)
963 RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);
964
965 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
966 // a common term.
967 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
968 if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D))
969 return V;
970
971 // The instruction has the form "(A op' B) op (C)". Try to factorize common
972 // term.
973 if (Op0)
974 if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
975 if (Value *V =
976 tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident))
977 return V;
978
979 // The instruction has the form "(B) op (C op' D)". Try to factorize common
980 // term.
981 if (Op1)
982 if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
983 if (Value *V =
984 tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D))
985 return V;
986
987 return nullptr;
988}
989
990/// This tries to simplify binary operations which some other binary operation
991/// distributes over either by factorizing out common terms
992/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
993/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
994/// Returns the simplified value, or null if it didn't simplify.
996 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
997 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
998 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
999 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1000
1001 // Factorization.
1002 if (Value *R = tryFactorizationFolds(I))
1003 return R;
1004
1005 // Expansion.
1006 if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
1007 // The instruction has the form "(A op' B) op C". See if expanding it out
1008 // to "(A op C) op' (B op C)" results in simplifications.
1009 Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
1010 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
1011
1012 // Disable the use of undef because it's not safe to distribute undef.
1013 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1014 Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1015 Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
1016
1017 // Do "A op C" and "B op C" both simplify?
1018 if (L && R) {
1019 // They do! Return "L op' R".
1020 ++NumExpand;
1021 C = Builder.CreateBinOp(InnerOpcode, L, R);
1022 C->takeName(&I);
1023 return C;
1024 }
1025
1026 // Does "A op C" simplify to the identity value for the inner opcode?
1027 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1028 // They do! Return "B op C".
1029 ++NumExpand;
1030 C = Builder.CreateBinOp(TopLevelOpcode, B, C);
1031 C->takeName(&I);
1032 return C;
1033 }
1034
1035 // Does "B op C" simplify to the identity value for the inner opcode?
1036 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1037 // They do! Return "A op C".
1038 ++NumExpand;
1039 C = Builder.CreateBinOp(TopLevelOpcode, A, C);
1040 C->takeName(&I);
1041 return C;
1042 }
1043 }
1044
1045 if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
1046 // The instruction has the form "A op (B op' C)". See if expanding it out
1047 // to "(A op B) op' (A op C)" results in simplifications.
1048 Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
1049 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
1050
1051 // Disable the use of undef because it's not safe to distribute undef.
1052 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1053 Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
1054 Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1055
1056 // Do "A op B" and "A op C" both simplify?
1057 if (L && R) {
1058 // They do! Return "L op' R".
1059 ++NumExpand;
1060 A = Builder.CreateBinOp(InnerOpcode, L, R);
1061 A->takeName(&I);
1062 return A;
1063 }
1064
1065 // Does "A op B" simplify to the identity value for the inner opcode?
1066 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1067 // They do! Return "A op C".
1068 ++NumExpand;
1069 A = Builder.CreateBinOp(TopLevelOpcode, A, C);
1070 A->takeName(&I);
1071 return A;
1072 }
1073
1074 // Does "A op C" simplify to the identity value for the inner opcode?
1075 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1076 // They do! Return "A op B".
1077 ++NumExpand;
1078 A = Builder.CreateBinOp(TopLevelOpcode, A, B);
1079 A->takeName(&I);
1080 return A;
1081 }
1082 }
1083
1085}
1086
1088 Value *LHS,
1089 Value *RHS) {
1090 Value *A, *B, *C, *D, *E, *F;
1091 bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
1092 bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
1093 if (!LHSIsSelect && !RHSIsSelect)
1094 return nullptr;
1095
1096 FastMathFlags FMF;
1098 if (isa<FPMathOperator>(&I)) {
1099 FMF = I.getFastMathFlags();
1101 }
1102
1103 Instruction::BinaryOps Opcode = I.getOpcode();
1105
1106 Value *Cond, *True = nullptr, *False = nullptr;
1107
1108 // Special-case for add/negate combination. Replace the zero in the negation
1109 // with the trailing add operand:
1110 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
1111 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
1112 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
1113 // We need an 'add' and exactly 1 arm of the select to have been simplified.
1114 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
1115 return nullptr;
1116
1117 Value *N;
1118 if (True && match(FVal, m_Neg(m_Value(N)))) {
1119 Value *Sub = Builder.CreateSub(Z, N);
1120 return Builder.CreateSelect(Cond, True, Sub, I.getName());
1121 }
1122 if (False && match(TVal, m_Neg(m_Value(N)))) {
1123 Value *Sub = Builder.CreateSub(Z, N);
1124 return Builder.CreateSelect(Cond, Sub, False, I.getName());
1125 }
1126 return nullptr;
1127 };
1128
1129 if (LHSIsSelect && RHSIsSelect && A == D) {
1130 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
1131 Cond = A;
1132 True = simplifyBinOp(Opcode, B, E, FMF, Q);
1133 False = simplifyBinOp(Opcode, C, F, FMF, Q);
1134
1135 if (LHS->hasOneUse() && RHS->hasOneUse()) {
1136 if (False && !True)
1137 True = Builder.CreateBinOp(Opcode, B, E);
1138 else if (True && !False)
1139 False = Builder.CreateBinOp(Opcode, C, F);
1140 }
1141 } else if (LHSIsSelect && LHS->hasOneUse()) {
1142 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
1143 Cond = A;
1144 True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
1145 False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
1146 if (Value *NewSel = foldAddNegate(B, C, RHS))
1147 return NewSel;
1148 } else if (RHSIsSelect && RHS->hasOneUse()) {
1149 // X op (D ? E : F) -> D ? (X op E) : (X op F)
1150 Cond = D;
1151 True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
1152 False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
1153 if (Value *NewSel = foldAddNegate(E, F, LHS))
1154 return NewSel;
1155 }
1156
1157 if (!True || !False)
1158 return nullptr;
1159
1160 Value *SI = Builder.CreateSelect(Cond, True, False);
1161 SI->takeName(&I);
1162 return SI;
1163}
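// An illustrative sketch of the common-condition case above, where one arm
// simplifies (hypothetical values; the selects must have no other uses):
//   %s1 = select i1 %a, i32 %b, i32 %c
//   %s2 = select i1 %a, i32 0, i32 %f
//   %r  = add i32 %s1, %s2
// into:
//   %cf = add i32 %c, %f
//   %r  = select i1 %a, i32 %b, i32 %cf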
1164
1165/// Freely adapt every user of V as-if V was changed to !V.
1166/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
1168 assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
1169 for (User *U : make_early_inc_range(I->users())) {
1170 if (U == IgnoredUser)
1171 continue; // Don't consider this user.
1172 switch (cast<Instruction>(U)->getOpcode()) {
1173 case Instruction::Select: {
1174 auto *SI = cast<SelectInst>(U);
1175 SI->swapValues();
1176 SI->swapProfMetadata();
1177 break;
1178 }
1179 case Instruction::Br:
1180 cast<BranchInst>(U)->swapSuccessors(); // swaps prof metadata too
1181 break;
1182 case Instruction::Xor:
1183 replaceInstUsesWith(cast<Instruction>(*U), I);
1184 break;
1185 default:
1186 llvm_unreachable("Got unexpected user - out of sync with "
1187 "canFreelyInvertAllUsersOf() ?");
1188 }
1189 }
1190}
1191
1192/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
1193/// constant zero (which is the 'negate' form).
1194Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
1195 Value *NegV;
1196 if (match(V, m_Neg(m_Value(NegV))))
1197 return NegV;
1198
1199 // Constants can be considered to be negated values if they can be folded.
1200 if (ConstantInt *C = dyn_cast<ConstantInt>(V))
1201 return ConstantExpr::getNeg(C);
1202
1203 if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
1204 if (C->getType()->getElementType()->isIntegerTy())
1205 return ConstantExpr::getNeg(C);
1206
1207 if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
1208 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1209 Constant *Elt = CV->getAggregateElement(i);
1210 if (!Elt)
1211 return nullptr;
1212
1213 if (isa<UndefValue>(Elt))
1214 continue;
1215
1216 if (!isa<ConstantInt>(Elt))
1217 return nullptr;
1218 }
1219 return ConstantExpr::getNeg(CV);
1220 }
1221
1222 // Negate integer vector splats.
1223 if (auto *CV = dyn_cast<Constant>(V))
1224 if (CV->getType()->isVectorTy() &&
1225 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1226 return ConstantExpr::getNeg(CV);
1227
1228 return nullptr;
1229}
1230
1231/// A binop with a constant operand and a sign-extended boolean operand may be
1232/// converted into a select of constants by applying the binary operation to
1233/// the constant with the two possible values of the extended boolean (0 or -1).
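///
/// For example (illustrative, hypothetical values):
///   %s = sext i1 %x to i32
///   %r = and i32 %s, 42
/// becomes:
///   %r = select i1 %x, i32 42, i32 0    ; and(-1, 42) and and(0, 42)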
1234Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1235 // TODO: Handle non-commutative binop (constant is operand 0).
1236 // TODO: Handle zext.
1237 // TODO: Peek through 'not' of cast.
1238 Value *BO0 = BO.getOperand(0);
1239 Value *BO1 = BO.getOperand(1);
1240 Value *X;
1241 Constant *C;
1242 if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
1243 !X->getType()->isIntOrIntVectorTy(1))
1244 return nullptr;
1245
1246 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1249 Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
1250 Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
1251 return SelectInst::Create(X, TVal, FVal);
1252}
1253
1255 SelectInst *SI,
1256 bool IsTrueArm) {
1257 SmallVector<Constant *> ConstOps;
1258 for (Value *Op : I.operands()) {
1259 CmpInst::Predicate Pred;
1260 Constant *C = nullptr;
1261 if (Op == SI) {
1262 C = dyn_cast<Constant>(IsTrueArm ? SI->getTrueValue()
1263 : SI->getFalseValue());
1264 } else if (match(SI->getCondition(),
1265 m_ICmp(Pred, m_Specific(Op), m_Constant(C))) &&
1266 Pred == (IsTrueArm ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
1268 // Pass
1269 } else {
1270 C = dyn_cast<Constant>(Op);
1271 }
1272 if (C == nullptr)
1273 return nullptr;
1274
1275 ConstOps.push_back(C);
1276 }
1277
1278 return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout());
1279}
1280
1282 Value *NewOp, InstCombiner &IC) {
1283 Instruction *Clone = I.clone();
1284 Clone->replaceUsesOfWith(SI, NewOp);
1285 IC.InsertNewInstBefore(Clone, SI->getIterator());
1286 return Clone;
1287}
1288
1290 bool FoldWithMultiUse) {
1291 // Don't modify shared select instructions unless FoldWithMultiUse is set.
1292 if (!SI->hasOneUse() && !FoldWithMultiUse)
1293 return nullptr;
1294
1295 Value *TV = SI->getTrueValue();
1296 Value *FV = SI->getFalseValue();
1297 if (!(isa<Constant>(TV) || isa<Constant>(FV)))
1298 return nullptr;
1299
1300 // Bool selects with constant operands can be folded to logical ops.
1301 if (SI->getType()->isIntOrIntVectorTy(1))
1302 return nullptr;
1303
1304 // If it's a bitcast involving vectors, make sure it has the same number of
1305 // elements on both sides.
1306 if (auto *BC = dyn_cast<BitCastInst>(&Op)) {
1307 VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
1308 VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
1309
1310 // Verify that either both or neither are vectors.
1311 if ((SrcTy == nullptr) != (DestTy == nullptr))
1312 return nullptr;
1313
1314 // If vectors, verify that they have the same number of elements.
1315 if (SrcTy && SrcTy->getElementCount() != DestTy->getElementCount())
1316 return nullptr;
1317 }
1318
1319 // Test if an FCmpInst instruction is used exclusively by a select as
1320 // part of a minimum or maximum operation. If so, refrain from doing
1321 // any other folding. This helps out other analyses which understand
1322 // non-obfuscated minimum and maximum idioms. And in this case, at
1323 // least one of the comparison operands has at least one user besides
1324 // the compare (the select), which would often largely negate the
1325 // benefit of folding anyway.
1326 if (auto *CI = dyn_cast<FCmpInst>(SI->getCondition())) {
1327 if (CI->hasOneUse()) {
1328 Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1329 if ((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1))
1330 return nullptr;
1331 }
1332 }
1333
1334 // Make sure that one of the select arms constant folds successfully.
1335 Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ true);
1336 Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ false);
1337 if (!NewTV && !NewFV)
1338 return nullptr;
1339
1340 // Create an instruction for the arm that did not fold.
1341 if (!NewTV)
1342 NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);
1343 if (!NewFV)
1344 NewFV = foldOperationIntoSelectOperand(Op, SI, FV, *this);
1345 return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
1346}
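// An illustrative sketch of the select fold above, with hypothetical values:
//   %s = select i1 %c, i32 2, i32 4
//   %r = shl i32 1, %s
// folds both arms to constants, giving:
//   %r = select i1 %c, i32 4, i32 16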
1347
1349 Value *InValue, BasicBlock *InBB,
1350 const DataLayout &DL,
1351 const SimplifyQuery SQ) {
1352 // NB: It is a precondition of this transform that the operands be
1353 // phi translatable! This is usually trivially satisfied by limiting it
1354 // to constant ops, and for selects we do a more sophisticated check.
1356 for (Value *Op : I.operands()) {
1357 if (Op == PN)
1358 Ops.push_back(InValue);
1359 else
1360 Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
1361 }
1362
1363 // Don't consider the simplification successful if we get back a constant
1364 // expression. That's just an instruction in hiding.
1365 // Also reject the case where we simplify back to the phi node. We wouldn't
1366 // be able to remove it in that case.
1368 &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
1369 if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr()))
1370 return NewVal;
1371
1372 // Check if incoming PHI value can be replaced with constant
1373 // based on implied condition.
1374 BranchInst *TerminatorBI = dyn_cast<BranchInst>(InBB->getTerminator());
1375 const ICmpInst *ICmp = dyn_cast<ICmpInst>(&I);
1376 if (TerminatorBI && TerminatorBI->isConditional() &&
1377 TerminatorBI->getSuccessor(0) != TerminatorBI->getSuccessor(1) && ICmp) {
1378 bool LHSIsTrue = TerminatorBI->getSuccessor(0) == PN->getParent();
1379 std::optional<bool> ImpliedCond =
1380 isImpliedCondition(TerminatorBI->getCondition(), ICmp->getPredicate(),
1381 Ops[0], Ops[1], DL, LHSIsTrue);
1382 if (ImpliedCond)
1383 return ConstantInt::getBool(I.getType(), ImpliedCond.value());
1384 }
1385
1386 return nullptr;
1387}
1388
1390 unsigned NumPHIValues = PN->getNumIncomingValues();
1391 if (NumPHIValues == 0)
1392 return nullptr;
1393
1394 // We normally only transform phis with a single use. However, if a PHI has
1395 // multiple uses and they are all the same operation, we can fold *all* of the
1396 // uses into the PHI.
1397 if (!PN->hasOneUse()) {
1398 // Walk the use list for the instruction, comparing them to I.
1399 for (User *U : PN->users()) {
1400 Instruction *UI = cast<Instruction>(U);
1401 if (UI != &I && !I.isIdenticalTo(UI))
1402 return nullptr;
1403 }
1404 // Otherwise, we can replace *all* users with the new PHI we form.
1405 }
1406
1407 // Check to see whether the instruction can be folded into each phi operand.
1408 // If there is one operand that does not fold, remember the BB it is in.
1409 // If there is more than one or if *it* is a PHI, bail out.
1410 SmallVector<Value *> NewPhiValues;
1411 BasicBlock *NonSimplifiedBB = nullptr;
1412 Value *NonSimplifiedInVal = nullptr;
1413 for (unsigned i = 0; i != NumPHIValues; ++i) {
1414 Value *InVal = PN->getIncomingValue(i);
1415 BasicBlock *InBB = PN->getIncomingBlock(i);
1416
1417 if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InVal, InBB, DL, SQ)) {
1418 NewPhiValues.push_back(NewVal);
1419 continue;
1420 }
1421
1422 if (NonSimplifiedBB) return nullptr; // More than one non-simplified value.
1423
1424 NonSimplifiedBB = InBB;
1425 NonSimplifiedInVal = InVal;
1426 NewPhiValues.push_back(nullptr);
1427
1428 // If the InVal is an invoke at the end of the pred block, then we can't
1429 // insert a computation after it without breaking the edge.
1430 if (isa<InvokeInst>(InVal))
1431 if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB)
1432 return nullptr;
1433
1434 // If the incoming non-constant value is reachable from the phis block,
1435 // we'll push the operation across a loop backedge. This could result in
1436 // an infinite combine loop, and is generally non-profitable (especially
1437 // if the operation was originally outside the loop).
1438 if (isPotentiallyReachable(PN->getParent(), NonSimplifiedBB, nullptr, &DT,
1439 LI))
1440 return nullptr;
1441 }
1442
1443 // If there is exactly one non-simplified value, we can insert a copy of the
1444 // operation in that block. However, if this is a critical edge, we would be
1445 // inserting the computation on some other paths (e.g. inside a loop). Only
1446 // do this if the pred block is unconditionally branching into the phi block.
1447 // Also, make sure that the pred block is not dead code.
1448 if (NonSimplifiedBB != nullptr) {
1449 BranchInst *BI = dyn_cast<BranchInst>(NonSimplifiedBB->getTerminator());
1450 if (!BI || !BI->isUnconditional() ||
1451 !DT.isReachableFromEntry(NonSimplifiedBB))
1452 return nullptr;
1453 }
1454
1455 // Okay, we can do the transformation: create the new PHI node.
1456 PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
1457 InsertNewInstBefore(NewPN, PN->getIterator());
1458 NewPN->takeName(PN);
1459 NewPN->setDebugLoc(PN->getDebugLoc());
1460
1461 // If we are going to have to insert a new computation, do so right before the
1462 // predecessor's terminator.
1463 Instruction *Clone = nullptr;
1464 if (NonSimplifiedBB) {
1465 Clone = I.clone();
1466 for (Use &U : Clone->operands()) {
1467 if (U == PN)
1468 U = NonSimplifiedInVal;
1469 else
1470 U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
1471 }
1472 InsertNewInstBefore(Clone, NonSimplifiedBB->getTerminator()->getIterator());
1473 }
1474
1475 for (unsigned i = 0; i != NumPHIValues; ++i) {
1476 if (NewPhiValues[i])
1477 NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
1478 else
1479 NewPN->addIncoming(Clone, PN->getIncomingBlock(i));
1480 }
1481
1482 for (User *U : make_early_inc_range(PN->users())) {
1483 Instruction *User = cast<Instruction>(U);
1484 if (User == &I) continue;
1485 replaceInstUsesWith(*User, NewPN);
1487 }
1488
1489 replaceAllDbgUsesWith(const_cast<PHINode &>(*PN),
1490 const_cast<PHINode &>(*NewPN),
1491 const_cast<PHINode &>(*PN), DT);
1492 return replaceInstUsesWith(I, NewPN);
1493}
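// An illustrative sketch of folding an operation into a phi, with hypothetical
// values:
//   %p = phi i32 [ 1, %bb0 ], [ 2, %bb1 ]
//   %r = mul i32 %p, 8
// into:
//   %r = phi i32 [ 8, %bb0 ], [ 16, %bb1 ]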
1494
1496 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
1497 // we are guarding against replicating the binop in >1 predecessor.
1498 // This could miss matching a phi with 2 constant incoming values.
1499 auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
1500 auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
1501 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
1502 Phi0->getNumOperands() != Phi1->getNumOperands())
1503 return nullptr;
1504
1505 // TODO: Remove the restriction for binop being in the same block as the phis.
1506 if (BO.getParent() != Phi0->getParent() ||
1507 BO.getParent() != Phi1->getParent())
1508 return nullptr;
1509
1510 // Fold if, for every incoming block, either phi0 or phi1 provides the identity
1511 // constant of the binary operator; the binop then reduces to a phi of the
1512 // other incoming values. For example:
1513 // For example:
1514 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
1515 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
1516 // %add = add i32 %phi0, %phi1
1517 // ==>
1518 // %add = phi i32 [%j, %bb0], [%i, %bb1]
1520 /*AllowRHSConstant*/ false);
1521 if (C) {
1522 SmallVector<Value *, 4> NewIncomingValues;
1523 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
1524 auto &Phi0Use = std::get<0>(T);
1525 auto &Phi1Use = std::get<1>(T);
1526 if (Phi0->getIncomingBlock(Phi0Use) != Phi1->getIncomingBlock(Phi1Use))
1527 return false;
1528 Value *Phi0UseV = Phi0Use.get();
1529 Value *Phi1UseV = Phi1Use.get();
1530 if (Phi0UseV == C)
1531 NewIncomingValues.push_back(Phi1UseV);
1532 else if (Phi1UseV == C)
1533 NewIncomingValues.push_back(Phi0UseV);
1534 else
1535 return false;
1536 return true;
1537 };
1538
1539 if (all_of(zip(Phi0->operands(), Phi1->operands()),
1540 CanFoldIncomingValuePair)) {
1541 PHINode *NewPhi =
1542 PHINode::Create(Phi0->getType(), Phi0->getNumOperands());
1543 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
1544 "The number of collected incoming values should equal the number "
1545 "of the original PHINode operands!");
1546 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
1547 NewPhi->addIncoming(NewIncomingValues[I], Phi0->getIncomingBlock(I));
1548 return NewPhi;
1549 }
1550 }
1551
1552 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
1553 return nullptr;
1554
1555 // Match a pair of incoming constants for one of the predecessor blocks.
1556 BasicBlock *ConstBB, *OtherBB;
1557 Constant *C0, *C1;
1558 if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
1559 ConstBB = Phi0->getIncomingBlock(0);
1560 OtherBB = Phi0->getIncomingBlock(1);
1561 } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
1562 ConstBB = Phi0->getIncomingBlock(1);
1563 OtherBB = Phi0->getIncomingBlock(0);
1564 } else {
1565 return nullptr;
1566 }
1567 if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
1568 return nullptr;
1569
1570 // The block that we are hoisting to must reach here unconditionally.
1571 // Otherwise, we could be speculatively executing an expensive or
1572 // non-speculative op.
1573 auto *PredBlockBranch = dyn_cast<BranchInst>(OtherBB->getTerminator());
1574 if (!PredBlockBranch || PredBlockBranch->isConditional() ||
1575 !DT.isReachableFromEntry(OtherBB))
1576 return nullptr;
1577
1578 // TODO: This check could be tightened to only apply to binops (div/rem) that
1579 // are not safe to speculatively execute. But that could allow hoisting
1580 // potentially expensive instructions (fdiv for example).
1581 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
1583 return nullptr;
1584
1585 // Fold constants for the predecessor block with constant incoming values.
1586 Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
1587 if (!NewC)
1588 return nullptr;
1589
1590 // Make a new binop in the predecessor block with the non-constant incoming
1591 // values.
1592 Builder.SetInsertPoint(PredBlockBranch);
1593 Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
1594 Phi0->getIncomingValueForBlock(OtherBB),
1595 Phi1->getIncomingValueForBlock(OtherBB));
1596 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
1597 NotFoldedNewBO->copyIRFlags(&BO);
1598
1599 // Replace the binop with a phi of the new values. The old phis are dead.
1600 PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
1601 NewPhi->addIncoming(NewBO, OtherBB);
1602 NewPhi->addIncoming(NewC, ConstBB);
1603 return NewPhi;
1604}
1605
1607 if (!isa<Constant>(I.getOperand(1)))
1608 return nullptr;
1609
1610 if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
1611 if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
1612 return NewSel;
1613 } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
1614 if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
1615 return NewPhi;
1616 }
1617 return nullptr;
1618}
1619
1621 // If this GEP has only 0 indices, it is the same pointer as
1622 // Src. If Src is not a trivial GEP too, don't combine
1623 // the indices.
1624 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
1625 !Src.hasOneUse())
1626 return false;
1627 return true;
1628}
1629
1630Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
1631  if (!isa<VectorType>(Inst.getType()))
1632 return nullptr;
1633
1634 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
1635 Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
1636 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
1637 cast<VectorType>(Inst.getType())->getElementCount());
1638 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
1639 cast<VectorType>(Inst.getType())->getElementCount());
1640
1641 // If both operands of the binop are vector concatenations, then perform the
1642 // narrow binop on each pair of the source operands followed by concatenation
1643 // of the results.
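  // For example (hypothetical IR; the mask <0,1,2,3> concatenates two
  // <2 x i32> sources):
  //   %l = shufflevector <2 x i32> %L0, <2 x i32> %L1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  //   %r = shufflevector <2 x i32> %R0, <2 x i32> %R1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  //   %b = add <4 x i32> %l, %r
  // can be rewritten as two <2 x i32> adds whose results are concatenated by
  // the same mask.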
1644 Value *L0, *L1, *R0, *R1;
1645 ArrayRef<int> Mask;
1646 if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
1647 match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
1648 LHS->hasOneUse() && RHS->hasOneUse() &&
1649 cast<ShuffleVectorInst>(LHS)->isConcat() &&
1650 cast<ShuffleVectorInst>(RHS)->isConcat()) {
1651 // This transform does not have the speculative execution constraint as
1652 // below because the shuffle is a concatenation. The new binops are
1653 // operating on exactly the same elements as the existing binop.
1654 // TODO: We could ease the mask requirement to allow different undef lanes,
1655 // but that requires an analysis of the binop-with-undef output value.
1656 Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
1657 if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
1658 BO->copyIRFlags(&Inst);
1659 Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
1660 if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
1661 BO->copyIRFlags(&Inst);
1662 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
1663 }
1664
1665 auto createBinOpReverse = [&](Value *X, Value *Y) {
1666 Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
1667 if (auto *BO = dyn_cast<BinaryOperator>(V))
1668 BO->copyIRFlags(&Inst);
1669 Module *M = Inst.getModule();
1670    Function *F = Intrinsic::getDeclaration(
1671        M, Intrinsic::experimental_vector_reverse, V->getType());
1672 return CallInst::Create(F, V);
1673 };
1674
1675 // NOTE: Reverse shuffles don't require the speculative execution protection
1676 // below because they don't affect which lanes take part in the computation.
1677
1678 Value *V1, *V2;
1679 if (match(LHS, m_VecReverse(m_Value(V1)))) {
1680 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
1681 if (match(RHS, m_VecReverse(m_Value(V2))) &&
1682 (LHS->hasOneUse() || RHS->hasOneUse() ||
1683 (LHS == RHS && LHS->hasNUses(2))))
1684 return createBinOpReverse(V1, V2);
1685
1686    // Op(rev(V1), RHSSplat) -> rev(Op(V1, RHSSplat))
1687 if (LHS->hasOneUse() && isSplatValue(RHS))
1688 return createBinOpReverse(V1, RHS);
1689 }
1690 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
1691 else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
1692 return createBinOpReverse(LHS, V2);
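  // For example (hypothetical IR; vector.reverse stands for the
  // experimental_vector_reverse intrinsic used by createBinOpReverse):
  //   add (vector.reverse %a), (vector.reverse %b) --> vector.reverse (add %a, %b)
  //   add (vector.reverse %a), %splat              --> vector.reverse (add %a, %splat)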
1693
1694 // It may not be safe to reorder shuffles and things like div, urem, etc.
1695 // because we may trap when executing those ops on unknown vector elements.
1696 // See PR20059.
1697 if (!isSafeToSpeculativelyExecute(&Inst))
1698 return nullptr;
1699
1700 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
1701 Value *XY = Builder.CreateBinOp(Opcode, X, Y);
1702 if (auto *BO = dyn_cast<BinaryOperator>(XY))
1703 BO->copyIRFlags(&Inst);
1704 return new ShuffleVectorInst(XY, M);
1705 };
1706
1707 // If both arguments of the binary operation are shuffles that use the same
1708 // mask and shuffle within a single vector, move the shuffle after the binop.
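  // For example (hypothetical IR with the single-vector mask <1,0>):
  //   add (shufflevector %v1, poison, <1,0>), (shufflevector %v2, poison, <1,0>)
  //   --> shufflevector (add %v1, %v2), poison, <1,0>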
1709 if (match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))) &&
1710 match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) &&
1711 V1->getType() == V2->getType() &&
1712 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
1713 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
1714 return createBinOpShuffle(V1, V2, Mask);
1715 }
1716
1717 // If both arguments of a commutative binop are select-shuffles that use the
1718 // same mask with commuted operands, the shuffles are unnecessary.
1719 if (Inst.isCommutative() &&
1720 match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
1721 match(RHS,
1722 m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
1723 auto *LShuf = cast<ShuffleVectorInst>(LHS);
1724 auto *RShuf = cast<ShuffleVectorInst>(RHS);
1725 // TODO: Allow shuffles that contain undefs in the mask?
1726 // That is legal, but it reduces undef knowledge.
1727 // TODO: Allow arbitrary shuffles by shuffling after binop?
1728 // That might be legal, but we have to deal with poison.
1729 if (LShuf->isSelect() &&
1730 !is_contained(LShuf->getShuffleMask(), PoisonMaskElem) &&
1731 RShuf->isSelect() &&
1732 !is_contained(RShuf->getShuffleMask(), PoisonMaskElem)) {
1733 // Example:
1734 // LHS = shuffle V1, V2, <0, 5, 6, 3>
1735 // RHS = shuffle V2, V1, <0, 5, 6, 3>
1736 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
1737 Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
1738 NewBO->copyIRFlags(&Inst);
1739 return NewBO;
1740 }
1741 }
1742
1743 // If one argument is a shuffle within one vector and the other is a constant,
1744 // try moving the shuffle after the binary operation. This canonicalization
1745 // intends to move shuffles closer to other shuffles and binops closer to
1746 // other binops, so they can be folded. It may also enable demanded elements
1747 // transforms.
1748 Constant *C;
1749 auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
1750 if (InstVTy &&
1751 match(&Inst,
1752            m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))),
1753                      m_ImmConstant(C))) &&
1754 cast<FixedVectorType>(V1->getType())->getNumElements() <=
1755 InstVTy->getNumElements()) {
1756 assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
1757 "Shuffle should not change scalar type");
1758
1759 // Find constant NewC that has property:
1760 // shuffle(NewC, ShMask) = C
1761 // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
1762 // reorder is not possible. A 1-to-1 mapping is not required. Example:
1763 // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
1764 bool ConstOp1 = isa<Constant>(RHS);
1765 ArrayRef<int> ShMask = Mask;
1766 unsigned SrcVecNumElts =
1767 cast<FixedVectorType>(V1->getType())->getNumElements();
1768 UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
1769 SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
1770 bool MayChange = true;
1771 unsigned NumElts = InstVTy->getNumElements();
1772 for (unsigned I = 0; I < NumElts; ++I) {
1773 Constant *CElt = C->getAggregateElement(I);
1774 if (ShMask[I] >= 0) {
1775 assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
1776 Constant *NewCElt = NewVecC[ShMask[I]];
1777 // Bail out if:
1778 // 1. The constant vector contains a constant expression.
1779 // 2. The shuffle needs an element of the constant vector that can't
1780 // be mapped to a new constant vector.
1781 // 3. This is a widening shuffle that copies elements of V1 into the
1782 // extended elements (extending with undef is allowed).
1783 if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt) ||
1784 I >= SrcVecNumElts) {
1785 MayChange = false;
1786 break;
1787 }
1788 NewVecC[ShMask[I]] = CElt;
1789 }
1790 // If this is a widening shuffle, we must be able to extend with undef
1791 // elements. If the original binop does not produce an undef in the high
1792 // lanes, then this transform is not safe.
1793 // Similarly for undef lanes due to the shuffle mask, we can only
1794 // transform binops that preserve undef.
1795 // TODO: We could shuffle those non-undef constant values into the
1796 // result by using a constant vector (rather than an undef vector)
1797 // as operand 1 of the new binop, but that might be too aggressive
1798 // for target-independent shuffle creation.
1799 if (I >= SrcVecNumElts || ShMask[I] < 0) {
1800 Constant *MaybeUndef =
1801 ConstOp1
1802 ? ConstantFoldBinaryOpOperands(Opcode, UndefScalar, CElt, DL)
1803 : ConstantFoldBinaryOpOperands(Opcode, CElt, UndefScalar, DL);
1804 if (!MaybeUndef || !match(MaybeUndef, m_Undef())) {
1805 MayChange = false;
1806 break;
1807 }
1808 }
1809 }
1810 if (MayChange) {
1811 Constant *NewC = ConstantVector::get(NewVecC);
1812 // It may not be safe to execute a binop on a vector with undef elements
1813 // because the entire instruction can be folded to undef or create poison
1814 // that did not exist in the original code.
1815 if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
1816 NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
1817
1818 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
1819 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
1820 Value *NewLHS = ConstOp1 ? V1 : NewC;
1821 Value *NewRHS = ConstOp1 ? NewC : V1;
1822 return createBinOpShuffle(NewLHS, NewRHS, Mask);
1823 }
1824 }
1825
1826 // Try to reassociate to sink a splat shuffle after a binary operation.
1827 if (Inst.isAssociative() && Inst.isCommutative()) {
1828 // Canonicalize shuffle operand as LHS.
1829 if (isa<ShuffleVectorInst>(RHS))
1830 std::swap(LHS, RHS);
1831
1832 Value *X;
1833 ArrayRef<int> MaskC;
1834 int SplatIndex;
1835 Value *Y, *OtherOp;
1836 if (!match(LHS,
1837 m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
1838 !match(MaskC, m_SplatOrUndefMask(SplatIndex)) ||
1839 X->getType() != Inst.getType() ||
1840 !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
1841 return nullptr;
1842
1843 // FIXME: This may not be safe if the analysis allows undef elements. By
1844 // moving 'Y' before the splat shuffle, we are implicitly assuming
1845 // that it is not undef/poison at the splat index.
1846 if (isSplatValue(OtherOp, SplatIndex)) {
1847 std::swap(Y, OtherOp);
1848 } else if (!isSplatValue(Y, SplatIndex)) {
1849 return nullptr;
1850 }
1851
1852    // X and Y are splatted values, so perform the binary operation on those
1853    // values, splat the result, and then apply the second binary operation:
1854 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
1855 Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
1856 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
1857 Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
1858 Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
1859
1860 // Intersect FMF on both new binops. Other (poison-generating) flags are
1861 // dropped to be safe.
1862 if (isa<FPMathOperator>(R)) {
1863 R->copyFastMathFlags(&Inst);
1864 R->andIRFlags(RHS);
1865 }
1866 if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
1867 NewInstBO->copyIRFlags(R);
1868 return R;
1869 }
1870
1871 return nullptr;
1872}
1873
1874/// Try to narrow the width of a binop if at least 1 operand is an extend
1875/// of a value. This requires a potentially expensive known bits check to make
1876/// sure the narrow op does not overflow.
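/// For illustration (hypothetical IR, valid only when the narrow operation is
/// known not to overflow):
///   add (zext i8 %x to i32), (zext i8 %y to i32) --> zext (add nuw i8 %x, %y) to i32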
1877Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
1878 // We need at least one extended operand.
1879 Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
1880
1881 // If this is a sub, we swap the operands since we always want an extension
1882 // on the RHS. The LHS can be an extension or a constant.
1883 if (BO.getOpcode() == Instruction::Sub)
1884 std::swap(Op0, Op1);
1885
1886 Value *X;
1887 bool IsSext = match(Op0, m_SExt(m_Value(X)));
1888 if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
1889 return nullptr;
1890
1891 // If both operands are the same extension from the same source type and we
1892 // can eliminate at least one (hasOneUse), this might work.
1893 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
1894 Value *Y;
1895 if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
1896 cast<Operator>(Op1)->getOpcode() == CastOpc &&
1897 (Op0->hasOneUse() || Op1->hasOneUse()))) {
1898 // If that did not match, see if we have a suitable constant operand.
1899 // Truncating and extending must produce the same constant.
1900 Constant *WideC;
1901 if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
1902 return nullptr;
1903 Constant *NarrowC = getLosslessTrunc(WideC, X->getType(), CastOpc);
1904 if (!NarrowC)
1905 return nullptr;
1906 Y = NarrowC;
1907 }
1908
1909 // Swap back now that we found our operands.
1910 if (BO.getOpcode() == Instruction::Sub)
1911 std::swap(X, Y);
1912
1913 // Both operands have narrow versions. Last step: the math must not overflow
1914 // in the narrow width.
1915 if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
1916 return nullptr;
1917
1918 // bo (ext X), (ext Y) --> ext (bo X, Y)
1919 // bo (ext X), C --> ext (bo X, C')
1920 Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
1921 if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
1922 if (IsSext)
1923 NewBinOp->setHasNoSignedWrap();
1924 else
1925 NewBinOp->setHasNoUnsignedWrap();
1926 }
1927 return CastInst::Create(CastOpc, NarrowBO, BO.getType());
1928}
1929
1930static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
1931  // At least one GEP must be inbounds.
1932 if (!GEP1.isInBounds() && !GEP2.isInBounds())
1933 return false;
1934
1935 return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
1936 (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
1937}
1938
1939/// Thread a GEP operation with constant indices through the constant true/false
1940/// arms of a select.
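/// For illustration (hypothetical example; @A and @B are constant globals):
///   %sel = select i1 %c, ptr @A, ptr @B
///   %g   = getelementptr inbounds [4 x i32], ptr %sel, i64 0, i64 2
/// becomes a select between the two constant-folded GEPs of @A and @B.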
1941static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
1942                                  InstCombiner::BuilderTy &Builder) {
1943 if (!GEP.hasAllConstantIndices())
1944 return nullptr;
1945
1946 Instruction *Sel;
1947 Value *Cond;
1948 Constant *TrueC, *FalseC;
1949 if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
1950 !match(Sel,
1951 m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
1952 return nullptr;
1953
1954 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
1955 // Propagate 'inbounds' and metadata from existing instructions.
1956 // Note: using IRBuilder to create the constants for efficiency.
1957 SmallVector<Value *, 4> IndexC(GEP.indices());
1958 bool IsInBounds = GEP.isInBounds();
1959 Type *Ty = GEP.getSourceElementType();
1960 Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", IsInBounds);
1961 Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", IsInBounds);
1962 return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
1963}
1964
1965Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
1966                                             GEPOperator *Src) {
1967 // Combine Indices - If the source pointer to this getelementptr instruction
1968 // is a getelementptr instruction with matching element type, combine the
1969 // indices of the two getelementptr instructions into a single instruction.
1970 if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
1971 return nullptr;
1972
1973 // For constant GEPs, use a more general offset-based folding approach.
1974 Type *PtrTy = Src->getType()->getScalarType();
1975 if (GEP.hasAllConstantIndices() &&
1976 (Src->hasOneUse() || Src->hasAllConstantIndices())) {
1977 // Split Src into a variable part and a constant suffix.
1978    gep_type_iterator GTI = gep_type_begin(*Src);
1979    Type *BaseType = GTI.getIndexedType();
1980 bool IsFirstType = true;
1981 unsigned NumVarIndices = 0;
1982 for (auto Pair : enumerate(Src->indices())) {
1983 if (!isa<ConstantInt>(Pair.value())) {
1984 BaseType = GTI.getIndexedType();
1985 IsFirstType = false;
1986 NumVarIndices = Pair.index() + 1;
1987 }
1988 ++GTI;
1989 }
1990
1991 // Determine the offset for the constant suffix of Src.
1992    APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
1993    if (NumVarIndices != Src->getNumIndices()) {
1994      // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
1995 if (BaseType->isScalableTy())
1996 return nullptr;
1997
1998 SmallVector<Value *> ConstantIndices;
1999 if (!IsFirstType)
2000 ConstantIndices.push_back(
2001            Constant::getNullValue(Type::getInt32Ty(GEP.getContext())));
2002      append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices));
2003 Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
2004 }
2005
2006 // Add the offset for GEP (which is fully constant).
2007 if (!GEP.accumulateConstantOffset(DL, Offset))
2008 return nullptr;
2009
2010 APInt OffsetOld = Offset;
2011 // Convert the total offset back into indices.
2012 SmallVector<APInt> ConstIndices =
2013        DL.getGEPIndicesForOffset(BaseType, Offset);
2014    if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) {
2015 // If both GEP are constant-indexed, and cannot be merged in either way,
2016 // convert them to a GEP of i8.
2017 if (Src->hasAllConstantIndices())
2018 return replaceInstUsesWith(
2019            GEP, Builder.CreateGEP(
2020                     Builder.getInt8Ty(), Src->getOperand(0),
2021 Builder.getInt(OffsetOld), "",
2022 isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
2023 return nullptr;
2024 }
2025
2026 bool IsInBounds = isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP));
2027 SmallVector<Value *> Indices;
2028 append_range(Indices, drop_end(Src->indices(),
2029 Src->getNumIndices() - NumVarIndices));
2030 for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) {
2031 Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
2032 // Even if the total offset is inbounds, we may end up representing it
2033 // by first performing a larger negative offset, and then a smaller
2034 // positive one. The large negative offset might go out of bounds. Only
2035 // preserve inbounds if all signs are the same.
2036 IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative();
2037 }
2038
2039 return replaceInstUsesWith(
2040 GEP, Builder.CreateGEP(Src->getSourceElementType(), Src->getOperand(0),
2041 Indices, "", IsInBounds));
2042 }
2043
2044 if (Src->getResultElementType() != GEP.getSourceElementType())
2045 return nullptr;
2046
2047 SmallVector<Value*, 8> Indices;
2048
2049 // Find out whether the last index in the source GEP is a sequential idx.
2050 bool EndsWithSequential = false;
2051 for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
2052 I != E; ++I)
2053 EndsWithSequential = I.isSequential();
2054
2055 // Can we combine the two pointer arithmetics offsets?
2056 if (EndsWithSequential) {
2057 // Replace: gep (gep %P, long B), long A, ...
2058 // With: T = long A+B; gep %P, T, ...
2059 Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
2060 Value *GO1 = GEP.getOperand(1);
2061
2062 // If they aren't the same type, then the input hasn't been processed
2063 // by the loop above yet (which canonicalizes sequential index types to
2064 // intptr_t). Just avoid transforming this until the input has been
2065 // normalized.
2066 if (SO1->getType() != GO1->getType())
2067 return nullptr;
2068
2069 Value *Sum =
2070 simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
2071 // Only do the combine when we are sure the cost after the
2072 // merge is never more than that before the merge.
2073 if (Sum == nullptr)
2074 return nullptr;
2075
2076 // Update the GEP in place if possible.
2077 if (Src->getNumOperands() == 2) {
2078 GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
2079 replaceOperand(GEP, 0, Src->getOperand(0));
2080 replaceOperand(GEP, 1, Sum);
2081 return &GEP;
2082 }
2083 Indices.append(Src->op_begin()+1, Src->op_end()-1);
2084 Indices.push_back(Sum);
2085 Indices.append(GEP.op_begin()+2, GEP.op_end());
2086 } else if (isa<Constant>(*GEP.idx_begin()) &&
2087 cast<Constant>(*GEP.idx_begin())->isNullValue() &&
2088 Src->getNumOperands() != 1) {
2089 // Otherwise we can do the fold if the first index of the GEP is a zero
2090 Indices.append(Src->op_begin()+1, Src->op_end());
2091 Indices.append(GEP.idx_begin()+1, GEP.idx_end());
2092 }
2093
2094 if (!Indices.empty())
2095 return replaceInstUsesWith(
2096        GEP, Builder.CreateGEP(
2097                 Src->getSourceElementType(), Src->getOperand(0), Indices, "",
2098 isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
2099
2100 return nullptr;
2101}
2102
2103Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
2104  Value *PtrOp = GEP.getOperand(0);
2105 SmallVector<Value *, 8> Indices(GEP.indices());
2106 Type *GEPType = GEP.getType();
2107 Type *GEPEltType = GEP.getSourceElementType();
2108 bool IsGEPSrcEleScalable = GEPEltType->isScalableTy();
2109 if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
2110                                 SQ.getWithInstruction(&GEP)))
2111    return replaceInstUsesWith(GEP, V);
2112
2113 // For vector geps, use the generic demanded vector support.
2114 // Skip if GEP return type is scalable. The number of elements is unknown at
2115 // compile-time.
2116 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
2117 auto VWidth = GEPFVTy->getNumElements();
2118 APInt UndefElts(VWidth, 0);
2119 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2120 if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
2121 UndefElts)) {
2122 if (V != &GEP)
2123 return replaceInstUsesWith(GEP, V);
2124 return &GEP;
2125 }
2126
2127 // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
2128 // possible (decide on canonical form for pointer broadcast), 3) exploit
2129 // undef elements to decrease demanded bits
2130 }
2131
2132 // Eliminate unneeded casts for indices, and replace indices which displace
2133 // by multiples of a zero size type with zero.
2134 bool MadeChange = false;
2135
2136 // Index width may not be the same width as pointer width.
2137 // Data layout chooses the right type based on supported integer types.
2138 Type *NewScalarIndexTy =
2139 DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
2140
2141  gep_type_iterator GTI = gep_type_begin(GEP);
2142  for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
2143 ++I, ++GTI) {
2144 // Skip indices into struct types.
2145 if (GTI.isStruct())
2146 continue;
2147
2148 Type *IndexTy = (*I)->getType();
2149 Type *NewIndexType =
2150 IndexTy->isVectorTy()
2151 ? VectorType::get(NewScalarIndexTy,
2152 cast<VectorType>(IndexTy)->getElementCount())
2153 : NewScalarIndexTy;
2154
2155 // If the element type has zero size then any index over it is equivalent
2156 // to an index of zero, so replace it with zero if it is not zero already.
2157 Type *EltTy = GTI.getIndexedType();
2158 if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
2159 if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
2160 *I = Constant::getNullValue(NewIndexType);
2161 MadeChange = true;
2162 }
2163
2164 if (IndexTy != NewIndexType) {
2165 // If we are using a wider index than needed for this platform, shrink
2166 // it to what we need. If narrower, sign-extend it to what we need.
2167 // This explicit cast can make subsequent optimizations more obvious.
2168 *I = Builder.CreateIntCast(*I, NewIndexType, true);
2169 MadeChange = true;
2170 }
2171 }
2172 if (MadeChange)
2173 return &GEP;
2174
2175 // Check to see if the inputs to the PHI node are getelementptr instructions.
2176 if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
2177 auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
2178 if (!Op1)
2179 return nullptr;
2180
2181 // Don't fold a GEP into itself through a PHI node. This can only happen
2182 // through the back-edge of a loop. Folding a GEP into itself means that
2183 // the value of the previous iteration needs to be stored in the meantime,
2184 // thus requiring an additional register variable to be live, but not
2185 // actually achieving anything (the GEP still needs to be executed once per
2186 // loop iteration).
2187 if (Op1 == &GEP)
2188 return nullptr;
2189
2190 int DI = -1;
2191
2192 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
2193 auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
2194 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
2195 Op1->getSourceElementType() != Op2->getSourceElementType())
2196 return nullptr;
2197
2198 // As for Op1 above, don't try to fold a GEP into itself.
2199 if (Op2 == &GEP)
2200 return nullptr;
2201
2202 // Keep track of the type as we walk the GEP.
2203 Type *CurTy = nullptr;
2204
2205 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
2206 if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
2207 return nullptr;
2208
2209 if (Op1->getOperand(J) != Op2->getOperand(J)) {
2210 if (DI == -1) {
2211            // We have not seen any differences in the GEPs feeding the
2212            // PHI yet, so we record this one if it is allowed to be a
2213            // variable.
2214
2215            // The first two arguments can vary for any GEP; the rest have to
2216            // be static for struct slots.
2217 if (J > 1) {
2218 assert(CurTy && "No current type?");
2219 if (CurTy->isStructTy())
2220 return nullptr;
2221 }
2222
2223 DI = J;
2224 } else {
2225 // The GEP is different by more than one input. While this could be
2226 // extended to support GEPs that vary by more than one variable it
2227 // doesn't make sense since it greatly increases the complexity and
2228 // would result in an R+R+R addressing mode which no backend
2229 // directly supports and would need to be broken into several
2230 // simpler instructions anyway.
2231 return nullptr;
2232 }
2233 }
2234
2235 // Sink down a layer of the type for the next iteration.
2236 if (J > 0) {
2237 if (J == 1) {
2238 CurTy = Op1->getSourceElementType();
2239 } else {
2240 CurTy =
2241 GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
2242 }
2243 }
2244 }
2245 }
2246
2247 // If not all GEPs are identical we'll have to create a new PHI node.
2248 // Check that the old PHI node has only one use so that it will get
2249 // removed.
2250 if (DI != -1 && !PN->hasOneUse())
2251 return nullptr;
2252
2253 auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
2254 if (DI == -1) {
2255 // All the GEPs feeding the PHI are identical. Clone one down into our
2256 // BB so that it can be merged with the current GEP.
2257 } else {
2258 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
2259 // into the current block so it can be merged, and create a new PHI to
2260 // set that index.
2261 PHINode *NewPN;
2262 {
2263        IRBuilderBase::InsertPointGuard Guard(Builder);
2264        Builder.SetInsertPoint(PN);
2265        NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
2266 PN->getNumOperands());
2267 }
2268
2269 for (auto &I : PN->operands())
2270 NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
2271 PN->getIncomingBlock(I));
2272
2273 NewGEP->setOperand(DI, NewPN);
2274 }
2275
2276 NewGEP->insertBefore(*GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
2277 return replaceOperand(GEP, 0, NewGEP);
2278 }
2279
2280 if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
2281 if (Instruction *I = visitGEPOfGEP(GEP, Src))
2282 return I;
2283
2284 // Skip if GEP source element type is scalable. The type alloc size is unknown
2285 // at compile-time.
2286 if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
2287 unsigned AS = GEP.getPointerAddressSpace();
2288 if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
2289 DL.getIndexSizeInBits(AS)) {
2290 uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
2291
2292 bool Matched = false;
2293 uint64_t C;
2294 Value *V = nullptr;
2295 if (TyAllocSize == 1) {
2296 V = GEP.getOperand(1);
2297 Matched = true;
2298 } else if (match(GEP.getOperand(1),
2299 m_AShr(m_Value(V), m_ConstantInt(C)))) {
2300 if (TyAllocSize == 1ULL << C)
2301 Matched = true;
2302 } else if (match(GEP.getOperand(1),
2303 m_SDiv(m_Value(V), m_ConstantInt(C)))) {
2304 if (TyAllocSize == C)
2305 Matched = true;
2306 }
2307
2308 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), but
2309 // only if both point to the same underlying object (otherwise provenance
2310 // is not necessarily retained).
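      // For example (hypothetical IR; %x and %y are based on the same object):
      //   %px = ptrtoint ptr %x to i64
      //   %py = ptrtoint ptr %y to i64
      //   %d  = sub i64 %py, %px
      //   %g  = getelementptr i8, ptr %x, i64 %d   ; folds to %y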
2311 Value *Y;
2312 Value *X = GEP.getOperand(0);
2313 if (Matched &&
2314          match(V, m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
2315          getUnderlyingObject(X) == getUnderlyingObject(Y))
2316        return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
2317    }
2318 }
2319
2320 // We do not handle pointer-vector geps here.
2321 if (GEPType->isVectorTy())
2322 return nullptr;
2323
2324 if (!GEP.isInBounds()) {
2325 unsigned IdxWidth =
2326        DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
2327    APInt BasePtrOffset(IdxWidth, 0);
2328 Value *UnderlyingPtrOp =
2329        PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
2330                                                         BasePtrOffset);
2331 bool CanBeNull, CanBeFreed;
2332 uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
2333 DL, CanBeNull, CanBeFreed);
2334 if (!CanBeNull && !CanBeFreed && DerefBytes != 0) {
2335 if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
2336 BasePtrOffset.isNonNegative()) {
2337 APInt AllocSize(IdxWidth, DerefBytes);
2338 if (BasePtrOffset.ule(AllocSize)) {
2339          return GetElementPtrInst::CreateInBounds(
2340              GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
2341 }
2342 }
2343 }
2344 }
2345
2346  if (Instruction *R = foldSelectGEP(GEP, Builder))
2347    return R;
2348
2349 return nullptr;
2350}
2351
2352static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
2353                                         Instruction *AI) {
2354 if (isa<ConstantPointerNull>(V))
2355 return true;
2356 if (auto *LI = dyn_cast<LoadInst>(V))
2357 return isa<GlobalVariable>(LI->getPointerOperand());
2358 // Two distinct allocations will never be equal.
2359 return isAllocLikeFn(V, &TLI) && V != AI;
2360}
2361
2362/// Given a call CB which uses an address UsedV, return true if we can prove the
2363/// call's only possible effect is storing to UsedV.
2364static bool isRemovableWrite(CallBase &CB, Value *UsedV,
2365 const TargetLibraryInfo &TLI) {
2366 if (!CB.use_empty())
2367 // TODO: add recursion if returned attribute is present
2368 return false;
2369
2370 if (CB.isTerminator())
2371 // TODO: remove implementation restriction
2372 return false;
2373
2374 if (!CB.willReturn() || !CB.doesNotThrow())
2375 return false;
2376
2377 // If the only possible side effect of the call is writing to the alloca,
2378 // and the result isn't used, we can safely remove any reads implied by the
2379 // call including those which might read the alloca itself.
2380 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
2381 return Dest && Dest->Ptr == UsedV;
2382}
2383
2384static bool isAllocSiteRemovable(Instruction *AI,
2385                                 SmallVectorImpl<WeakTrackingVH> &Users,
2386                                 const TargetLibraryInfo &TLI) {
2387  SmallVector<Instruction*, 4> Worklist;
2388  const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
2389 Worklist.push_back(AI);
2390
2391 do {
2392 Instruction *PI = Worklist.pop_back_val();
2393 for (User *U : PI->users()) {
2394 Instruction *I = cast<Instruction>(U);
2395 switch (I->getOpcode()) {
2396 default:
2397 // Give up the moment we see something we can't handle.
2398 return false;
2399
2400 case Instruction::AddrSpaceCast:
2401 case Instruction::BitCast:
2402 case Instruction::GetElementPtr:
2403 Users.emplace_back(I);
2404 Worklist.push_back(I);
2405 continue;
2406
2407 case Instruction::ICmp: {
2408 ICmpInst *ICI = cast<ICmpInst>(I);
2409 // We can fold eq/ne comparisons with null to false/true, respectively.
2410 // We also fold comparisons in some conditions provided the alloc has
2411 // not escaped (see isNeverEqualToUnescapedAlloc).
2412 if (!ICI->isEquality())
2413 return false;
2414 unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
2415 if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
2416 return false;
2417 Users.emplace_back(I);
2418 continue;
2419 }
2420
2421 case Instruction::Call:
2422 // Ignore no-op and store intrinsics.
2423 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
2424 switch (II->getIntrinsicID()) {
2425 default:
2426 return false;
2427
2428 case Intrinsic::memmove:
2429 case Intrinsic::memcpy:
2430 case Intrinsic::memset: {
2431 MemIntrinsic *MI = cast<MemIntrinsic>(II);
2432 if (MI->isVolatile() || MI->getRawDest() != PI)
2433 return false;
2434 [[fallthrough]];
2435 }
2436 case Intrinsic::assume:
2437 case Intrinsic::invariant_start:
2438 case Intrinsic::invariant_end:
2439 case Intrinsic::lifetime_start:
2440 case Intrinsic::lifetime_end:
2441 case Intrinsic::objectsize:
2442 Users.emplace_back(I);
2443 continue;
2444 case Intrinsic::launder_invariant_group:
2445 case Intrinsic::strip_invariant_group:
2446 Users.emplace_back(I);
2447 Worklist.push_back(I);
2448 continue;
2449 }
2450 }
2451
2452 if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
2453 Users.emplace_back(I);
2454 continue;
2455 }
2456
2457 if (getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
2458 getAllocationFamily(I, &TLI) == Family) {
2459 assert(Family);
2460 Users.emplace_back(I);
2461 continue;
2462 }
2463
2464 if (getReallocatedOperand(cast<CallBase>(I)) == PI &&
2465 getAllocationFamily(I, &TLI) == Family) {
2466 assert(Family);
2467 Users.emplace_back(I);
2468 Worklist.push_back(I);
2469 continue;
2470 }
2471
2472 return false;
2473
2474 case Instruction::Store: {
2475 StoreInst *SI = cast<StoreInst>(I);
2476 if (SI->isVolatile() || SI->getPointerOperand() != PI)
2477 return false;
2478 Users.emplace_back(I);
2479 continue;
2480 }
2481 }
2482 llvm_unreachable("missing a return?");
2483 }
2484 } while (!Worklist.empty());
2485 return true;
2486}
2487
2488Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
2489  assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
2490
2491  // If we have a malloc call which is used only in comparisons to null and in
2492  // free calls, delete the calls and replace the comparisons with true or
2493  // false as appropriate.
2494
2495 // This is based on the principle that we can substitute our own allocation
2496 // function (which will never return null) rather than knowledge of the
2497 // specific function being called. In some sense this can change the permitted
2498 // outputs of a program (when we convert a malloc to an alloca, the fact that
2499 // the allocation is now on the stack is potentially visible, for example),
2500  // but we believe it does so in a permissible manner.
2501  SmallVector<WeakTrackingVH, 64> Users;
2502
2503 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
2504 // before each store.
2505  SmallVector<DbgVariableIntrinsic *, 8> DVIs;
2506  std::unique_ptr<DIBuilder> DIB;
2507 if (isa<AllocaInst>(MI)) {
2508 findDbgUsers(DVIs, &MI);
2509 DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
2510 }
2511
2512 if (isAllocSiteRemovable(&MI, Users, TLI)) {
2513 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2514      // Lower all @llvm.objectsize calls first because they may
2515 // use a bitcast/GEP of the alloca we are removing.
2516 if (!Users[i])
2517 continue;
2518
2519 Instruction *I = cast<Instruction>(&*Users[i]);
2520
2521 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
2522 if (II->getIntrinsicID() == Intrinsic::objectsize) {
2523 SmallVector<Instruction *> InsertedInstructions;
2524 Value *Result = lowerObjectSizeCall(
2525 II, DL, &TLI, AA, /*MustSucceed=*/true, &InsertedInstructions);
2526 for (Instruction *Inserted : InsertedInstructions)
2527 Worklist.add(Inserted);
2528 replaceInstUsesWith(*I, Result);
2529          eraseInstFromFunction(*I);
2530          Users[i] = nullptr; // Skip examining in the next loop.
2531 }
2532 }
2533 }
2534 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2535 if (!Users[i])
2536 continue;
2537
2538 Instruction *I = cast<Instruction>(&*Users[i]);
2539
2540 if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
2541        replaceInstUsesWith(*C,
2542                            ConstantInt::get(Type::getInt1Ty(C->getContext()),
2543 C->isFalseWhenEqual()));
2544 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
2545 for (auto *DVI : DVIs)
2546 if (DVI->isAddressOfVariable())
2547 ConvertDebugDeclareToDebugValue(DVI, SI, *DIB);
2548 } else {
2549 // Casts, GEP, or anything else: we're about to delete this instruction,
2550        // so it cannot have any valid uses.
2551 replaceInstUsesWith(*I, PoisonValue::get(I->getType()));
2552 }
2553      eraseInstFromFunction(*I);
2554    }
2555
2556 if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
2557 // Replace invoke with a NOP intrinsic to maintain the original CFG
2558 Module *M = II->getModule();
2559 Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
2560 InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
2561 std::nullopt, "", II->getParent());
2562 }
2563
2564 // Remove debug intrinsics which describe the value contained within the
2565 // alloca. In addition to removing dbg.{declare,addr} which simply point to
2566 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
2567 //
2568 // ```
2569 // define void @foo(i32 %0) {
2570 // %a = alloca i32 ; Deleted.
2571 // store i32 %0, i32* %a
2572 // dbg.value(i32 %0, "arg0") ; Not deleted.
2573 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
2574 // call void @trivially_inlinable_no_op(i32* %a)
2575 // ret void
2576 // }
2577 // ```
2578 //
2579 // This may not be required if we stop describing the contents of allocas
2580 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
2581 // the LowerDbgDeclare utility.
2582 //
2583 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
2584 // "arg0" dbg.value may be stale after the call. However, failing to remove
2585 // the DW_OP_deref dbg.value causes large gaps in location coverage.
2586 for (auto *DVI : DVIs)
2587 if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
2588 DVI->eraseFromParent();
2589
2590 return eraseInstFromFunction(MI);
2591 }
2592 return nullptr;
2593}
2594
2595/// Move the call to free before a NULL test.
2596///
2597/// Check if this free is accessed after its argument has been tested
2598/// against NULL (property 0).
2599/// If yes, it is legal to move this call in its predecessor block.
2600///
2601/// The move is performed only if the block containing the call to free
2602/// will be removed, i.e.:
2603/// 1. it has only one predecessor P, and P has two successors
2604/// 2. it contains the call, noops, and an unconditional branch
2605/// 3. its successor is the same as its predecessor's successor
2606///
2607/// Profitability is not a concern here; this function should
2608/// be called only if the caller knows this transformation would be
2609/// profitable (e.g., for code size).
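/// For illustration (hypothetical CFG; free(null) is a no-op, so hoisting the
/// call above the null test is safe):
///   entry:   %c = icmp eq ptr %p, null
///            br i1 %c, label %exit, label %free.bb
///   free.bb: call void @free(ptr %p)
///            br label %exit
/// After the transform the call to free sits in 'entry' before the branch, and
/// the now-trivial %free.bb can be cleaned up by later CFG simplification.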
2610static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
2611                                                const DataLayout &DL) {
2612 Value *Op = FI.getArgOperand(0);
2613 BasicBlock *FreeInstrBB = FI.getParent();
2614 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
2615
2616 // Validate part of constraint #1: Only one predecessor
2617  // FIXME: We can extend the number of predecessors, but in that case, we
2618 // would duplicate the call to free in each predecessor and it may
2619 // not be profitable even for code size.
2620 if (!PredBB)
2621 return nullptr;
2622
2623  // Validate constraint #2: Does this block contain only the call to
2624 // free, noops, and an unconditional branch?
2625 BasicBlock *SuccBB;
2626 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
2627 if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
2628 return nullptr;
2629
2630  // If there are only 2 instructions in the block, at this point they are
2631  // the call to free and the unconditional branch.
2632 // If there are more than 2 instructions, check that they are noops
2633 // i.e., they won't hurt the performance of the generated code.
2634 if (FreeInstrBB->size() != 2) {
2635 for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
2636 if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
2637 continue;
2638 auto *Cast = dyn_cast<CastInst>(&Inst);
2639 if (!Cast || !Cast->isNoopCast(DL))
2640 return nullptr;
2641 }
2642 }
2643 // Validate the rest of constraint #1 by matching on the pred branch.
2644 Instruction *TI = PredBB->getTerminator();
2645 BasicBlock *TrueBB, *FalseBB;
2646  ICmpInst::Predicate Pred;
2647  if (!match(TI, m_Br(m_ICmp(Pred,
2648                             m_CombineOr(m_Specific(Op),
2649                                         m_Specific(Op->stripPointerCasts())),
2650 m_Zero()),
2651 TrueBB, FalseBB)))
2652 return nullptr;
2653 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
2654 return nullptr;
2655
2656 // Validate constraint #3: Ensure the null case just falls through.
2657 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
2658 return nullptr;
2659 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
2660 "Broken CFG: missing edge from predecessor to successor");
2661
2662 // At this point, we know that everything in FreeInstrBB can be moved
2663 // before TI.
2664 for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
2665 if (&Instr == FreeInstrBBTerminator)
2666 break;
2667 Instr.moveBeforePreserving(TI);
2668 }
2669 assert(FreeInstrBB->size() == 1 &&
2670 "Only the branch instruction should remain");
2671
2672 // Now that we've moved the call to free before the NULL check, we have to
2673 // remove any attributes on its parameter that imply it's non-null, because
2674 // those attributes might have only been valid because of the NULL check, and
2675 // we can get miscompiles if we keep them. This is conservative if non-null is
2676 // also implied by something other than the NULL check, but it's guaranteed to
2677 // be correct, and the conservativeness won't matter in practice, since the
2678 // attributes are irrelevant for the call to free itself and the pointer
2679 // shouldn't be used after the call.
2680 AttributeList Attrs = FI.getAttributes();
2681 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
2682 Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
2683 if (Dereferenceable.isValid()) {
2684 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
2685 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
2686 Attribute::Dereferenceable);
2687 Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
2688 }
2689 FI.setAttributes(Attrs);
2690
2691 return &FI;
2692}
2693
2694Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
2695  // free undef -> unreachable.
2696 if (isa<UndefValue>(Op)) {
2697 // Leave a marker since we can't modify the CFG here.
2698    CreateNonTerminatorUnreachable(&FI);
2699    return eraseInstFromFunction(FI);
2700 }
2701
2702 // If we have 'free null' delete the instruction. This can happen in stl code
2703 // when lots of inlining happens.
2704 if (isa<ConstantPointerNull>(Op))
2705 return eraseInstFromFunction(FI);
2706
2707 // If we had free(realloc(...)) with no intervening uses, then eliminate the
2708 // realloc() entirely.
2709 CallInst *CI = dyn_cast<CallInst>(Op);
2710 if (CI && CI->hasOneUse())
2711 if (Value *ReallocatedOp = getReallocatedOperand(CI))
2712 return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
2713
2714 // If we optimize for code size, try to move the call to free before the null
2715  // test so that SimplifyCFG can remove the empty block and dead code
2716  // elimination can remove the branch. I.e., this helps to turn something like:
2717 // if (foo) free(foo);
2718 // into
2719 // free(foo);
2720 //
2721 // Note that we can only do this for 'free' and not for any flavor of
2722 // 'operator delete'; there is no 'operator delete' symbol for which we are
2723 // permitted to invent a call, even if we're passing in a null pointer.
2724 if (MinimizeSize) {
2725 LibFunc Func;
2726 if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
2727      if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
2728        return I;
2729 }
2730
2731 return nullptr;
2732}
2733
2734Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
2735  // Nothing for now.
2736 return nullptr;
2737}
2738
2739// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
2740bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) {
2741  // Try to remove the previous instruction if it must lead to unreachable.
2742 // This includes instructions like stores and "llvm.assume" that may not get
2743 // removed by simple dead code elimination.
2744 bool Changed = false;
2745 while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
2746 // While we theoretically can erase EH, that would result in a block that
2747 // used to start with an EH no longer starting with EH, which is invalid.
2748 // To make it valid, we'd need to fixup predecessors to no longer refer to
2749 // this block, but that changes CFG, which is not allowed in InstCombine.
2750 if (Prev->isEHPad())
2751 break; // Can not drop any more instructions. We're done here.
2752
2753    if (!isGuaranteedToTransferExecutionToSuccessor(Prev))
2754      break; // Can not drop any more instructions. We're done here.
2755 // Otherwise, this instruction can be freely erased,
2756 // even if it is not side-effect free.
2757
2758 // A value may still have uses before we process it here (for example, in
2759 // another unreachable block), so convert those to poison.
2760 replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
2761 eraseInstFromFunction(*Prev);
2762 Changed = true;
2763 }
2764 return Changed;
2765}
2766
2767Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
2768  removeInstructionsBeforeUnreachable(I);
2769  return nullptr;
2770}
2771
2772Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
2773  assert(BI.isUnconditional() && "Only for unconditional branches.");
2774
2775 // If this store is the second-to-last instruction in the basic block
2776 // (excluding debug info and bitcasts of pointers) and if the block ends with
2777 // an unconditional branch, try to move the store to the successor block.
2778
2779 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
2780 auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
2781 return BBI->isDebugOrPseudoInst() ||
2782 (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
2783 };
2784
2785 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
2786 do {
2787 if (BBI != FirstInstr)
2788 --BBI;
2789 } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));
2790
2791 return dyn_cast<StoreInst>(BBI);
2792 };
2793
2794 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
2795 if (mergeStoreIntoSuccessor(*SI))
2796 return &BI;
2797
2798 return nullptr;
2799}
2800
2801void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To,
2802                                   SmallVectorImpl<BasicBlock *> &Worklist) {
2803  if (!DeadEdges.insert({From, To}).second)
2804 return;
2805
2806 // Replace phi node operands in successor with poison.
2807 for (PHINode &PN : To->phis())
2808 for (Use &U : PN.incoming_values())
2809 if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(U)) {
2810 replaceUse(U, PoisonValue::get(PN.getType()));
2811 addToWorklist(&PN);
2812 MadeIRChange = true;
2813 }
2814
2815 Worklist.push_back(To);
2816}
2817
2818// Under the assumption that I is unreachable, remove it and following
2819// instructions. Changes are reported directly to MadeIRChange.
2820void InstCombinerImpl::handleUnreachableFrom(
2821    Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) {
2822  BasicBlock *BB = I->getParent();
2823 for (Instruction &Inst : make_early_inc_range(
2824 make_range(std::next(BB->getTerminator()->getReverseIterator()),
2825 std::next(I->getReverseIterator())))) {
2826 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
2827 replaceInstUsesWith(Inst, PoisonValue::get(Inst.getType()));
2828 MadeIRChange = true;
2829 }
2830 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
2831 continue;
2832    eraseInstFromFunction(Inst);
2833    MadeIRChange = true;
2834 }
2835
2836 // Handle potentially dead successors.
2837 for (BasicBlock *Succ : successors(BB))
2838 addDeadEdge(BB, Succ, Worklist);
2839}
2840
2841void InstCombinerImpl::handlePotentiallyDeadBlocks(
2842    SmallVectorImpl<BasicBlock *> &Worklist) {
2843  while (!Worklist.empty()) {
2844 BasicBlock *BB = Worklist.pop_back_val();
2845 if (!all_of(predecessors(BB), [&](BasicBlock *Pred) {
2846 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
2847 }))
2848 continue;
2849
2850    handleUnreachableFrom(&BB->front(), Worklist);
2851  }
2852}
2853
2854void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB,
2855                                                       BasicBlock *LiveSucc) {
2856  SmallVector<BasicBlock *> Worklist;
2857  for (BasicBlock *Succ : successors(BB)) {
2858 // The live successor isn't dead.
2859 if (Succ == LiveSucc)
2860 continue;
2861
2862 addDeadEdge(BB, Succ, Worklist);
2863 }
2864
2865  handlePotentiallyDeadBlocks(Worklist);
2866}
2867
2868Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
2869  if (BI.isUnconditional())
2870    return visitUnconditionalBranchInst(BI);
2871
2872 // Change br (not X), label True, label False to: br X, label False, True
2873 Value *Cond = BI.getCondition();
2874 Value *X;
2875 if (match(Cond, m_Not(m_Value(X))) && !isa<Constant>(X)) {
2876 // Swap Destinations and condition...
2877 BI.swapSuccessors();
2878 return replaceOperand(BI, 0, X);
2879 }
2880
2881 // Canonicalize logical-and-with-invert as logical-or-with-invert.
2882 // This is done by inverting the condition and swapping successors:
2883 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
2884 Value *Y;
2885 if (isa<SelectInst>(Cond) &&
2886 match(Cond,
2887            m_LogicalAnd(m_Value(X), m_OneUse(m_Not(m_Value(Y)))))) {
2888    Value *NotX = Builder.CreateNot(X, "not." + X->getName());
2889 Value *Or = Builder.CreateLogicalOr(NotX, Y);
2890 BI.swapSuccessors();
2891 return replaceOperand(BI, 0, Or);
2892 }
2893
2894 // If the condition is irrelevant, remove the use so that other
2895 // transforms on the condition become more effective.
2896 if (!isa<ConstantInt>(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1))
2897 return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType()));
2898
2899 // Canonicalize, for example, fcmp_one -> fcmp_oeq.
2900 CmpInst::Predicate Pred;
2901 if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) &&
2902 !isCanonicalPredicate(Pred)) {
2903 // Swap destinations and condition.
2904 auto *Cmp = cast<CmpInst>(Cond);
2905 Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
2906 BI.swapSuccessors();
2907 Worklist.push(Cmp);
2908 return &BI;
2909 }
2910
2911 if (isa<UndefValue>(Cond)) {
2912 handlePotentiallyDeadSuccessors(BI.getParent(), /*LiveSucc*/ nullptr);
2913 return nullptr;
2914 }
2915 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
2916    handlePotentiallyDeadSuccessors(BI.getParent(),
2917                                    BI.getSuccessor(!CI->getZExtValue()));
2918 return nullptr;
2919 }
2920
2921 return nullptr;
2922}
2923
2924Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
2925  Value *Cond = SI.getCondition();
2926 Value *Op0;
2927 ConstantInt *AddRHS;
2928 if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
2929 // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
2930 for (auto Case : SI.cases()) {
2931 Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
2932 assert(isa<ConstantInt>(NewCase) &&
2933 "Result of expression should be constant");
2934 Case.setValue(cast<ConstantInt>(NewCase));
2935 }
2936 return replaceOperand(SI, 0, Op0);
2937 }
2938
2939 KnownBits Known = computeKnownBits(Cond, 0, &SI);
2940 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
2941 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
2942
2943 // Compute the number of leading bits we can ignore.
2944 // TODO: A better way to determine this would use ComputeNumSignBits().
2945 for (const auto &C : SI.cases()) {
2946 LeadingKnownZeros =
2947 std::min(LeadingKnownZeros, C.getCaseValue()->getValue().countl_zero());
2948 LeadingKnownOnes =
2949 std::min(LeadingKnownOnes, C.getCaseValue()->getValue().countl_one());
2950 }
2951
2952 unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
2953
2954 // Shrink the condition operand if the new type is smaller than the old type.
2955 // But do not shrink to a non-standard type, because backend can't generate
2956 // good code for that yet.
2957 // TODO: We can make it aggressive again after fixing PR39569.
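  // For example (hypothetical): if %cond is an i32 whose top 24 bits are known
  // zero and every case value fits in 8 bits, the switch can be rewritten to
  // operate on 'trunc i32 %cond to i8' with correspondingly truncated cases.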
2958 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
2959 shouldChangeType(Known.getBitWidth(), NewWidth)) {
2960 IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
2961    Builder.SetInsertPoint(&SI);
2962    Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
2963
2964 for (auto Case : SI.cases()) {
2965 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
2966 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
2967 }
2968 return replaceOperand(SI, 0, NewCond);
2969 }
2970
2971 if (isa<UndefValue>(Cond)) {
2972 handlePotentiallyDeadSuccessors(SI.getParent(), /*LiveSucc*/ nullptr);
2973 return nullptr;
2974 }
2975 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
2976 handlePotentiallyDeadSuccessors(SI.getParent(),
2977 SI.findCaseValue(CI)->getCaseSuccessor());
2978 return nullptr;
2979 }
2980
2981 return nullptr;
2982}
2983
2984Instruction *
2985InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
2986 auto *WO = dyn_cast<WithOverflowInst>(EV.getAggregateOperand());
2987 if (!WO)
2988 return nullptr;
2989
2990 Intrinsic::ID OvID = WO->getIntrinsicID();
2991 const APInt *C = nullptr;
2992 if (match(WO->getRHS(), m_APIntAllowUndef(C))) {
2993 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
2994 OvID == Intrinsic::umul_with_overflow)) {
2995 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
2996 if (C->isAllOnes())
2997 return BinaryOperator::CreateNeg(WO->getLHS());
2998 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
2999 if (C->isPowerOf2()) {
3000 return BinaryOperator::CreateShl(
3001 WO->getLHS(),
3002 ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
3003 }
3004 }
3005 }
3006
3007 // We're extracting from an overflow intrinsic. See if we're the only user.
3008 // That allows us to simplify multiple result intrinsics to simpler things
3009 // that just get one value.
3010 if (!WO->hasOneUse())
3011 return nullptr;
3012
3013 // Check if we're grabbing only the result of a 'with overflow' intrinsic
3014 // and replace it with a traditional binary instruction.
3015 if (*EV.idx_begin() == 0) {
3016 Instruction::BinaryOps BinOp = WO->getBinaryOp();
3017 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
3018 // Replace the old instruction's uses with poison.
3019 replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
3021 return BinaryOperator::Create(BinOp, LHS, RHS);
3022 }
3023
3024 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
3025
3026 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
3027 if (OvID == Intrinsic::usub_with_overflow)
3028 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
3029
3030 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
3031 // +1 is not possible because we assume signed values.
3032 if (OvID == Intrinsic::smul_with_overflow &&
3033 WO->getLHS()->getType()->isIntOrIntVectorTy(1))
3034 return BinaryOperator::CreateAnd(WO->getLHS(), WO->getRHS());
3035
3036 // If only the overflow result is used, and the right hand side is a
3037 // constant (or constant splat), we can remove the intrinsic by directly
3038 // checking for overflow.
3039 if (C) {
3040 // Compute the no-wrap range for LHS given RHS=C, then construct an
3041 // equivalent icmp, potentially using an offset.
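    // For example (hypothetical): the overflow bit of
    //   sadd.with.overflow(i8 %x, 100)
    // is equivalent to 'icmp sgt i8 %x, 27', because %x + 100 exceeds 127
    // exactly when %x > 27.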
3042    ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
3043        WO->getBinaryOp(), *C, WO->getNoWrapKind());
3044
3045 CmpInst::Predicate Pred;
3046 APInt NewRHSC, Offset;
3047 NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
3048 auto *OpTy = WO->getRHS()->getType();
3049 auto *NewLHS = WO->getLHS();
3050 if (Offset != 0)
3051 NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
3052 return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
3053 ConstantInt::get(OpTy, NewRHSC));
3054 }
3055
3056 return nullptr;
3057}
3058
3059Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
3060  Value *Agg = EV.getAggregateOperand();
3061
3062 if (!EV.hasIndices())
3063 return replaceInstUsesWith(EV, Agg);
3064
3065 if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
3066 SQ.getWithInstruction(&EV)))
3067 return replaceInstUsesWith(EV, V);
3068
3069 if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
3070 // We're extracting from an insertvalue instruction, compare the indices
3071 const unsigned *exti, *exte, *insi, *inse;
3072 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
3073 exte = EV.idx_end(), inse = IV->idx_end();
3074 exti != exte && insi != inse;
3075 ++exti, ++insi) {
3076 if (*insi != *exti)
3077 // The insert and extract both reference distinctly different elements.
3078 // This means the extract is not influenced by the insert, and we can
3079 // replace the aggregate operand of the extract with the aggregate
3080 // operand of the insert. i.e., replace
3081 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3082 // %E = extractvalue { i32, { i32 } } %I, 0
3083 // with
3084 // %E = extractvalue { i32, { i32 } } %A, 0
3085 return ExtractValueInst::Create(IV->getAggregateOperand(),
3086 EV.getIndices());
3087 }
3088 if (exti == exte && insi == inse)
3089 // Both iterators are at the end: Index lists are identical. Replace
3090 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3091 // %C = extractvalue { i32, { i32 } } %B, 1, 0
3092 // with "i32 42"
3093 return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
3094 if (exti == exte) {
3095 // The extract list is a prefix of the insert list. i.e. replace
3096 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3097 // %E = extractvalue { i32, { i32 } } %I, 1
3098 // with
3099 // %X = extractvalue { i32, { i32 } } %A, 1
3100 // %E = insertvalue { i32 } %X, i32 42, 0
3101 // by switching the order of the insert and extract (though the
3102 // insertvalue should be left in, since it may have other uses).
3103 Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
3104 EV.getIndices());
3105 return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
3106 ArrayRef(insi, inse));
3107 }
3108 if (insi == inse)
3109 // The insert list is a prefix of the extract list
3110 // We can simply remove the common indices from the extract and make it
3111 // operate on the inserted value instead of the insertvalue result.
3112 // i.e., replace
3113 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3114 // %E = extractvalue { i32, { i32 } } %I, 1, 0
3115 // with
3116      // %E = extractvalue { i32 } { i32 42 }, 0
3117 return ExtractValueInst::Create(IV->getInsertedValueOperand(),
3118 ArrayRef(exti, exte));
3119 }
3120
3121 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
3122 return R;
3123
3124 if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
3125 // Bail out if the aggregate contains scalable vector type
3126 if (auto *STy = dyn_cast<StructType>(Agg->getType());
3127 STy && STy->containsScalableVectorType())
3128 return nullptr;
3129
3130 // If the (non-volatile) load only has one use, we can rewrite this to a
3131 // load from a GEP. This reduces the size of the load. If a load is used
3132 // only by extractvalue instructions then this either must have been
3133 // optimized before, or it is a struct with padding, in which case we
3134 // don't want to do the transformation as it loses padding knowledge.
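    // For example (hypothetical IR):
    //   %agg = load { i32, i32 }, ptr %p
    //   %v   = extractvalue { i32, i32 } %agg, 1
    // becomes:
    //   %gep = getelementptr inbounds { i32, i32 }, ptr %p, i32 0, i32 1
    //   %v   = load i32, ptr %gep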
3135 if (L->isSimple() && L->hasOneUse()) {
3136 // extractvalue has integer indices, getelementptr has Value*s. Convert.
3137 SmallVector<Value*, 4> Indices;
3138 // Prefix an i32 0 since we need the first element.
3139 Indices.push_back(Builder.getInt32(0));
3140 for (unsigned Idx : EV.indices())
3141 Indices.push_back(Builder.getInt32(Idx));
3142
3143 // We need to insert these at the location of the old load, not at that of
3144 // the extractvalue.
3145 Builder.SetInsertPoint(L);
3146 Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
3147 L->getPointerOperand(), Indices);
3148 Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
3149 // Whatever aliasing information we had for the original load must also
3150 // hold for the smaller load, so propagate the annotations.
3151 NL->setAAMetadata(L->getAAMetadata());
3152 // Returning the load directly will cause the main loop to insert it in
3153 // the wrong spot, so use replaceInstUsesWith().
3154 return replaceInstUsesWith(EV, NL);
3155 }
3156 }
3157
3158 if (auto *PN = dyn_cast<PHINode>(Agg))
3159 if (Instruction *Res = foldOpIntoPhi(EV, PN))
3160 return Res;
3161
3162 // We could simplify extracts from other values. Note that nested extracts may
3163 // already be simplified implicitly by the above: extract (extract (insert) )
3164 // will be translated into extract ( insert ( extract ) ) first and then just
3165 // the value inserted, if appropriate. Similarly for extracts from single-use
3166 // loads: extract (extract (load)) will be translated to extract (load (gep))
3167 // and if again single-use then via load (gep (gep)) to load (gep).
3168 // However, double extracts from e.g. function arguments or return values
3169 // aren't handled yet.
3170 return nullptr;
3171}
3172
3173/// Return 'true' if the given typeinfo will match anything.
3174static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
3175 switch (Personality) {
3180 // The GCC C EH and Rust personalities only exist to support cleanups, so
3180 // it's not clear what the semantics of catch clauses are.
3181 return false;
3183 return false;
3185 // While __gnat_all_others_value will match any Ada exception, it doesn't
3186 // match foreign exceptions (or didn't, before gcc-4.7).
3187 return false;
3197 return TypeInfo->isNullValue();
3198 }
3199 llvm_unreachable("invalid enum");
3200}
3201
3202static bool shorter_filter(const Value *LHS, const Value *RHS) {
3203 return
3204 cast<ArrayType>(LHS->getType())->getNumElements()
3205 <
3206 cast<ArrayType>(RHS->getType())->getNumElements();
3207}
3208
3209Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
3210 // The logic here should be correct for any real-world personality function.
3211 // However if that turns out not to be true, the offending logic can always
3212 // be conditioned on the personality function, like the catch-all logic is.
3213 EHPersonality Personality =
3214 classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
3215
3216 // Simplify the list of clauses, eg by removing repeated catch clauses
3217 // (these are often created by inlining).
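  // A hypothetical example (IR not taken from this file): after inlining, a
  // landingpad such as
  //   %lp = landingpad { ptr, i32 }
  //           catch ptr @_ZTIi
  //           catch ptr @_ZTIi
  // carries a redundant second catch clause and is rebuilt below with a
  // single catch ptr @_ZTIi clause.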
3218 bool MakeNewInstruction = false; // If true, recreate using the following:
3219 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
3220 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
3221
3222 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
3223 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
3224 bool isLastClause = i + 1 == e;
3225 if (LI.isCatch(i)) {
3226 // A catch clause.
3227 Constant *CatchClause = LI.getClause(i);
3228 Constant *TypeInfo = CatchClause->stripPointerCasts();
3229
3230 // If we already saw this clause, there is no point in having a second
3231 // copy of it.
3232 if (AlreadyCaught.insert(TypeInfo).second) {
3233 // This catch clause was not already seen.
3234 NewClauses.push_back(CatchClause);
3235 } else {
3236 // Repeated catch clause - drop the redundant copy.
3237 MakeNewInstruction = true;
3238 }
3239
3240 // If this is a catch-all then there is no point in keeping any following
3241 // clauses or marking the landingpad as having a cleanup.
3242 if (isCatchAll(Personality, TypeInfo)) {
3243 if (!isLastClause)
3244 MakeNewInstruction = true;
3245 CleanupFlag = false;
3246 break;
3247 }
3248 } else {
3249 // A filter clause. If any of the filter elements were already caught
3250 // then they can be dropped from the filter. It is tempting to try to
3251 // exploit the filter further by saying that any typeinfo that does not
3252 // occur in the filter can't be caught later (and thus can be dropped).
3253 // However this would be wrong, since typeinfos can match without being
3254 // equal (for example if one represents a C++ class, and the other some
3255 // class derived from it).
3256 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
3257 Constant *FilterClause = LI.getClause(i);
3258 ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
3259 unsigned NumTypeInfos = FilterType->getNumElements();
3260
3261 // An empty filter catches everything, so there is no point in keeping any
3262 // following clauses or marking the landingpad as having a cleanup. By
3263 // dealing with this case here the following code is made a bit simpler.
3264 if (!NumTypeInfos) {
3265 NewClauses.push_back(FilterClause);
3266 if (!isLastClause)
3267 MakeNewInstruction = true;
3268 CleanupFlag = false;
3269 break;
3270 }
3271
3272 bool MakeNewFilter = false; // If true, make a new filter.
3273 SmallVector<Constant *, 16> NewFilterElts; // New elements.
3274 if (isa<ConstantAggregateZero>(FilterClause)) {
3275 // Not an empty filter - it contains at least one null typeinfo.
3276 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
3277 Constant *TypeInfo =
3278 Constant::getNullValue(FilterType->getElementType());
3279 // If this typeinfo is a catch-all then the filter can never match.
3280 if (isCatchAll(Personality, TypeInfo)) {
3281 // Throw the filter away.
3282 MakeNewInstruction = true;
3283 continue;
3284 }
3285
3286 // There is no point in having multiple copies of this typeinfo, so
3287 // discard all but the first copy if there is more than one.
3288 NewFilterElts.push_back(TypeInfo);
3289 if (NumTypeInfos > 1)
3290 MakeNewFilter = true;
3291 } else {
3292 ConstantArray *Filter = cast<ConstantArray>(FilterClause);
3293 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
3294 NewFilterElts.reserve(NumTypeInfos);
3295
3296 // Remove any filter elements that were already caught or that already
3297 // occurred in the filter. While there, see if any of the elements are
3298 // catch-alls. If so, the filter can be discarded.
3299 bool SawCatchAll = false;
3300 for (unsigned j = 0; j != NumTypeInfos; ++j) {
3301 Constant *Elt = Filter->getOperand(j);
3302 Constant *TypeInfo = Elt->stripPointerCasts();
3303 if (isCatchAll(Personality, TypeInfo)) {
3304 // This element is a catch-all. Bail out, noting this fact.
3305 SawCatchAll = true;
3306 break;
3307 }
3308
3309 // Even if we've seen a type in a catch clause, we don't want to
3310 // remove it from the filter. An unexpected type handler may be
3311 // set up for a call site which throws an exception of the same
3312 // type caught. In order for the exception thrown by the unexpected
3313 // handler to propagate correctly, the filter must be correctly
3314 // described for the call site.
3315 //
3316 // Example:
3317 //
3318 // void unexpected() { throw 1;}
3319 // void foo() throw (int) {
3320 // std::set_unexpected(unexpected);
3321 // try {
3322 // throw 2.0;
3323 // } catch (int i) {}
3324 // }
3325
3326 // There is no point in having multiple copies of the same typeinfo in
3327 // a filter, so only add it if we didn't already.
3328 if (SeenInFilter.insert(TypeInfo).second)
3329 NewFilterElts.push_back(cast<Constant>(Elt));
3330 }
3331 // A filter containing a catch-all cannot match anything by definition.
3332 if (SawCatchAll) {
3333 // Throw the filter away.
3334 MakeNewInstruction = true;
3335 continue;
3336 }
3337
3338 // If we dropped something from the filter, make a new one.
3339 if (NewFilterElts.size() < NumTypeInfos)
3340 MakeNewFilter = true;
3341 }
3342 if (MakeNewFilter) {
3343 FilterType = ArrayType::get(FilterType->getElementType(),
3344 NewFilterElts.size());
3345 FilterClause = ConstantArray::get(FilterType, NewFilterElts);
3346 MakeNewInstruction = true;
3347 }
3348
3349 NewClauses.push_back(FilterClause);
3350
3351 // If the new filter is empty then it will catch everything so there is
3352 // no point in keeping any following clauses or marking the landingpad
3353 // as having a cleanup. The case of the original filter being empty was
3354 // already handled above.
3355 if (MakeNewFilter && !NewFilterElts.size()) {
3356 assert(MakeNewInstruction && "New filter but not a new instruction!");
3357 CleanupFlag = false;
3358 break;
3359 }
3360 }
3361 }
3362
3363 // If several filters occur in a row then reorder them so that the shortest
3364 // filters come first (those with the smallest number of elements). This is
3365 // advantageous because shorter filters are more likely to match, speeding up
3366 // unwinding, but mostly because it increases the effectiveness of the other
3367 // filter optimizations below.
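  // For instance (hypothetical clauses, not from this file), the clause list
  //   filter [2 x ptr] [ptr @_ZTIi, ptr @_ZTId], filter [1 x ptr] [ptr @_ZTIi]
  // is reordered so that the one-element filter comes first.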
3368 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
3369 unsigned j;
3370 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
3371 for (j = i; j != e; ++j)
3372 if (!isa<ArrayType>(NewClauses[j]->getType()))
3373 break;
3374
3375 // Check whether the filters are already sorted by length. We need to know
3376 // if sorting them is actually going to do anything so that we only make a
3377 // new landingpad instruction if it does.
3378 for (unsigned k = i; k + 1 < j; ++k)
3379 if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
3380 // Not sorted, so sort the filters now. Doing an unstable sort would be
3381 // correct too but reordering filters pointlessly might confuse users.
3382 std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
3383 shorter_filter);
3384 MakeNewInstruction = true;
3385 break;
3386 }
3387
3388 // Look for the next batch of filters.
3389 i = j + 1;
3390 }
3391
3392 // If typeinfos matched if and only if equal, then the elements of a filter L
3393 // that occurs later than a filter F could be replaced by the intersection of
3394 // the elements of F and L. In reality two typeinfos can match without being
3395 // equal (for example if one represents a C++ class, and the other some class
3396 // derived from it) so it would be wrong to perform this transform in general.
3397 // However the transform is correct and useful if F is a subset of L. In that
3398 // case L can be replaced by F, and thus removed altogether since repeating a
3399 // filter is pointless. So here we look at all pairs of filters F and L where
3400 // L follows F in the list of clauses, and remove L if every element of F is
3401 // an element of L. This can occur when inlining C++ functions with exception
3402 // specifications.
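  // A hypothetical example (not from this file): with
  //   F = filter [1 x ptr] [ptr @_ZTIi]
  //   L = filter [2 x ptr] [ptr @_ZTIi, ptr @_ZTId]
  // every element of F also occurs in L, so L is redundant and is removed.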
3403 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
3404 // Examine each filter in turn.
3405 Value *Filter = NewClauses[i];
3406 ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
3407 if (!FTy)
3408 // Not a filter - skip it.
3409 continue;
3410 unsigned FElts = FTy->getNumElements();
3411 // Examine each filter following this one. Doing this backwards means that
3412 // we don't have to worry about filters disappearing under us when removed.
3413 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
3414 Value *LFilter = NewClauses[j];
3415 ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
3416 if (!LTy)
3417 // Not a filter - skip it.
3418 continue;
3419 // If Filter is a subset of LFilter, i.e. every element of Filter is also
3420 // an element of LFilter, then discard LFilter.
3421 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
3422 // If Filter is empty then it is a subset of LFilter.
3423 if (!FElts) {
3424 // Discard LFilter.
3425 NewClauses.erase(J);
3426 MakeNewInstruction = true;
3427 // Move on to the next filter.
3428 continue;
3429 }
3430 unsigned LElts = LTy->getNumElements();
3431 // If Filter is longer than LFilter then it cannot be a subset of it.
3432 if (FElts > LElts)
3433 // Move on to the next filter.
3434 continue;
3435 // At this point we know that LFilter has at least one element.
3436 if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
3437 // Filter is a subset of LFilter iff Filter contains only zeros (as we
3438 // already know that Filter is not longer than LFilter).
3439 if (isa<ConstantAggregateZero>(Filter)) {
3440 assert(FElts <= LElts && "Should have handled this case earlier!");
3441 // Discard LFilter.
3442 NewClauses.erase(J);
3443 MakeNewInstruction = true;
3444 }
3445 // Move on to the next filter.
3446 continue;
3447 }
3448 ConstantArray *LArray = cast<ConstantArray>(LFilter);
3449 if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
3450 // Since Filter is non-empty and contains only zeros, it is a subset of
3451 // LFilter iff LFilter contains a zero.
3452 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
3453 for (unsigned l = 0; l != LElts; ++l)
3454 if (LArray->getOperand(l)->isNullValue()) {
3455 // LFilter contains a zero - discard it.
3456 NewClauses.erase(J);
3457 MakeNewInstruction = true;
3458 break;
3459 }
3460 // Move on to the next filter.
3461 continue;
3462 }
3463 // At this point we know that both filters are ConstantArrays. Loop over
3464 // operands to see whether every element of Filter is also an element of
3465 // LFilter. Since filters tend to be short this is probably faster than
3466 // using a method that scales nicely.
3467 ConstantArray *FArray = cast<ConstantArray>(Filter);
3468 bool AllFound = true;
3469 for (unsigned f = 0; f != FElts; ++f) {
3470 Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
3471 AllFound = false;
3472 for (unsigned l = 0; l != LElts; ++l) {
3473 Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
3474 if (LTypeInfo == FTypeInfo) {
3475 AllFound = true;
3476 break;
3477 }
3478 }
3479 if (!AllFound)
3480 break;
3481 }
3482 if (AllFound) {
3483 // Discard LFilter.
3484 NewClauses.erase(J);
3485 MakeNewInstruction = true;
3486 }
3487 // Move on to the next filter.
3488 }
3489 }
3490
3491 // If we changed any of the clauses, replace the old landingpad instruction
3492 // with a new one.
3493 if (MakeNewInstruction) {
3494 LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
3495 NewClauses.size());
3496 for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
3497 NLI->addClause(NewClauses[i]);
3498 // A landing pad with no clauses must have the cleanup flag set. It is
3499 // theoretically possible, though highly unlikely, that we eliminated all
3500 // clauses. If so, force the cleanup flag to true.
3501 if (NewClauses.empty())
3502 CleanupFlag = true;
3503 NLI->setCleanup(CleanupFlag);
3504 return NLI;
3505 }
3506
3507 // Even if none of the clauses changed, we may nonetheless have understood
3508 // that the cleanup flag is pointless. Clear it if so.
3509 if (LI.isCleanup() != CleanupFlag) {
3510 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
3511 LI.setCleanup(CleanupFlag);
3512 return &LI;
3513 }
3514
3515 return nullptr;
3516}
3517
3518Value *
3519InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
3520 // Try to push freeze through instructions that propagate but don't produce
3521 // poison as far as possible. If an operand of freeze follows three
3522 // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
3523 // guaranteed-non-poison operands then push the freeze through to the one
3524 // operand that is not guaranteed non-poison. The actual transform is as
3525 // follows.
3526 // Op1 = ... ; Op1 can be poison
3527 // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and exactly
3528 // ; one operand that may be poison
3529 // ... = Freeze(Op0)
3530 // =>
3531 // Op1 = ...
3532 // Op1.fr = Freeze(Op1)
3533 // ... = Inst(Op1.fr, NonPoisonOps...)
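  // A concrete (hypothetical) instance of the schematic above:
  //   %a = add nsw i32 %p, 1   ; %p may be poison; nsw is the only other
  //                            ; poison source and can simply be stripped
  //   %f = freeze i32 %a       ; %a has no other uses
  // =>
  //   %p.fr = freeze i32 %p
  //   %f    = add i32 %p.fr, 1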
3534 auto *OrigOp = OrigFI.getOperand(0);
3535 auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
3536
3537 // While we could change the other users of OrigOp to use freeze(OrigOp), that
3538 // potentially reduces their optimization potential, so let's only do this iff
3539 // the OrigOp is only used by the freeze.
3540 if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
3541 return nullptr;
3542
3543 // We can't push the freeze through an instruction which can itself create
3544 // poison. If the only source of new poison is flags, we can simply
3545 // strip them (since we know the only use is the freeze and nothing can
3546 // benefit from them.)
3547 if (canCreateUndefOrPoison(cast<Operator>(OrigOp),
3548 /*ConsiderFlagsAndMetadata*/ false))
3549 return nullptr;
3550
3551 // If operand is guaranteed not to be poison, there is no need to add freeze
3552 // to the operand. So we first find the operand that is not guaranteed to be
3553 // poison.
3554 Use *MaybePoisonOperand = nullptr;
3555 for (Use &U : OrigOpInst->operands()) {
3556 if (isa<MetadataAsValue>(U.get()) ||
3557 isGuaranteedNotToBeUndefOrPoison(U.get()))
3558 continue;
3559 if (!MaybePoisonOperand)
3560 MaybePoisonOperand = &U;
3561 else
3562 return nullptr;
3563 }
3564
3565 OrigOpInst->dropPoisonGeneratingFlagsAndMetadata();
3566
3567 // If all operands are guaranteed to be non-poison, we can drop freeze.
3568 if (!MaybePoisonOperand)
3569 return OrigOp;
3570
3571 Builder.SetInsertPoint(OrigOpInst);
3572 auto *FrozenMaybePoisonOperand = Builder.CreateFreeze(
3573 MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr");
3574
3575 replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand);
3576 return OrigOp;
3577}
3578
3579Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
3580 PHINode *PN) {
3581 // Detect whether this is a recurrence with a start value and some number of
3582 // backedge values. We'll check whether we can push the freeze through the
3583 // backedge values (possibly dropping poison flags along the way) until we
3584 // reach the phi again. In that case, we can move the freeze to the start
3585 // value.
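  // Sketch of the intended rewrite on a simple loop (hypothetical IR):
  //   loop:
  //     %iv      = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
  //     %iv.fr   = freeze i32 %iv
  //     %iv.next = add i32 %iv.fr, 1
  // =>
  //   entry: %start.fr = freeze i32 %start
  //   loop:  %iv       = phi i32 [ %start.fr, %entry ], [ %iv.next, %loop ]
  //          %iv.next  = add i32 %iv, 1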
3586 Use *StartU = nullptr;
3587 SmallVector<Value *> Worklist;
3588 for (Use &U : PN->incoming_values()) {
3589 if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
3590 // Add backedge value to worklist.
3591 Worklist.push_back(U.get());
3592 continue;
3593 }
3594
3595 // Don't bother handling multiple start values.
3596 if (StartU)
3597 return nullptr;
3598 StartU = &U;
3599 }
3600
3601 if (!StartU || Worklist.empty())
3602 return nullptr; // Not a recurrence.
3603
3604 Value *StartV = StartU->get();
3605 BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
3606 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
3607 // We can't insert freeze if the start value is the result of the
3608 // terminator (e.g. an invoke).
3609 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
3610 return nullptr;
3611
3612 SmallPtrSet<Value *, 32> Visited;
3613 SmallVector<Instruction *> DropFlags;
3614 while (!Worklist.empty()) {
3615 Value *V = Worklist.pop_back_val();
3616 if (!Visited.insert(V).second)
3617 continue;
3618
3619 if (Visited.size() > 32)
3620 return nullptr; // Limit the total number of values we inspect.
3621
3622 // Assume that PN is non-poison, because it will be after the transform.
3623 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
3624 continue;
3625
3626 Instruction *I = dyn_cast<Instruction>(V);
3627 if (!I || canCreateUndefOrPoison(cast<Operator>(I),
3628 /*ConsiderFlagsAndMetadata*/ false))
3629 return nullptr;
3630
3631 DropFlags.push_back(I);
3632 append_range(Worklist, I->operands());
3633 }
3634
3635 for (Instruction *I : DropFlags)
3636 I->dropPoisonGeneratingFlagsAndMetadata();
3637
3638 if (StartNeedsFreeze) {
3639 Builder.SetInsertPoint(StartBB->getTerminator());
3640 Value *FrozenStartV = Builder.CreateFreeze(StartV,
3641 StartV->getName() + ".fr");
3642 replaceUse(*StartU, FrozenStartV);
3643 }
3644 return replaceInstUsesWith(FI, PN);
3645}
3646
3647bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
3648 Value *Op = FI.getOperand(0);
3649
3650 if (isa<Constant>(Op) || Op->hasOneUse())
3651 return false;
3652
3653 // Move the freeze directly after the definition of its operand, so that
3654 // it dominates the maximum number of uses. Note that it may not dominate
3655 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
3656 // the normal/default destination. This is why the domination check in the
3657 // replacement below is still necessary.
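  // Sketch of the effect (hypothetical IR): given
  //   %x  = call i32 @f()
  //   %u  = add i32 %x, 1
  //   %fr = freeze i32 %x
  // the freeze is moved to just after the definition of %x, and the dominated
  // use in %u is rewritten to use %fr instead of %x.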
3658 Instruction *MoveBefore;
3659 if (isa<Argument>(Op)) {
3660 MoveBefore =
3661 &*FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
3662 } else {
3663 MoveBefore = cast<Instruction>(Op)->getInsertionPointAfterDef();
3664 if (!MoveBefore)
3665 return false;
3666 }
3667
3668 bool Changed = false;
3669 if (&FI != MoveBefore) {
3670 FI.moveBefore(*MoveBefore->getParent(), MoveBefore->getIterator());
3671 Changed = true;
3672 }
3673
3674 Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
3675 bool Dominates = DT.dominates(&FI, U);
3676 Changed |= Dominates;
3677 return Dominates;
3678 });
3679
3680 return Changed;
3681}
3682
3683// Check if any direct or bitcast user of this value is a shuffle instruction.
3684static bool isUsedWithinShuffleVector(Value *V) {
3685 for (auto *U : V->users()) {
3686 if (isa<ShuffleVectorInst>(U))
3687 return true;
3688 else if (match(U, m_BitCast(m_Specific(V))) && isUsedWithinShuffleVector(U))
3689 return true;
3690 }
3691 return false;
3692}
3693
3694Instruction *InstCombinerImpl::visitFreezeInst(FreezeInst &I) {
3695 Value *Op0 = I.getOperand(0);
3696
3697 if (Value *V = simplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
3698 return replaceInstUsesWith(I, V);
3699
3700 // freeze (phi const, x) --> phi const, (freeze x)
3701 if (auto *PN = dyn_cast<PHINode>(Op0)) {
3702 if (Instruction *NV = foldOpIntoPhi(I, PN))
3703 return NV;
3704 if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
3705 return NV;
3706 }
3707
3708 if (Value *NI = pushFreezeToPreventPoisonFromPropagating(I))
3709 return replaceInstUsesWith(I, NI);
3710
3711 // If I is freeze(undef), check its uses and fold it to a fixed constant.
3712 // - or: pick -1
3713 // - select's condition: if the true value is constant, choose it by making
3714 // the condition true.
3715 // - default: pick 0
3716 //
3717 // Note that this transform is intentionally done here rather than
3718 // via an analysis in InstSimplify or at individual user sites. That is
3719 // because we must produce the same value for all uses of the freeze -
3720 // it's the reason "freeze" exists!
3721 //
3722 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
3723 // duplicating logic for binops at least.
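  // Hypothetical example: for
  //   %f = freeze i32 undef
  //   %o = or i32 %f, %x
  // the only user prefers -1, so %f is replaced by i32 -1; with conflicting
  // users the code below falls back to the null value (0).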
3724 auto getUndefReplacement = [&I](Type *Ty) {
3725 Constant *BestValue = nullptr;
3726 Constant *NullValue = Constant::getNullValue(Ty);
3727 for (const auto *U : I.users()) {
3728 Constant *C = NullValue;
3729 if (match(U, m_Or(m_Value(), m_Value())))
3730 C = Constant::getAllOnesValue(Ty);
3731 else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
3732 C = ConstantInt::getTrue(Ty);
3733
3734 if (!BestValue)
3735 BestValue = C;
3736 else if (BestValue != C)
3737 BestValue = NullValue;
3738 }
3739 assert(BestValue && "Must have at least one use");
3740 return BestValue;
3741 };
3742
3743 if (match(Op0, m_Undef())) {
3744 // Don't fold freeze(undef/poison) if it's used as a vector operand in
3745 // a shuffle. This may improve codegen for shuffles that allow
3746 // unspecified inputs.
3747 if (isUsedWithinShuffleVector(&I))
3748 return nullptr;
3749 return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
3750 }
3751
3752 Constant *C;
3753 if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) {
3754 Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType());
3755 return replaceInstUsesWith(I, Constant::replaceUndefsWith(C, ReplaceC));
3756 }
3757
3758 // Replace uses of Op with freeze(Op).
3759 if (freezeOtherUses(I))
3760 return &I;
3761
3762 return nullptr;
3763}
3764
3765/// Check for case where the call writes to an otherwise dead alloca. This
3766/// shows up for unused out-params in idiomatic C/C++ code. Note that this
3767/// helper *only* analyzes the write; doesn't check any other legality aspect.
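/// A typical (hypothetical) C-level shape that this matches:
///   int unused;          // becomes an otherwise-dead alloca
///   get_value(&unused);  // call whose only effect is the write to &unused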
3768static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
3769 auto *CB = dyn_cast<CallBase>(I);
3770 if (!CB)
3771 // TODO: handle e.g. store to alloca here - only worth doing if we extend
3772 // to allow reload along used path as described below. Otherwise, this
3773 // is simply a store to a dead allocation which will be removed.
3774 return false;
3775 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
3776 if (!Dest)
3777 return false;
3778 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
3779 if (!AI)
3780 // TODO: allow malloc?
3781 return false;
3782 // TODO: allow memory access dominated by move point? Note that since AI
3783 // could have a reference to itself captured by the call, we would need to
3784 // account for cycles in doing so.
3785 SmallVector<const User *> AllocaUsers;
3786 SmallPtrSet<const User *, 4> Visited;
3787 auto pushUsers = [&](const Instruction &I) {
3788 for (const User *U : I.users()) {
3789 if (Visited.insert(U).second)
3790 AllocaUsers.push_back(U);
3791 }
3792 };
3793 pushUsers(*AI);
3794 while (!AllocaUsers.empty()) {
3795 auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
3796 if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
3797 isa<AddrSpaceCastInst>(UserI)) {
3798 pushUsers(*UserI);
3799 continue;
3800 }
3801 if (UserI == CB)
3802 continue;
3803 // TODO: support lifetime.start/end here
3804 return false;
3805 }
3806 return true;
3807}
3808
3809/// Try to move the specified instruction from its current block into the
3810/// beginning of DestBlock, which can only happen if it's safe to move the
3811/// instruction past all of the instructions between it and the end of its
3812/// block.
3813bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
3814 BasicBlock *DestBlock) {
3815 BasicBlock *SrcBlock = I->getParent();
3816
3817 // Cannot move control-flow-involving, volatile loads, vaarg, etc.
3818 if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
3819 I->isTerminator())
3820 return false;
3821
3822 // Do not sink static or dynamic alloca instructions. Static allocas must
3823 // remain in the entry block, and dynamic allocas must not be sunk in between
3824 // a stacksave / stackrestore pair, which would incorrectly shorten its
3825 // lifetime.
3826 if (isa<AllocaInst>(I))
3827 return false;
3828
3829 // Do not sink into catchswitch blocks.
3830 if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
3831 return false;
3832
3833 // Do not sink convergent call instructions.
3834 if (auto *CI = dyn_cast<CallInst>(I)) {
3835 if (CI->isConvergent())
3836 return false;
3837 }
3838
3839 // Unless we can prove that the memory write isn't visible except on the
3840 // path we're sinking to, we must bail.
3841 if (I->mayWriteToMemory()) {
3842 if (!SoleWriteToDeadLocal(I, TLI))
3843 return false;
3844 }
3845
3846 // We can only sink load instructions if there is nothing between the load and
3847 // the end of block that could change the value.
3848 if (I->mayReadFromMemory()) {
3849 // We don't want to do any sophisticated alias analysis, so we only check
3850 // the instructions after I in I's parent block if we try to sink to its
3851 // successor block.
3852 if (DestBlock->getUniquePredecessor() != I->getParent())
3853 return false;
3854 for (BasicBlock::iterator Scan = std::next(I->getIterator()),
3855 E = I->getParent()->end();
3856 Scan != E; ++Scan)
3857 if (Scan->mayWriteToMemory())
3858 return false;
3859 }
3860
3861 I->dropDroppableUses([&](const Use *U) {
3862 auto *I = dyn_cast<Instruction>(U->getUser());
3863 if (I && I->getParent() != DestBlock) {
3864 Worklist.add(I);
3865 return true;
3866 }
3867 return false;
3868 });
3869 /// FIXME: We could remove droppable uses that are not dominated by
3870 /// the new position.
3871
3872 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
3873 I->moveBefore(*DestBlock, InsertPos);
3874 ++NumSunkInst;
3875
3876 // Also sink all related debug uses from the source basic block. Otherwise we
3877 // get debug use before the def. Attempt to salvage debug uses first, to
3878 // maximise the range over which variables have a location. If we cannot salvage, then
3879 // mark the location undef: we know it was supposed to receive a new location
3880 // here, but that computation has been sunk.
3881 SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
3882 findDbgUsers(DbgUsers, I);
3883
3884 // For all debug values in the destination block, the sunk instruction
3885 // will still be available, so they do not need to be dropped.
3886 SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSalvage;
3887 for (auto &DbgUser : DbgUsers)
3888 if (DbgUser->getParent() != DestBlock)
3889 DbgUsersToSalvage.push_back(DbgUser);
3890
3891 // Process the sinking DbgUsersToSalvage in reverse order, as we only want
3892 // to clone the last appearing debug intrinsic for each given variable.
3893 SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink;
3894 for (DbgVariableIntrinsic *DVI : DbgUsersToSalvage)
3895 if (DVI->getParent() == SrcBlock)
3896 DbgUsersToSink.push_back(DVI);
3897 llvm::sort(DbgUsersToSink,
3898 [](auto *A, auto *B) { return B->comesBefore(A); });
3899
3901 SmallSet<DebugVariable, 4> SunkVariables;
3902 for (auto *User : DbgUsersToSink) {
3903 // A dbg.declare instruction should not be cloned, since there can only be
3904 // one per variable fragment. It should be left in the original place
3905 // because the sunk instruction is not an alloca (otherwise we could not be
3906 // here).
3907 if (isa<DbgDeclareInst>(User))
3908 continue;
3909
3910 DebugVariable DbgUserVariable =
3911 DebugVariable(User->getVariable(), User->getExpression(),
3912 User->getDebugLoc()->getInlinedAt());
3913
3914 if (!SunkVariables.insert(DbgUserVariable).second)
3915 continue;
3916
3917 // Leave dbg.assign intrinsics in their original positions and there should
3918 // be no need to insert a clone.
3919 if (isa<DbgAssignIntrinsic>(User))
3920 continue;
3921
3922 DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
3923 if (isa<DbgDeclareInst>(User) && isa<CastInst>(I))
3924 DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0));
3925 LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
3926 }
3927
3928 // Perform salvaging without the clones, then sink the clones.
3929 if (!DIIClones.empty()) {
3930 salvageDebugInfoForDbgValues(*I, DbgUsersToSalvage);
3931 // The clones are in reverse order of original appearance, reverse again to
3932 // maintain the original order.
3933 for (auto &DIIClone : llvm::reverse(DIIClones)) {
3934 DIIClone->insertBefore(&*InsertPos);
3935 LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
3936 }
3937 }
3938
3939 return true;
3940}
3941
3942bool InstCombinerImpl::run() {
3943 while (!Worklist.isEmpty()) {
3944 // Walk deferred instructions in reverse order, and push them to the
3945 // worklist, which means they'll end up popped from the worklist in-order.
3946 while (Instruction *I = Worklist.popDeferred()) {
3947 // Check to see if we can DCE the instruction. We do this already here to
3948 // reduce the number of uses and thus allow other folds to trigger.
3949 // Note that eraseInstFromFunction() may push additional instructions on
3950 // the deferred worklist, so this will DCE whole instruction chains.
3951 if (isInstructionTriviallyDead(I, &TLI)) {
3952 eraseInstFromFunction(*I);
3953 ++NumDeadInst;
3954 continue;
3955 }
3956
3957 Worklist.push(I);
3958 }
3959
3960 Instruction *I = Worklist.removeOne();
3961 if (I == nullptr) continue; // skip null values.
3962
3963 // Check to see if we can DCE the instruction.
3964 if (isInstructionTriviallyDead(I, &TLI)) {
3965 eraseInstFromFunction(*I);
3966 ++NumDeadInst;
3967 continue;
3968 }
3969
3970 if (!DebugCounter::shouldExecute(VisitCounter))
3971 continue;
3972
3973 // See if we can trivially sink this instruction to its user if we can
3974 // prove that the successor is not executed more frequently than our block.
3975 // Return the UserBlock if successful.
3976 auto getOptionalSinkBlockForInst =
3977 [this](Instruction *I) -> std::optional<BasicBlock *> {
3978 if (!EnableCodeSinking)
3979 return std::nullopt;
3980
3981 BasicBlock *BB = I->getParent();
3982 BasicBlock *UserParent = nullptr;
3983 unsigned NumUsers = 0;
3984
3985 for (auto *U : I->users()) {
3986 if (U->isDroppable())
3987 continue;
3988 if (NumUsers > MaxSinkNumUsers)
3989 return std::nullopt;
3990
3991 Instruction *UserInst = cast<Instruction>(U);
3992 // Special handling for Phi nodes - get the block the use occurs in.
3993 if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
3994 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
3995 if (PN->getIncomingValue(i) == I) {
3996 // Bail out if we have uses in different blocks. We don't do any
3997 // sophisticated analysis (i.e finding NearestCommonDominator of
3998 // these use blocks).
3999 if (UserParent && UserParent != PN->getIncomingBlock(i))
4000 return std::nullopt;
4001 UserParent = PN->getIncomingBlock(i);
4002 }
4003 }
4004 assert(UserParent && "expected to find user block!");
4005 } else {
4006 if (UserParent && UserParent != UserInst->getParent())
4007 return std::nullopt;
4008 UserParent = UserInst->getParent();
4009 }
4010
4011 // Make sure these checks are done only once; naturally we do them the
4012 // first time we get the user parent, which saves compile time.
4013 if (NumUsers == 0) {
4014 // Try sinking to another block. If that block is unreachable, then do
4015 // not bother. SimplifyCFG should handle it.
4016 if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
4017 return std::nullopt;
4018
4019 auto *Term = UserParent->getTerminator();
4020 // See if the user is one of our successors that has only one
4021 // predecessor, so that we don't have to split the critical edge.
4022 // Another option where we can sink is a block that ends with a
4023 // terminator that does not pass control to other block (such as
4024 // return or unreachable or resume). In this case:
4025 // - I dominates the User (by SSA form);
4026 // - the User will be executed at most once.
4027 // So sinking I down to User is always profitable or neutral.
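        // E.g. (hypothetical CFG): a value computed in %entry but only used
        // in %then, whose single predecessor is %entry, can be sunk into
        // %then so it is not executed when the branch goes the other way.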
4028 if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
4029 return std::nullopt;
4030
4031 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
4032 }
4033
4034 NumUsers++;
4035 }
4036
4037 // No user or only has droppable users.
4038 if (!UserParent)
4039 return std::nullopt;
4040
4041 return UserParent;
4042 };
4043
4044 auto OptBB = getOptionalSinkBlockForInst(I);
4045 if (OptBB) {
4046 auto *UserParent = *OptBB;
4047 // Okay, the CFG is simple enough, try to sink this instruction.
4048 if (tryToSinkInstruction(I, UserParent)) {
4049 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
4050 MadeIRChange = true;
4051 // We'll add uses of the sunk instruction below, but since
4052 // sinking can expose opportunities for its *operands*, add
4053 // them to the worklist.
4054 for (Use &U : I->operands())
4055 if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
4056 Worklist.push(OpI);
4057 }
4058 }
4059
4060 // Now that we have an instruction, try combining it to simplify it.
4061 Builder.SetInsertPoint(I);
4062 Builder.CollectMetadataToCopy(
4063 I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
4064
4065#ifndef NDEBUG
4066 std::string OrigI;
4067#endif
4068 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
4069 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
4070
4071 if (Instruction *Result = visit(*I)) {
4072 ++NumCombined;
4073 // Should we replace the old instruction with a new one?
4074 if (Result != I) {
4075 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
4076 << " New = " << *Result << '\n');
4077
4078 Result->copyMetadata(*I,
4079 {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
4080 // Everything uses the new instruction now.
4081 I->replaceAllUsesWith(Result);
4082
4083 // Move the name to the new instruction first.
4084 Result->takeName(I);
4085
4086 // Insert the new instruction into the basic block...
4087 BasicBlock *InstParent = I->getParent();
4088 BasicBlock::iterator InsertPos = I->getIterator();
4089
4090 // Are we replacing a PHI with something that isn't a PHI, or vice versa?
4091 if (isa<PHINode>(Result) != isa<PHINode>(I)) {
4092 // We need to fix up the insertion point.
4093 if (isa<PHINode>(I)) // PHI -> Non-PHI
4094 InsertPos = InstParent->getFirstInsertionPt();
4095 else // Non-PHI -> PHI
4096 InsertPos = InstParent->getFirstNonPHI()->getIterator();
4097 }
4098
4099 Result->insertInto(InstParent, InsertPos);
4100
4101
4102 Worklist.pushUsersToWorkList(*Result);
4103 Worklist.push(Result);
4104
4105 eraseInstFromFunction(*I);
4106 } else {
4107 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
4108 << " New = " << *I << '\n');
4109
4110 // If the instruction was modified, it's possible that it is now dead.
4111 // If so, remove it.
4112 if (isInstructionTriviallyDead(I, &TLI)) {
4113 eraseInstFromFunction(*I);
4114 } else {
4115 Worklist.pushUsersToWorkList(*I);
4116 Worklist.push(I);
4117 }
4118 }
4119 MadeIRChange = true;
4120 }
4121 }
4122
4123 Worklist.zap();
4124 return MadeIRChange;
4125}
4126
4127// Track the scopes used by !alias.scope and !noalias. In a function, a
4128// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
4129// by both sets. If not, the declaration of the scope can be safely omitted.
4130// The MDNode of the scope can be omitted as well for the instructions that are
4131// part of this function. We do not do that at this point, as this might become
4132// too time consuming to do.
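// Illustrative (hypothetical) IR: a declaration such as
//   call void @llvm.experimental.noalias.scope.decl(metadata !2)
// can be dropped when no instruction in the function lists the scopes of !2
// in both its !alias.scope and its !noalias metadata.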
4133class AliasScopeTracker {
4134 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
4135 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
4136
4137public:
4138 void analyse(Instruction *I) {
4139 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
4140 if (!I->hasMetadataOtherThanDebugLoc())
4141 return;
4142
4143 auto Track = [](Metadata *ScopeList, auto &Container) {
4144 const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
4145 if (!MDScopeList || !Container.insert(MDScopeList).second)
4146 return;
4147 for (const auto &MDOperand : MDScopeList->operands())
4148 if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
4149 Container.insert(MDScope);
4150 };
4151
4152 Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
4153 Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
4154 }
4155
4156 bool isNoAliasScopeDeclDead(Instruction *Inst) {
4157 NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
4158 if (!Decl)
4159 return false;
4160
4161 assert(Decl->use_empty() &&
4162 "llvm.experimental.noalias.scope.decl in use ?");
4163 const MDNode *MDSL = Decl->getScopeList();
4164 assert(MDSL->getNumOperands() == 1 &&
4165 "llvm.experimental.noalias.scope should refer to a single scope");
4166 auto &MDOperand = MDSL->getOperand(0);
4167 if (auto *MD = dyn_cast<MDNode>(MDOperand))
4168 return !UsedAliasScopesAndLists.contains(MD) ||
4169 !UsedNoAliasScopesAndLists.contains(MD);
4170
4171 // Not an MDNode? Throw it away.
4172 return true;
4173 }
4174};
4175
4176/// Populate the IC worklist from a function, by walking it in reverse
4177/// post-order and adding all reachable code to the worklist.
4178///
4179/// This has a couple of tricks to make the code faster and more powerful. In
4180/// particular, we constant fold and DCE instructions as we go, to avoid adding
4181/// them to the worklist (this significantly speeds up instcombine on code where
4182/// many instructions are dead or constant). Additionally, if we find a branch
4183/// whose condition is a known constant, we only visit the reachable successors.
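/// For example (hypothetical IR), given
///   br i1 true, label %live, label %dead
/// only %live is visited; the edge to %dead is recorded as dead and any phi
/// inputs flowing along it are replaced with poison.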
4184bool InstCombinerImpl::prepareWorklist(
4185 Function &F, ReversePostOrderTraversal<BasicBlock *> &RPOT) {
4186 bool MadeIRChange = false;
4187 SmallPtrSet<BasicBlock *, 32> LiveBlocks;
4188 SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
4189 DenseMap<Constant *, Constant *> FoldedConstants;
4190 AliasScopeTracker SeenAliasScopes;
4191
4192 auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
4193 for (BasicBlock *Succ : successors(BB))
4194 if (Succ != LiveSucc && DeadEdges.insert({BB, Succ}).second)
4195 for (PHINode &PN : Succ->phis())
4196 for (Use &U : PN.incoming_values())
4197 if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
4198 U.set(PoisonValue::get(PN.getType()));
4199 MadeIRChange = true;
4200 }
4201 };
4202
4203 for (BasicBlock *BB : RPOT) {
4204 if (!BB->isEntryBlock() && all_of(predecessors(BB), [&](BasicBlock *Pred) {
4205 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
4206 })) {
4207 HandleOnlyLiveSuccessor(BB, nullptr);
4208 continue;
4209 }
4210 LiveBlocks.insert(BB);
4211
4212 for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
4213 // ConstantProp instruction if trivially constant.
4214 if (!Inst.use_empty() &&
4215 (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
4216 if (Constant *C = ConstantFoldInstruction(&Inst, DL, &TLI)) {
4217 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
4218 << '\n');
4219 Inst.replaceAllUsesWith(C);
4220 ++NumConstProp;
4221 if (isInstructionTriviallyDead(&Inst, &TLI))
4222 Inst.eraseFromParent();
4223 MadeIRChange = true;
4224 continue;
4225 }
4226
4227 // See if we can constant fold its operands.
4228 for (Use &U : Inst.operands()) {
4229 if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
4230 continue;
4231
4232 auto *C = cast<Constant>(U);
4233 Constant *&FoldRes = FoldedConstants[C];
4234 if (!FoldRes)
4235 FoldRes = ConstantFoldConstant(C, DL, &TLI);
4236
4237 if (FoldRes != C) {
4238 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
4239 << "\n Old = " << *C
4240 << "\n New = " << *FoldRes << '\n');
4241 U = FoldRes;
4242 MadeIRChange = true;
4243 }
4244 }
4245
4246 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
4247 // these call instructions consumes non-trivial amount of time and
4248 // provides no value for the optimization.
4249 if (!Inst.isDebugOrPseudoInst()) {
4250 InstrsForInstructionWorklist.push_back(&Inst);
4251 SeenAliasScopes.analyse(&Inst);
4252 }
4253 }
4254
4255 // If this is a branch or switch on a constant, mark only the single
4256 // live successor. Otherwise assume all successors are live.
4257 Instruction *TI = BB->getTerminator();
4258 if (BranchInst *BI = dyn_cast<BranchInst>(TI); BI && BI->isConditional()) {
4259 if (isa<UndefValue>(BI->getCondition())) {
4260 // Branch on undef is UB.
4261 HandleOnlyLiveSuccessor(BB, nullptr);
4262 continue;
4263 }
4264 if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
4265 bool CondVal = Cond->getZExtValue();
4266 HandleOnlyLiveSuccessor(BB, BI->getSuccessor(!CondVal));
4267 continue;
4268 }
4269 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
4270 if (isa<UndefValue>(SI->getCondition())) {
4271 // Switch on undef is UB.
4272 HandleOnlyLiveSuccessor(BB, nullptr);
4273 continue;
4274 }
4275 if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
4276 HandleOnlyLiveSuccessor(BB,
4277 SI->findCaseValue(Cond)->getCaseSuccessor());
4278 continue;
4279 }
4280 }
4281 }
4282
4283 // Remove instructions inside unreachable blocks. This prevents the
4284 // instcombine code from having to deal with some bad special cases, and
4285 // reduces use counts of instructions.
4286 for (BasicBlock &BB : F) {
4287 if (LiveBlocks.count(&BB))
4288 continue;
4289
4290 unsigned NumDeadInstInBB;
4291 unsigned NumDeadDbgInstInBB;
4292 std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
4293 removeAllNonTerminatorAndEHPadInstructions(&BB);
4294
4295 MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
4296 NumDeadInst += NumDeadInstInBB;
4297 }
4298
4299 // Once we've found all of the instructions to add to instcombine's worklist,
4300 // add them in reverse order. This way instcombine will visit from the top
4301 // of the function down. This jives well with the way that it adds all uses
4302 // of instructions to the worklist after doing a transformation, thus avoiding
4303 // some N^2 behavior in pathological cases.
4304 Worklist.reserve(InstrsForInstructionWorklist.size());
4305 for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
4306 // DCE instruction if trivially dead. As we iterate in reverse program
4307 // order here, we will clean up whole chains of dead instructions.
4308 if (isInstructionTriviallyDead(Inst, &TLI) ||
4309 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
4310 ++NumDeadInst;
4311 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
4312 salvageDebugInfo(*Inst);
4313 Inst->eraseFromParent();
4314 MadeIRChange = true;
4315 continue;
4316 }
4317
4318 Worklist.push(Inst);
4319 }
4320
4321 return MadeIRChange;
4322}
4323
4324static bool combineInstructionsOverFunction(
4325 Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
4326 AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
4327 DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
4328 ProfileSummaryInfo *PSI, unsigned MaxIterations, bool VerifyFixpoint,
4329 LoopInfo *LI) {
4330 auto &DL = F.getParent()->getDataLayout();
4331
4332 /// Builder - This is an IRBuilder that automatically inserts new
4333 /// instructions into the worklist when they are created.
4334 InstCombiner::BuilderTy Builder(
4335 F.getContext(), TargetFolder(DL),
4336 IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
4337 Worklist.add(I);
4338 if (auto *Assume = dyn_cast<AssumeInst>(I))
4339 AC.registerAssumption(Assume);
4340 }));
4341
4342 ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front());
4343
4344 // Lower dbg.declare intrinsics otherwise their value may be clobbered
4345 // by instcombiner.
4346 bool MadeIRChange = false;
4347 if (ShouldLowerDbgDeclare)
4348 MadeIRChange = LowerDbgDeclare(F);
4349
4350 // Iterate while there is work to do.
4351 unsigned Iteration = 0;
4352 while (true) {
4353 ++Iteration;
4354
4355 if (Iteration > MaxIterations && !VerifyFixpoint) {
4356 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << MaxIterations
4357 << " on " << F.getName()
4358 << " reached; stopping without verifying fixpoint\n");
4359 break;
4360 }
4361
4362 ++NumWorklistIterations;
4363 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
4364 << F.getName() << "\n");
4365
4366 InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
4367 ORE, BFI, PSI, DL, LI);
4369 bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT);
4370 MadeChangeInThisIteration |= IC.run();
4371 if (!MadeChangeInThisIteration)
4372 break;
4373
4374 MadeIRChange = true;
4375 if (Iteration > MaxIterations) {
4377 "Instruction Combining did not reach a fixpoint after " +
4378 Twine(MaxIterations) + " iterations");
4379 }
4380 }
4381
4382 if (Iteration == 1)
4383 ++NumOneIteration;
4384 else if (Iteration == 2)
4385 ++NumTwoIterations;
4386 else if (Iteration == 3)
4387 ++NumThreeIterations;
4388 else
4389 ++NumFourOrMoreIterations;
4390
4391 return MadeIRChange;
4392}
4393
4395
4397 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
4398 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
4399 OS, MapClassName2PassName);
4400 OS << '<';
4401 OS << "max-iterations=" << Options.MaxIterations << ";";
4402 OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info;";
4403 OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
4404 OS << '>';
4405}
4406
4407PreservedAnalyses InstCombinePass::run(Function &F,
4408 FunctionAnalysisManager &AM) {
4409 auto &AC = AM.getResult<AssumptionAnalysis>(F);
4410 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
4411 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
4412 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
4413 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
4414
4415 // TODO: Only use LoopInfo when the option is set. This requires that the
4416 // callers in the pass pipeline explicitly set the option.
4417 auto *LI = AM.getCachedResult<LoopAnalysis>(F);
4418 if (!LI && Options.UseLoopInfo)
4419 LI = &AM.getResult<LoopAnalysis>(F);
4420
4421 auto *AA = &AM.getResult<AAManager>(F);
4422 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
4423 ProfileSummaryInfo *PSI =
4424 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
4425 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
4426 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
4427
4428 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
4429 BFI, PSI, Options.MaxIterations,
4430 Options.VerifyFixpoint, LI))
4431 // No changes, all analyses are preserved.
4432 return PreservedAnalyses::all();
4433
4434 // Mark all the analyses that instcombine updates as preserved.
4435 PreservedAnalyses PA;
4436 PA.preserveSet<CFGAnalyses>();
4437 return PA;
4438}
4439
4441 AU.setPreservesCFG();
4454}
4455
4456bool InstructionCombiningPass::runOnFunction(Function &F) {
4457 if (skipFunction(F))
4458 return false;
4459
4460 // Required analyses.
4461 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
4462 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
4463 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
4464 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
4465 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
4466 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
4467
4468 // Optional analyses.
4469 auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
4470 auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
4471 ProfileSummaryInfo *PSI =
4472 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
4473 BlockFrequencyInfo *BFI =
4474 (PSI && PSI->hasProfileSummary()) ?
4475 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
4476 nullptr;
4477
4478 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
4479 BFI, PSI,
4481 /*VerifyFixpoint */ false, LI);
4482}
4483
4485
4488}
4489
4491 "Combine redundant instructions", false, false)
4503
4504// Initialization Routines
4507}
4508
4510 return new InstructionCombiningPass();
4511}