LLVM 16.0.0git
InstructionCombining.cpp
1 //===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // InstructionCombining - Combine instructions to form fewer, simple
10 // instructions. This pass does not modify the CFG. This pass is where
11 // algebraic simplification happens.
12 //
13 // This pass combines things like:
14 // %Y = add i32 %X, 1
15 // %Z = add i32 %Y, 1
16 // into:
17 // %Z = add i32 %X, 2
18 //
19 // This is a simple worklist driven algorithm.
20 //
21 // This pass guarantees that the following canonicalizations are performed on
22 // the program:
23 // 1. If a binary operator has a constant operand, it is moved to the RHS
24 // 2. Bitwise operators with constant operands are always grouped so that
25 // shifts are performed first, then or's, then and's, then xor's.
26 // 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27 // 4. All cmp instructions on boolean values are replaced with logical ops
28 // 5. add X, X is represented as (X*2) => (X << 1)
29 // 6. Multiplies with a power-of-two constant argument are transformed into
30 // shifts.
31 // ... etc.
32 //
33 //===----------------------------------------------------------------------===//
34 
35 #include "InstCombineInternal.h"
36 #include "llvm-c/Initialization.h"
38 #include "llvm/ADT/APInt.h"
39 #include "llvm/ADT/ArrayRef.h"
40 #include "llvm/ADT/DenseMap.h"
41 #include "llvm/ADT/None.h"
42 #include "llvm/ADT/SmallPtrSet.h"
43 #include "llvm/ADT/SmallVector.h"
44 #include "llvm/ADT/Statistic.h"
49 #include "llvm/Analysis/CFG.h"
55 #include "llvm/Analysis/LoopInfo.h"
65 #include "llvm/IR/BasicBlock.h"
66 #include "llvm/IR/CFG.h"
67 #include "llvm/IR/Constant.h"
68 #include "llvm/IR/Constants.h"
69 #include "llvm/IR/DIBuilder.h"
70 #include "llvm/IR/DataLayout.h"
71 #include "llvm/IR/DebugInfo.h"
72 #include "llvm/IR/DerivedTypes.h"
73 #include "llvm/IR/Dominators.h"
74 #include "llvm/IR/Function.h"
76 #include "llvm/IR/IRBuilder.h"
77 #include "llvm/IR/InstrTypes.h"
78 #include "llvm/IR/Instruction.h"
79 #include "llvm/IR/Instructions.h"
80 #include "llvm/IR/IntrinsicInst.h"
81 #include "llvm/IR/Intrinsics.h"
83 #include "llvm/IR/Metadata.h"
84 #include "llvm/IR/Operator.h"
85 #include "llvm/IR/PassManager.h"
86 #include "llvm/IR/PatternMatch.h"
87 #include "llvm/IR/Type.h"
88 #include "llvm/IR/Use.h"
89 #include "llvm/IR/User.h"
90 #include "llvm/IR/Value.h"
91 #include "llvm/IR/ValueHandle.h"
92 #include "llvm/InitializePasses.h"
93 #include "llvm/Support/Casting.h"
95 #include "llvm/Support/Compiler.h"
96 #include "llvm/Support/Debug.h"
99 #include "llvm/Support/KnownBits.h"
104 #include <algorithm>
105 #include <cassert>
106 #include <cstdint>
107 #include <memory>
108 #include <optional>
109 #include <string>
110 #include <utility>
111 
112 #define DEBUG_TYPE "instcombine"
114 #include <optional>
115 
116 using namespace llvm;
117 using namespace llvm::PatternMatch;
118 
119 STATISTIC(NumWorklistIterations,
120  "Number of instruction combining iterations performed");
121 
122 STATISTIC(NumCombined , "Number of insts combined");
123 STATISTIC(NumConstProp, "Number of constant folds");
124 STATISTIC(NumDeadInst , "Number of dead inst eliminated");
125 STATISTIC(NumSunkInst , "Number of instructions sunk");
126 STATISTIC(NumExpand, "Number of expansions");
127 STATISTIC(NumFactor , "Number of factorizations");
128 STATISTIC(NumReassoc , "Number of reassociations");
129 DEBUG_COUNTER(VisitCounter, "instcombine-visit",
130  "Controls which instructions are visited");
131 
132 // FIXME: these limits eventually should be as low as 2.
133 static constexpr unsigned InstCombineDefaultMaxIterations = 1000;
134 #ifndef NDEBUG
135 static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100;
136 #else
137 static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000;
138 #endif
139 
140 static cl::opt<bool>
141 EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
142  cl::init(true));
143 
145  "instcombine-max-sink-users", cl::init(32),
146  cl::desc("Maximum number of undroppable users for instruction sinking"));
147 
149  "instcombine-max-iterations",
150  cl::desc("Limit the maximum number of instruction combining iterations"),
152 
154  "instcombine-infinite-loop-threshold",
155  cl::desc("Number of instruction combining iterations considered an "
156  "infinite loop"),
158 
159 static cl::opt<unsigned>
160 MaxArraySize("instcombine-maxarray-size", cl::init(1024),
161  cl::desc("Maximum array size considered when doing a combine"));
162 
163 // FIXME: Remove this flag when it is no longer necessary to convert
164 // llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
165 // increases variable availability at the cost of accuracy. Variables that
166 // cannot be promoted by mem2reg or SROA will be described as living in memory
167 // for their entire lifetime. However, passes like DSE and instcombine can
168 // delete stores to the alloca, leading to misleading and inaccurate debug
169 // information. This flag can be removed when those passes are fixed.
170 static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
171  cl::Hidden, cl::init(true));
172 
173 std::optional<Instruction *>
174 InstCombinerImpl::targetInstCombineIntrinsic(IntrinsicInst &II) {
175  // Handle target specific intrinsics
176  if (II.getCalledFunction()->isTargetIntrinsic()) {
177  return TTI.instCombineIntrinsic(*this, II);
178  }
179  return std::nullopt;
180 }
181 
182 std::optional<Value *> InstCombinerImpl::targetSimplifyDemandedUseBitsIntrinsic(
183  IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
184  bool &KnownBitsComputed) {
185  // Handle target specific intrinsics
186  if (II.getCalledFunction()->isTargetIntrinsic()) {
187  return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
188  KnownBitsComputed);
189  }
190  return std::nullopt;
191 }
192 
193 std::optional<Value *> InstCombinerImpl::targetSimplifyDemandedVectorEltsIntrinsic(
194  IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2,
195  APInt &UndefElts3,
196  std::function<void(Instruction *, unsigned, APInt, APInt &)>
197  SimplifyAndSetOp) {
198  // Handle target specific intrinsics
199  if (II.getCalledFunction()->isTargetIntrinsic()) {
200  return TTI.simplifyDemandedVectorEltsIntrinsic(
201  *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
202  SimplifyAndSetOp);
203  }
204  return std::nullopt;
205 }
206 
207 Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
208  return llvm::EmitGEPOffset(&Builder, DL, GEP);
209 }
210 
211 /// Legal integers and common types are considered desirable. This is used to
212 /// avoid creating instructions with types that may not be supported well by
213 /// the backend.
214 /// NOTE: This treats i8, i16 and i32 specially because they are common
215 /// types in frontend languages.
216 bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
217  switch (BitWidth) {
218  case 8:
219  case 16:
220  case 32:
221  return true;
222  default:
223  return DL.isLegalInteger(BitWidth);
224  }
225 }
226 
227 /// Return true if it is desirable to convert an integer computation from a
228 /// given bit width to a new bit width.
229 /// We don't want to convert from a legal or desirable type (like i8) to an
230 /// illegal type or from a smaller to a larger illegal type. A width of '1'
231 /// is always treated as a desirable type because i1 is a fundamental type in
232 /// IR, and there are many specialized optimizations for i1 types.
233 /// Common/desirable widths are equally treated as legal to convert to, in
234 /// order to open up more combining opportunities.
235 bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
236  unsigned ToWidth) const {
237  bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
238  bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
239 
240  // Convert to desirable widths even if they are not legal types.
241  // Only shrink types, to prevent infinite loops.
242  if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
243  return true;
244 
245  // If this is a legal or desirable integer from type, and the result would be
246  // an illegal type, don't do the transformation.
247  if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal)
248  return false;
249 
250  // Otherwise, if both are illegal, do not increase the size of the result. We
251  // do allow things like i160 -> i64, but not i64 -> i160.
252  if (!FromLegal && !ToLegal && ToWidth > FromWidth)
253  return false;
254 
255  return true;
256 }
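// A brief illustration of the width rules above, assuming a DataLayout where
// i8/i16/i32/i64 are the legal integer widths: shrinking i33 -> i32 is allowed
// (i32 is a desirable type), i32 -> i33 is rejected (desirable to illegal), and
// i96 -> i128 is rejected (growing one illegal width into a larger one).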
257 
258 /// Return true if it is desirable to convert a computation from 'From' to 'To'.
259 /// We don't want to convert from a legal to an illegal type or from a smaller
260 /// to a larger illegal type. i1 is always treated as a legal type because it is
261 /// a fundamental type in IR, and there are many specialized optimizations for
262 /// i1 types.
263 bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
264  // TODO: This could be extended to allow vectors. Datalayout changes might be
265  // needed to properly support that.
266  if (!From->isIntegerTy() || !To->isIntegerTy())
267  return false;
268 
269  unsigned FromWidth = From->getPrimitiveSizeInBits();
270  unsigned ToWidth = To->getPrimitiveSizeInBits();
271  return shouldChangeType(FromWidth, ToWidth);
272 }
273 
274 // Return true, if No Signed Wrap should be maintained for I.
275 // The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
276 // where both B and C should be ConstantInts, results in a constant that does
277 // not overflow. This function only handles the Add and Sub opcodes. For
278 // all other opcodes, the function conservatively returns false.
279 static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
280  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
281  if (!OBO || !OBO->hasNoSignedWrap())
282  return false;
283 
284  // We reason about Add and Sub Only.
285  Instruction::BinaryOps Opcode = I.getOpcode();
286  if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
287  return false;
288 
289  const APInt *BVal, *CVal;
290  if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
291  return false;
292 
293  bool Overflow = false;
294  if (Opcode == Instruction::Add)
295  (void)BVal->sadd_ov(*CVal, Overflow);
296  else
297  (void)BVal->ssub_ov(*CVal, Overflow);
298 
299  return !Overflow;
300 }
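// For example, when reassociating "(X +nsw 100) +nsw 20" on i8, the folded
// constant 120 still fits in a signed i8, so nsw may be kept; for
// "(X +nsw 100) +nsw 60" the folded constant 160 overflows i8 and nsw must be
// dropped. (Constants here are chosen purely for illustration.)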
301 
302 static bool hasNoUnsignedWrap(BinaryOperator &I) {
303  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
304  return OBO && OBO->hasNoUnsignedWrap();
305 }
306 
307 static bool hasNoSignedWrap(BinaryOperator &I) {
308  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
309  return OBO && OBO->hasNoSignedWrap();
310 }
311 
312 /// Conservatively clears subclassOptionalData after a reassociation or
313 /// commutation. Fast-math flags are preserved when applicable because they
314 /// remain valid across these transforms.
315 static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
316  FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
317  if (!FPMO) {
318  I.clearSubclassOptionalData();
319  return;
320  }
321 
322  FastMathFlags FMF = I.getFastMathFlags();
323  I.clearSubclassOptionalData();
324  I.setFastMathFlags(FMF);
325 }
326 
327 /// Combine constant operands of associative operations either before or after a
328 /// cast to eliminate one of the associative operations:
329 /// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
330 /// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
331 static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
332  InstCombinerImpl &IC) {
333  auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
334  if (!Cast || !Cast->hasOneUse())
335  return false;
336 
337  // TODO: Enhance logic for other casts and remove this check.
338  auto CastOpcode = Cast->getOpcode();
339  if (CastOpcode != Instruction::ZExt)
340  return false;
341 
342  // TODO: Enhance logic for other BinOps and remove this check.
343  if (!BinOp1->isBitwiseLogicOp())
344  return false;
345 
346  auto AssocOpcode = BinOp1->getOpcode();
347  auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
348  if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
349  return false;
350 
351  Constant *C1, *C2;
352  if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
353  !match(BinOp2->getOperand(1), m_Constant(C2)))
354  return false;
355 
356  // TODO: This assumes a zext cast.
357  // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
358  // to the destination type might lose bits.
359 
360  // Fold the constants together in the destination type:
361  // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
362  Type *DestTy = C1->getType();
363  Constant *CastC2 = ConstantExpr::getCast(CastOpcode, C2, DestTy);
364  Constant *FoldedC = ConstantExpr::get(AssocOpcode, C1, CastC2);
365  IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
366  IC.replaceOperand(*BinOp1, 1, FoldedC);
367  return true;
368 }
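// A concrete instance of the fold above (operand values are illustrative only):
//   %t = and i8 %x, 5
//   %z = zext i8 %t to i32
//   %r = and i32 %z, 12
// becomes
//   %z = zext i8 %x to i32
//   %r = and i32 %z, 4        ; 12 & zext(5) == 4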
369 
370 // Simplifies IntToPtr/PtrToInt RoundTrip Cast To BitCast.
371 // inttoptr ( ptrtoint (x) ) --> x
372 Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
373  auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
374  if (IntToPtr && DL.getPointerTypeSizeInBits(IntToPtr->getDestTy()) ==
375  DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
376  auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
377  Type *CastTy = IntToPtr->getDestTy();
378  if (PtrToInt &&
379  CastTy->getPointerAddressSpace() ==
380  PtrToInt->getSrcTy()->getPointerAddressSpace() &&
381  DL.getPointerTypeSizeInBits(PtrToInt->getSrcTy()) ==
382  DL.getTypeSizeInBits(PtrToInt->getDestTy())) {
383  return CastInst::CreateBitOrPointerCast(PtrToInt->getOperand(0), CastTy,
384  "", PtrToInt);
385  }
386  }
387  return nullptr;
388 }
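// A sketch of the round-trip elimination above, assuming the pointer and
// integer widths match and the address spaces agree:
//   %i = ptrtoint i8* %p to i64
//   %q = inttoptr i64 %i to i32*
// becomes
//   %q = bitcast i8* %p to i32*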
389 
390 /// This performs a few simplifications for operators that are associative or
391 /// commutative:
392 ///
393 /// Commutative operators:
394 ///
395 /// 1. Order operands such that they are listed from right (least complex) to
396 /// left (most complex). This puts constants before unary operators before
397 /// binary operators.
398 ///
399 /// Associative operators:
400 ///
401 /// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
402 /// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
403 ///
404 /// Associative and commutative operators:
405 ///
406 /// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
407 /// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
408 /// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
409 /// if C1 and C2 are constants.
410 bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
411  Instruction::BinaryOps Opcode = I.getOpcode();
412  bool Changed = false;
413 
414  do {
415  // Order operands such that they are listed from right (least complex) to
416  // left (most complex). This puts constants before unary operators before
417  // binary operators.
418  if (I.isCommutative() && getComplexity(I.getOperand(0)) <
419  getComplexity(I.getOperand(1)))
420  Changed = !I.swapOperands();
421 
422  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
423  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
424 
425  if (I.isAssociative()) {
426  // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
427  if (Op0 && Op0->getOpcode() == Opcode) {
428  Value *A = Op0->getOperand(0);
429  Value *B = Op0->getOperand(1);
430  Value *C = I.getOperand(1);
431 
432  // Does "B op C" simplify?
433  if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
434  // It simplifies to V. Form "A op V".
435  replaceOperand(I, 0, A);
436  replaceOperand(I, 1, V);
437  bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
438  bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
439 
440  // Conservatively clear all optional flags since they may not be
441  // preserved by the reassociation. Reset nsw/nuw based on the above
442  // analysis.
443  ClearSubclassDataAfterReassociation(I);
444 
445  // Note: this is only valid because SimplifyBinOp doesn't look at
446  // the operands to Op0.
447  if (IsNUW)
448  I.setHasNoUnsignedWrap(true);
449 
450  if (IsNSW)
451  I.setHasNoSignedWrap(true);
452 
453  Changed = true;
454  ++NumReassoc;
455  continue;
456  }
457  }
458 
459  // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
460  if (Op1 && Op1->getOpcode() == Opcode) {
461  Value *A = I.getOperand(0);
462  Value *B = Op1->getOperand(0);
463  Value *C = Op1->getOperand(1);
464 
465  // Does "A op B" simplify?
466  if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
467  // It simplifies to V. Form "V op C".
468  replaceOperand(I, 0, V);
469  replaceOperand(I, 1, C);
470  // Conservatively clear the optional flags, since they may not be
471  // preserved by the reassociation.
472  ClearSubclassDataAfterReassociation(I);
473  Changed = true;
474  ++NumReassoc;
475  continue;
476  }
477  }
478  }
479 
480  if (I.isAssociative() && I.isCommutative()) {
481  if (simplifyAssocCastAssoc(&I, *this)) {
482  Changed = true;
483  ++NumReassoc;
484  continue;
485  }
486 
487  // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
488  if (Op0 && Op0->getOpcode() == Opcode) {
489  Value *A = Op0->getOperand(0);
490  Value *B = Op0->getOperand(1);
491  Value *C = I.getOperand(1);
492 
493  // Does "C op A" simplify?
494  if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
495  // It simplifies to V. Form "V op B".
496  replaceOperand(I, 0, V);
497  replaceOperand(I, 1, B);
498  // Conservatively clear the optional flags, since they may not be
499  // preserved by the reassociation.
500  ClearSubclassDataAfterReassociation(I);
501  Changed = true;
502  ++NumReassoc;
503  continue;
504  }
505  }
506 
507  // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
508  if (Op1 && Op1->getOpcode() == Opcode) {
509  Value *A = I.getOperand(0);
510  Value *B = Op1->getOperand(0);
511  Value *C = Op1->getOperand(1);
512 
513  // Does "C op A" simplify?
514  if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
515  // It simplifies to V. Form "B op V".
516  replaceOperand(I, 0, B);
517  replaceOperand(I, 1, V);
518  // Conservatively clear the optional flags, since they may not be
519  // preserved by the reassociation.
520  ClearSubclassDataAfterReassociation(I);
521  Changed = true;
522  ++NumReassoc;
523  continue;
524  }
525  }
526 
527  // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
528  // if C1 and C2 are constants.
529  Value *A, *B;
530  Constant *C1, *C2, *CRes;
531  if (Op0 && Op1 &&
532  Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
533  match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
534  match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
535  (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
536  bool IsNUW = hasNoUnsignedWrap(I) &&
537  hasNoUnsignedWrap(*Op0) &&
538  hasNoUnsignedWrap(*Op1);
539  BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
540  BinaryOperator::CreateNUW(Opcode, A, B) :
541  BinaryOperator::Create(Opcode, A, B);
542 
543  if (isa<FPMathOperator>(NewBO)) {
544  FastMathFlags Flags = I.getFastMathFlags();
545  Flags &= Op0->getFastMathFlags();
546  Flags &= Op1->getFastMathFlags();
547  NewBO->setFastMathFlags(Flags);
548  }
549  InsertNewInstWith(NewBO, I);
550  NewBO->takeName(Op1);
551  replaceOperand(I, 0, NewBO);
552  replaceOperand(I, 1, CRes);
553  // Conservatively clear the optional flags, since they may not be
554  // preserved by the reassociation.
555  ClearSubclassDataAfterReassociation(I);
556  if (IsNUW)
557  I.setHasNoUnsignedWrap(true);
558 
559  Changed = true;
560  continue;
561  }
562  }
563 
564  // No further simplifications.
565  return Changed;
566  } while (true);
567 }
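// Example of transform #6 above, assuming both inner adds have one use
// (constants are illustrative):
//   %a = add i32 %x, 3
//   %b = add i32 %y, 7
//   %r = add i32 %a, %b
// is rewritten as
//   %t = add i32 %x, %y
//   %r = add i32 %t, 10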
568 
569 /// Return whether "X LOp (Y ROp Z)" is always equal to
570 /// "(X LOp Y) ROp (X LOp Z)".
571 static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
572  Instruction::BinaryOps ROp) {
573  // X & (Y | Z) <--> (X & Y) | (X & Z)
574  // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
575  if (LOp == Instruction::And)
576  return ROp == Instruction::Or || ROp == Instruction::Xor;
577 
578  // X | (Y & Z) <--> (X | Y) & (X | Z)
579  if (LOp == Instruction::Or)
580  return ROp == Instruction::And;
581 
582  // X * (Y + Z) <--> (X * Y) + (X * Z)
583  // X * (Y - Z) <--> (X * Y) - (X * Z)
584  if (LOp == Instruction::Mul)
585  return ROp == Instruction::Add || ROp == Instruction::Sub;
586 
587  return false;
588 }
589 
590 /// Return whether "(X LOp Y) ROp Z" is always equal to
591 /// "(X ROp Z) LOp (Y ROp Z)".
592 static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
593  Instruction::BinaryOps ROp) {
594  if (Instruction::isCommutative(ROp))
595  return leftDistributesOverRight(ROp, LOp);
596 
597  // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
598  return Instruction::isBitwiseLogicOp(LOp) && Instruction::isShift(ROp);
599 
600  // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
601  // but this requires knowing that the addition does not overflow and other
602  // such subtleties.
603 }
604 
605 /// This function returns identity value for given opcode, which can be used to
606 /// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
607 static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
608  if (isa<Constant>(V))
609  return nullptr;
610 
611  return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
612 }
613 
614 /// This function predicates factorization using distributive laws. By default,
615 /// it just returns the 'Op' inputs. But for special-cases like
616 /// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
617 /// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
618 /// allow more factorization opportunities.
619 static Instruction::BinaryOps
620 getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
621  Value *&LHS, Value *&RHS) {
622  assert(Op && "Expected a binary operator");
623  LHS = Op->getOperand(0);
624  RHS = Op->getOperand(1);
625  if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
626  Constant *C;
627  if (match(Op, m_Shl(m_Value(), m_Constant(C)))) {
628  // X << C --> X * (1 << C)
629  RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C);
630  return Instruction::Mul;
631  }
632  // TODO: We can add other conversions e.g. shr => div etc.
633  }
634  return Op->getOpcode();
635 }
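// For instance, in "add (shl %x, 5), %x" the shift is reported back here as
// "mul %x, 32", so the factorization below can view the expression as
// %x*32 + %x*1 and fold it to "mul %x, 33" (values chosen for illustration).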
636 
637 /// This tries to simplify binary operations by factorizing out common terms
638 /// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
639 static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
640  InstCombiner::BuilderTy &Builder,
641  Instruction::BinaryOps InnerOpcode, Value *A,
642  Value *B, Value *C, Value *D) {
643  assert(A && B && C && D && "All values must be provided");
644 
645  Value *V = nullptr;
646  Value *RetVal = nullptr;
647  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
648  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
649 
650  // Does "X op' Y" always equal "Y op' X"?
651  bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
652 
653  // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
654  if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
655  // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
656  // commutative case, "(A op' B) op (C op' A)"?
657  if (A == C || (InnerCommutative && A == D)) {
658  if (A != C)
659  std::swap(C, D);
660  // Consider forming "A op' (B op D)".
661  // If "B op D" simplifies then it can be formed with no cost.
662  V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
663 
664  // If "B op D" doesn't simplify then only go on if one of the existing
665  // operations "A op' B" and "C op' D" will be zapped as no longer used.
666  if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
667  V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
668  if (V)
669  RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
670  }
671  }
672 
673  // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
674  if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
675  // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
676  // commutative case, "(A op' B) op (B op' D)"?
677  if (B == D || (InnerCommutative && B == C)) {
678  if (B != D)
679  std::swap(C, D);
680  // Consider forming "(A op C) op' B".
681  // If "A op C" simplifies then it can be formed with no cost.
682  V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
683 
684  // If "A op C" doesn't simplify then only go on if one of the existing
685  // operations "A op' B" and "C op' D" will be zapped as no longer used.
686  if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
687  V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
688  if (V)
689  RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
690  }
691  }
692 
693  if (!RetVal)
694  return nullptr;
695 
696  ++NumFactor;
697  RetVal->takeName(&I);
698 
699  // Try to add no-overflow flags to the final value.
700  if (isa<OverflowingBinaryOperator>(RetVal)) {
701  bool HasNSW = false;
702  bool HasNUW = false;
703  if (isa<OverflowingBinaryOperator>(&I)) {
704  HasNSW = I.hasNoSignedWrap();
705  HasNUW = I.hasNoUnsignedWrap();
706  }
707  if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
708  HasNSW &= LOBO->hasNoSignedWrap();
709  HasNUW &= LOBO->hasNoUnsignedWrap();
710  }
711 
712  if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
713  HasNSW &= ROBO->hasNoSignedWrap();
714  HasNUW &= ROBO->hasNoUnsignedWrap();
715  }
716 
717  if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
718  // We can propagate 'nsw' if we know that
719  // %Y = mul nsw i16 %X, C
720  // %Z = add nsw i16 %Y, %X
721  // =>
722  // %Z = mul nsw i16 %X, C+1
723  //
724  // iff C+1 isn't INT_MIN
725  const APInt *CInt;
726  if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
727  cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);
728 
729  // nuw can be propagated with any constant or nuw value.
730  cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
731  }
732  }
733  return RetVal;
734 }
735 
736 Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
737  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
738  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
739  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
740  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
741  Value *A, *B, *C, *D;
742  Instruction::BinaryOps LHSOpcode, RHSOpcode;
743 
744  if (Op0)
745  LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
746  if (Op1)
747  RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);
748 
749  // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
750  // a common term.
751  if (Op0 && Op1 && LHSOpcode == RHSOpcode)
752  if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D))
753  return V;
754 
755  // The instruction has the form "(A op' B) op (C)". Try to factorize common
756  // term.
757  if (Op0)
758  if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
759  if (Value *V =
760  tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident))
761  return V;
762 
763  // The instruction has the form "(B) op (C op' D)". Try to factorize common
764  // term.
765  if (Op1)
766  if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
767  if (Value *V =
768  tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D))
769  return V;
770 
771  return nullptr;
772 }
773 
774 /// This tries to simplify binary operations which some other binary operation
775 /// distributes over either by factorizing out common terms
776 /// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
777 /// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
778 /// Returns the simplified value, or null if it didn't simplify.
779 Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
780  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
781  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
782  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
783  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
784 
785  // Factorization.
786  if (Value *R = tryFactorizationFolds(I))
787  return R;
788 
789  // Expansion.
790  if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
791  // The instruction has the form "(A op' B) op C". See if expanding it out
792  // to "(A op C) op' (B op C)" results in simplifications.
793  Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
794  Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
795 
796  // Disable the use of undef because it's not safe to distribute undef.
797  auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
798  Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
799  Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
800 
801  // Do "A op C" and "B op C" both simplify?
802  if (L && R) {
803  // They do! Return "L op' R".
804  ++NumExpand;
805  C = Builder.CreateBinOp(InnerOpcode, L, R);
806  C->takeName(&I);
807  return C;
808  }
809 
810  // Does "A op C" simplify to the identity value for the inner opcode?
811  if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
812  // They do! Return "B op C".
813  ++NumExpand;
814  C = Builder.CreateBinOp(TopLevelOpcode, B, C);
815  C->takeName(&I);
816  return C;
817  }
818 
819  // Does "B op C" simplify to the identity value for the inner opcode?
820  if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
821  // They do! Return "A op C".
822  ++NumExpand;
823  C = Builder.CreateBinOp(TopLevelOpcode, A, C);
824  C->takeName(&I);
825  return C;
826  }
827  }
828 
829  if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
830  // The instruction has the form "A op (B op' C)". See if expanding it out
831  // to "(A op B) op' (A op C)" results in simplifications.
832  Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
833  Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
834 
835  // Disable the use of undef because it's not safe to distribute undef.
836  auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
837  Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
838  Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
839 
840  // Do "A op B" and "A op C" both simplify?
841  if (L && R) {
842  // They do! Return "L op' R".
843  ++NumExpand;
844  A = Builder.CreateBinOp(InnerOpcode, L, R);
845  A->takeName(&I);
846  return A;
847  }
848 
849  // Does "A op B" simplify to the identity value for the inner opcode?
850  if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
851  // They do! Return "A op C".
852  ++NumExpand;
853  A = Builder.CreateBinOp(TopLevelOpcode, A, C);
854  A->takeName(&I);
855  return A;
856  }
857 
858  // Does "A op C" simplify to the identity value for the inner opcode?
859  if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
860  // They do! Return "A op B".
861  ++NumExpand;
862  A = Builder.CreateBinOp(TopLevelOpcode, A, B);
863  A->takeName(&I);
864  return A;
865  }
866  }
867 
868  return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
869 }
870 
871 Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
872  Value *LHS,
873  Value *RHS) {
874  Value *A, *B, *C, *D, *E, *F;
875  bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
876  bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
877  if (!LHSIsSelect && !RHSIsSelect)
878  return nullptr;
879 
880  FastMathFlags FMF;
881  BuilderTy::FastMathFlagGuard Guard(Builder);
882  if (isa<FPMathOperator>(&I)) {
883  FMF = I.getFastMathFlags();
884  Builder.setFastMathFlags(FMF);
885  }
886 
887  Instruction::BinaryOps Opcode = I.getOpcode();
888  SimplifyQuery Q = SQ.getWithInstruction(&I);
889 
890  Value *Cond, *True = nullptr, *False = nullptr;
891 
892  // Special-case for add/negate combination. Replace the zero in the negation
893  // with the trailing add operand:
894  // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
895  // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
896  auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
897  // We need an 'add' and exactly 1 arm of the select to have been simplified.
898  if (Opcode != Instruction::Add || (!True && !False) || (True && False))
899  return nullptr;
900 
901  Value *N;
902  if (True && match(FVal, m_Neg(m_Value(N)))) {
903  Value *Sub = Builder.CreateSub(Z, N);
904  return Builder.CreateSelect(Cond, True, Sub, I.getName());
905  }
906  if (False && match(TVal, m_Neg(m_Value(N)))) {
907  Value *Sub = Builder.CreateSub(Z, N);
908  return Builder.CreateSelect(Cond, Sub, False, I.getName());
909  }
910  return nullptr;
911  };
912 
913  if (LHSIsSelect && RHSIsSelect && A == D) {
914  // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
915  Cond = A;
916  True = simplifyBinOp(Opcode, B, E, FMF, Q);
917  False = simplifyBinOp(Opcode, C, F, FMF, Q);
918 
919  if (LHS->hasOneUse() && RHS->hasOneUse()) {
920  if (False && !True)
921  True = Builder.CreateBinOp(Opcode, B, E);
922  else if (True && !False)
923  False = Builder.CreateBinOp(Opcode, C, F);
924  }
925  } else if (LHSIsSelect && LHS->hasOneUse()) {
926  // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
927  Cond = A;
928  True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
929  False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
930  if (Value *NewSel = foldAddNegate(B, C, RHS))
931  return NewSel;
932  } else if (RHSIsSelect && RHS->hasOneUse()) {
933  // X op (D ? E : F) -> D ? (X op E) : (X op F)
934  Cond = D;
935  True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
936  False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
937  if (Value *NewSel = foldAddNegate(E, F, LHS))
938  return NewSel;
939  }
940 
941  if (!True || !False)
942  return nullptr;
943 
944  Value *SI = Builder.CreateSelect(Cond, True, False);
945  SI->takeName(&I);
946  return SI;
947 }
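// One concrete shape handled above, assuming the select has a single use and
// both arms fold against the constant:
//   %s = select i1 %c, i32 2, i32 4
//   %r = mul i32 %s, 8
// simplifies to
//   %r = select i1 %c, i32 16, i32 32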
948 
949 /// Freely adapt every user of V as-if V was changed to !V.
950 /// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
951 void InstCombinerImpl::freelyInvertAllUsersOf(Value *I) {
952  for (User *U : I->users()) {
953  switch (cast<Instruction>(U)->getOpcode()) {
954  case Instruction::Select: {
955  auto *SI = cast<SelectInst>(U);
956  SI->swapValues();
957  SI->swapProfMetadata();
958  break;
959  }
960  case Instruction::Br:
961  cast<BranchInst>(U)->swapSuccessors(); // swaps prof metadata too
962  break;
963  case Instruction::Xor:
964  replaceInstUsesWith(cast<Instruction>(*U), I);
965  break;
966  default:
967  llvm_unreachable("Got unexpected user - out of sync with "
968  "canFreelyInvertAllUsersOf() ?");
969  }
970  }
971 }
972 
973 /// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
974 /// constant zero (which is the 'negate' form).
975 Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
976  Value *NegV;
977  if (match(V, m_Neg(m_Value(NegV))))
978  return NegV;
979 
980  // Constants can be considered to be negated values if they can be folded.
981  if (ConstantInt *C = dyn_cast<ConstantInt>(V))
982  return ConstantExpr::getNeg(C);
983 
984  if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
985  if (C->getType()->getElementType()->isIntegerTy())
986  return ConstantExpr::getNeg(C);
987 
988  if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
989  for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
990  Constant *Elt = CV->getAggregateElement(i);
991  if (!Elt)
992  return nullptr;
993 
994  if (isa<UndefValue>(Elt))
995  continue;
996 
997  if (!isa<ConstantInt>(Elt))
998  return nullptr;
999  }
1000  return ConstantExpr::getNeg(CV);
1001  }
1002 
1003  // Negate integer vector splats.
1004  if (auto *CV = dyn_cast<Constant>(V))
1005  if (CV->getType()->isVectorTy() &&
1006  CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1007  return ConstantExpr::getNeg(CV);
1008 
1009  return nullptr;
1010 }
1011 
1012 /// A binop with a constant operand and a sign-extended boolean operand may be
1013 /// converted into a select of constants by applying the binary operation to
1014 /// the constant with the two possible values of the extended boolean (0 or -1).
1015 Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1016  // TODO: Handle non-commutative binop (constant is operand 0).
1017  // TODO: Handle zext.
1018  // TODO: Peek through 'not' of cast.
1019  Value *BO0 = BO.getOperand(0);
1020  Value *BO1 = BO.getOperand(1);
1021  Value *X;
1022  Constant *C;
1023  if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
1024  !X->getType()->isIntOrIntVectorTy(1))
1025  return nullptr;
1026 
1027  // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1028  Constant *Ones = Constant::getAllOnesValue(BO.getType());
1029  Constant *Zero = Constant::getNullValue(BO.getType());
1030  Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
1031  Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
1032  return SelectInst::Create(X, TVal, FVal);
1033 }
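// A small example of the fold above (the constant is illustrative):
//   %e = sext i1 %b to i32
//   %r = add i32 %e, 5
// becomes
//   %r = select i1 %b, i32 4, i32 5    ; -1 + 5 vs. 0 + 5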
1034 
1035 static Constant *constantFoldOperationIntoSelectOperand(
1036  Instruction &I, SelectInst *SI, Value *SO) {
1037  auto *ConstSO = dyn_cast<Constant>(SO);
1038  if (!ConstSO)
1039  return nullptr;
1040 
1041  SmallVector<Constant *> ConstOps;
1042  for (Value *Op : I.operands()) {
1043  if (Op == SI)
1044  ConstOps.push_back(ConstSO);
1045  else if (auto *C = dyn_cast<Constant>(Op))
1046  ConstOps.push_back(C);
1047  else
1048  llvm_unreachable("Operands should be select or constant");
1049  }
1050  return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout());
1051 }
1052 
1053 static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
1054  InstCombiner::BuilderTy &Builder) {
1055  if (auto *Cast = dyn_cast<CastInst>(&I))
1056  return Builder.CreateCast(Cast->getOpcode(), SO, I.getType());
1057 
1058  if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1059  assert(canConstantFoldCallTo(II, cast<Function>(II->getCalledOperand())) &&
1060  "Expected constant-foldable intrinsic");
1061  Intrinsic::ID IID = II->getIntrinsicID();
1062  if (II->arg_size() == 1)
1063  return Builder.CreateUnaryIntrinsic(IID, SO);
1064 
1065  // This works for real binary ops like min/max (where we always expect the
1066  // constant operand to be canonicalized as op1) and unary ops with a bonus
1067  // constant argument like ctlz/cttz.
1068  // TODO: Handle non-commutative binary intrinsics as below for binops.
1069  assert(II->arg_size() == 2 && "Expected binary intrinsic");
1070  assert(isa<Constant>(II->getArgOperand(1)) && "Expected constant operand");
1071  return Builder.CreateBinaryIntrinsic(IID, SO, II->getArgOperand(1));
1072  }
1073 
1074  if (auto *EI = dyn_cast<ExtractElementInst>(&I))
1075  return Builder.CreateExtractElement(SO, EI->getIndexOperand());
1076 
1077  assert(I.isBinaryOp() && "Unexpected opcode for select folding");
1078 
1079  // Figure out if the constant is the left or the right argument.
1080  bool ConstIsRHS = isa<Constant>(I.getOperand(1));
1081  Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
1082 
1083  Value *Op0 = SO, *Op1 = ConstOperand;
1084  if (!ConstIsRHS)
1085  std::swap(Op0, Op1);
1086 
1087  Value *NewBO = Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), Op0,
1088  Op1, SO->getName() + ".op");
1089  if (auto *NewBOI = dyn_cast<Instruction>(NewBO))
1090  NewBOI->copyIRFlags(&I);
1091  return NewBO;
1092 }
1093 
1094 Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
1095  bool FoldWithMultiUse) {
1096  // Don't modify shared select instructions unless FoldWithMultiUse is set.
1097  if (!SI->hasOneUse() && !FoldWithMultiUse)
1098  return nullptr;
1099 
1100  Value *TV = SI->getTrueValue();
1101  Value *FV = SI->getFalseValue();
1102  if (!(isa<Constant>(TV) || isa<Constant>(FV)))
1103  return nullptr;
1104 
1105  // Bool selects with constant operands can be folded to logical ops.
1106  if (SI->getType()->isIntOrIntVectorTy(1))
1107  return nullptr;
1108 
1109  // If it's a bitcast involving vectors, make sure it has the same number of
1110  // elements on both sides.
1111  if (auto *BC = dyn_cast<BitCastInst>(&Op)) {
1112  VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
1113  VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
1114 
1115  // Verify that either both or neither are vectors.
1116  if ((SrcTy == nullptr) != (DestTy == nullptr))
1117  return nullptr;
1118 
1119  // If vectors, verify that they have the same number of elements.
1120  if (SrcTy && SrcTy->getElementCount() != DestTy->getElementCount())
1121  return nullptr;
1122  }
1123 
1124  // Test if a CmpInst instruction is used exclusively by a select as
1125  // part of a minimum or maximum operation. If so, refrain from doing
1126  // any other folding. This helps out other analyses which understand
1127  // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
1128  // and CodeGen. And in this case, at least one of the comparison
1129  // operands has at least one user besides the compare (the select),
1130  // which would often largely negate the benefit of folding anyway.
1131  if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
1132  if (CI->hasOneUse()) {
1133  Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1134 
1135  // FIXME: This is a hack to avoid infinite looping with min/max patterns.
1136  // We have to ensure that vector constants that only differ with
1137  // undef elements are treated as equivalent.
1138  auto areLooselyEqual = [](Value *A, Value *B) {
1139  if (A == B)
1140  return true;
1141 
1142  // Test for vector constants.
1143  Constant *ConstA, *ConstB;
1144  if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB)))
1145  return false;
1146 
1147  // TODO: Deal with FP constants?
1148  if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType())
1149  return false;
1150 
1151  // Compare for equality including undefs as equal.
1152  auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
1153  const APInt *C;
1154  return match(Cmp, m_APIntAllowUndef(C)) && C->isOne();
1155  };
1156 
1157  if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
1158  (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1)))
1159  return nullptr;
1160  }
1161  }
1162 
1163  // Make sure that one of the select arms constant folds successfully.
1164  Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, TV);
1165  Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, FV);
1166  if (!NewTV && !NewFV)
1167  return nullptr;
1168 
1169  // Create an instruction for the arm that did not fold.
1170  if (!NewTV)
1171  NewTV = foldOperationIntoSelectOperand(Op, TV, Builder);
1172  if (!NewFV)
1173  NewFV = foldOperationIntoSelectOperand(Op, FV, Builder);
1174  return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
1175 }
1176 
1177 Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
1178  unsigned NumPHIValues = PN->getNumIncomingValues();
1179  if (NumPHIValues == 0)
1180  return nullptr;
1181 
1182  // We normally only transform phis with a single use. However, if a PHI has
1183  // multiple uses and they are all the same operation, we can fold *all* of the
1184  // uses into the PHI.
1185  if (!PN->hasOneUse()) {
1186  // Walk the use list for the instruction, comparing them to I.
1187  for (User *U : PN->users()) {
1188  Instruction *UI = cast<Instruction>(U);
1189  if (UI != &I && !I.isIdenticalTo(UI))
1190  return nullptr;
1191  }
1192  // Otherwise, we can replace *all* users with the new PHI we form.
1193  }
1194 
1195  // Check to see whether the instruction can be folded into each phi operand.
1196  // If there is one operand that does not fold, remember the BB it is in.
1197  // If there is more than one or if *it* is a PHI, bail out.
1198  SmallVector<Value *> NewPhiValues;
1199  BasicBlock *NonSimplifiedBB = nullptr;
1200  Value *NonSimplifiedInVal = nullptr;
1201  for (unsigned i = 0; i != NumPHIValues; ++i) {
1202  Value *InVal = PN->getIncomingValue(i);
1203  BasicBlock *InBB = PN->getIncomingBlock(i);
1204 
1205  // NB: It is a precondition of this transform that the operands be
1206  // phi translatable! This is usually trivially satisfied by limiting it
1207  // to constant ops, and for selects we do a more sophisticated check.
1208  SmallVector<Value *> Ops;
1209  for (Value *Op : I.operands()) {
1210  if (Op == PN)
1211  Ops.push_back(InVal);
1212  else
1213  Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
1214  }
1215 
1216  // Don't consider the simplification successful if we get back a constant
1217  // expression. That's just an instruction in hiding.
1218  // Also reject the case where we simplify back to the phi node. We wouldn't
1219  // be able to remove it in that case.
1220  Value *NewVal = simplifyInstructionWithOperands(
1221  &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
1222  if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr())) {
1223  NewPhiValues.push_back(NewVal);
1224  continue;
1225  }
1226 
1227  if (isa<PHINode>(InVal)) return nullptr; // Itself a phi.
1228  if (NonSimplifiedBB) return nullptr; // More than one non-simplified value.
1229 
1230  NonSimplifiedBB = InBB;
1231  NonSimplifiedInVal = InVal;
1232  NewPhiValues.push_back(nullptr);
1233 
1234  // If the InVal is an invoke at the end of the pred block, then we can't
1235  // insert a computation after it without breaking the edge.
1236  if (isa<InvokeInst>(InVal))
1237  if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB)
1238  return nullptr;
1239 
1240  // If the incoming non-constant value is reachable from the phis block,
1241  // we'll push the operation across a loop backedge. This could result in
1242  // an infinite combine loop, and is generally non-profitable (especially
1243  // if the operation was originally outside the loop).
1244  if (isPotentiallyReachable(PN->getParent(), NonSimplifiedBB, nullptr, &DT,
1245  LI))
1246  return nullptr;
1247  }
1248 
1249  // If there is exactly one non-simplified value, we can insert a copy of the
1250  // operation in that block. However, if this is a critical edge, we would be
1251  // inserting the computation on some other paths (e.g. inside a loop). Only
1252  // do this if the pred block is unconditionally branching into the phi block.
1253  // Also, make sure that the pred block is not dead code.
1254  if (NonSimplifiedBB != nullptr) {
1255  BranchInst *BI = dyn_cast<BranchInst>(NonSimplifiedBB->getTerminator());
1256  if (!BI || !BI->isUnconditional() ||
1257  !DT.isReachableFromEntry(NonSimplifiedBB))
1258  return nullptr;
1259  }
1260 
1261  // Okay, we can do the transformation: create the new PHI node.
1262  PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
1263  InsertNewInstBefore(NewPN, *PN);
1264  NewPN->takeName(PN);
1265 
1266  // If we are going to have to insert a new computation, do so right before the
1267  // predecessor's terminator.
1268  Instruction *Clone = nullptr;
1269  if (NonSimplifiedBB) {
1270  Clone = I.clone();
1271  for (Use &U : Clone->operands()) {
1272  if (U == PN)
1273  U = NonSimplifiedInVal;
1274  else
1275  U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
1276  }
1277  InsertNewInstBefore(Clone, *NonSimplifiedBB->getTerminator());
1278  }
1279 
1280  for (unsigned i = 0; i != NumPHIValues; ++i) {
1281  if (NewPhiValues[i])
1282  NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
1283  else
1284  NewPN->addIncoming(Clone, PN->getIncomingBlock(i));
1285  }
1286 
1287  for (User *U : make_early_inc_range(PN->users())) {
1288  Instruction *User = cast<Instruction>(U);
1289  if (User == &I) continue;
1290  replaceInstUsesWith(*User, NewPN);
1291  eraseInstFromFunction(*User);
1292  }
1293  return replaceInstUsesWith(I, NewPN);
1294 }
1295 
1296 Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
1297  // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
1298  // we are guarding against replicating the binop in >1 predecessor.
1299  // This could miss matching a phi with 2 constant incoming values.
1300  auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
1301  auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
1302  if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
1303  Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
1304  return nullptr;
1305 
1306  // TODO: Remove the restriction for binop being in the same block as the phis.
1307  if (BO.getParent() != Phi0->getParent() ||
1308  BO.getParent() != Phi1->getParent())
1309  return nullptr;
1310 
1311  // Match a pair of incoming constants for one of the predecessor blocks.
1312  BasicBlock *ConstBB, *OtherBB;
1313  Constant *C0, *C1;
1314  if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
1315  ConstBB = Phi0->getIncomingBlock(0);
1316  OtherBB = Phi0->getIncomingBlock(1);
1317  } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
1318  ConstBB = Phi0->getIncomingBlock(1);
1319  OtherBB = Phi0->getIncomingBlock(0);
1320  } else {
1321  return nullptr;
1322  }
1323  if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
1324  return nullptr;
1325 
1326  // The block that we are hoisting to must reach here unconditionally.
1327  // Otherwise, we could be speculatively executing an expensive or
1328  // non-speculative op.
1329  auto *PredBlockBranch = dyn_cast<BranchInst>(OtherBB->getTerminator());
1330  if (!PredBlockBranch || PredBlockBranch->isConditional() ||
1331  !DT.isReachableFromEntry(OtherBB))
1332  return nullptr;
1333 
1334  // TODO: This check could be tightened to only apply to binops (div/rem) that
1335  // are not safe to speculatively execute. But that could allow hoisting
1336  // potentially expensive instructions (fdiv for example).
1337  for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
1338  if (!isGuaranteedToTransferExecutionToSuccessor(&*BBIter))
1339  return nullptr;
1340 
1341  // Fold constants for the predecessor block with constant incoming values.
1342  Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
1343  if (!NewC)
1344  return nullptr;
1345 
1346  // Make a new binop in the predecessor block with the non-constant incoming
1347  // values.
1348  Builder.SetInsertPoint(PredBlockBranch);
1349  Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
1350  Phi0->getIncomingValueForBlock(OtherBB),
1351  Phi1->getIncomingValueForBlock(OtherBB));
1352  if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
1353  NotFoldedNewBO->copyIRFlags(&BO);
1354 
1355  // Replace the binop with a phi of the new values. The old phis are dead.
1356  PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
1357  NewPhi->addIncoming(NewBO, OtherBB);
1358  NewPhi->addIncoming(NewC, ConstBB);
1359  return NewPhi;
1360 }
1361 
1362 Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
1363  if (!isa<Constant>(I.getOperand(1)))
1364  return nullptr;
1365 
1366  if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
1367  if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
1368  return NewSel;
1369  } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
1370  if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
1371  return NewPhi;
1372  }
1373  return nullptr;
1374 }
1375 
1376 /// Given a pointer type and a constant offset, determine whether or not there
1377 /// is a sequence of GEP indices into the pointed type that will land us at the
1378 /// specified offset. If so, fill them into NewIndices and return the resultant
1379 /// element type, otherwise return null.
1380 static Type *findElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
1381  SmallVectorImpl<Value *> &NewIndices,
1382  const DataLayout &DL) {
1383  // Only used by visitGEPOfBitcast(), which is skipped for opaque pointers.
1384  Type *Ty = PtrTy->getNonOpaquePointerElementType();
1385  if (!Ty->isSized())
1386  return nullptr;
1387 
1388  APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), IntOffset);
1389  SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(Ty, Offset);
1390  if (!Offset.isZero())
1391  return nullptr;
1392 
1393  for (const APInt &Index : Indices)
1394  NewIndices.push_back(ConstantInt::get(PtrTy->getContext(), Index));
1395  return Ty;
1396 }
1397 
1398 static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
1399  // If this GEP has only 0 indices, it is the same pointer as
1400  // Src. If Src is not a trivial GEP too, don't combine
1401  // the indices.
1402  if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
1403  !Src.hasOneUse())
1404  return false;
1405  return true;
1406 }
1407 
1408 /// Return a value X such that Val = X * Scale, or null if none.
1409 /// If the multiplication is known not to overflow, then NoSignedWrap is set.
1410 Value *InstCombinerImpl::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
1411  assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!");
1412  assert(cast<IntegerType>(Val->getType())->getBitWidth() ==
1413  Scale.getBitWidth() && "Scale not compatible with value!");
1414 
1415  // If Val is zero or Scale is one then Val = Val * Scale.
1416  if (match(Val, m_Zero()) || Scale == 1) {
1417  NoSignedWrap = true;
1418  return Val;
1419  }
1420 
1421  // If Scale is zero then it does not divide Val.
1422  if (Scale.isMinValue())
1423  return nullptr;
1424 
1425  // Look through chains of multiplications, searching for a constant that is
1426  // divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4
1427  // will find the constant factor 4 and produce X*(Y*Z). Descaling X*(Y*8) by
1428  // a factor of 4 will produce X*(Y*2). The principle of operation is to bore
1429  // down from Val:
1430  //
1431  // Val = M1 * X || Analysis starts here and works down
1432  // M1 = M2 * Y || Doesn't descend into terms with more
1433  // M2 = Z * 4 \/ than one use
1434  //
1435  // Then to modify a term at the bottom:
1436  //
1437  // Val = M1 * X
1438  // M1 = Z * Y || Replaced M2 with Z
1439  //
1440  // Then to work back up correcting nsw flags.
1441 
1442  // Op - the term we are currently analyzing. Starts at Val then drills down.
1443  // Replaced with its descaled value before exiting from the drill down loop.
1444  Value *Op = Val;
1445 
1446  // Parent - initially null, but after drilling down notes where Op came from.
1447  // In the example above, Parent is (Val, 0) when Op is M1, because M1 is the
1448  // 0'th operand of Val.
1449  std::pair<Instruction *, unsigned> Parent;
1450 
1451  // Set if the transform requires a descaling at deeper levels that doesn't
1452  // overflow.
1453  bool RequireNoSignedWrap = false;
1454 
1455  // Log base 2 of the scale. Negative if not a power of 2.
1456  int32_t logScale = Scale.exactLogBase2();
1457 
1458  for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down
1459  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
1460  // If Op is a constant divisible by Scale then descale to the quotient.
1461  APInt Quotient(Scale), Remainder(Scale); // Init ensures right bitwidth.
1462  APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
1463  if (!Remainder.isMinValue())
1464  // Not divisible by Scale.
1465  return nullptr;
1466  // Replace with the quotient in the parent.
1467  Op = ConstantInt::get(CI->getType(), Quotient);
1468  NoSignedWrap = true;
1469  break;
1470  }
1471 
1472  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op)) {
1473  if (BO->getOpcode() == Instruction::Mul) {
1474  // Multiplication.
1475  NoSignedWrap = BO->hasNoSignedWrap();
1476  if (RequireNoSignedWrap && !NoSignedWrap)
1477  return nullptr;
1478 
1479  // There are three cases for multiplication: multiplication by exactly
1480  // the scale, multiplication by a constant different to the scale, and
1481  // multiplication by something else.
1482  Value *LHS = BO->getOperand(0);
1483  Value *RHS = BO->getOperand(1);
1484 
1485  if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
1486  // Multiplication by a constant.
1487  if (CI->getValue() == Scale) {
1488  // Multiplication by exactly the scale, replace the multiplication
1489  // by its left-hand side in the parent.
1490  Op = LHS;
1491  break;
1492  }
1493 
1494  // Otherwise drill down into the constant.
1495  if (!Op->hasOneUse())
1496  return nullptr;
1497 
1498  Parent = std::make_pair(BO, 1);
1499  continue;
1500  }
1501 
1502  // Multiplication by something else. Drill down into the left-hand side
1503  // since that's where the reassociate pass puts the good stuff.
1504  if (!Op->hasOneUse())
1505  return nullptr;
1506 
1507  Parent = std::make_pair(BO, 0);
1508  continue;
1509  }
1510 
1511  if (logScale > 0 && BO->getOpcode() == Instruction::Shl &&
1512  isa<ConstantInt>(BO->getOperand(1))) {
1513  // Multiplication by a power of 2.
1514  NoSignedWrap = BO->hasNoSignedWrap();
1515  if (RequireNoSignedWrap && !NoSignedWrap)
1516  return nullptr;
1517 
1518  Value *LHS = BO->getOperand(0);
1519  int32_t Amt = cast<ConstantInt>(BO->getOperand(1))->
1520  getLimitedValue(Scale.getBitWidth());
1521  // Op = LHS << Amt.
1522 
1523  if (Amt == logScale) {
1524  // Multiplication by exactly the scale, replace the multiplication
1525  // by its left-hand side in the parent.
1526  Op = LHS;
1527  break;
1528  }
1529  if (Amt < logScale || !Op->hasOneUse())
1530  return nullptr;
1531 
1532  // Multiplication by more than the scale. Reduce the multiplying amount
1533  // by the scale in the parent.
1534  Parent = std::make_pair(BO, 1);
1535  Op = ConstantInt::get(BO->getType(), Amt - logScale);
1536  break;
1537  }
1538  }
1539 
1540  if (!Op->hasOneUse())
1541  return nullptr;
1542 
1543  if (CastInst *Cast = dyn_cast<CastInst>(Op)) {
1544  if (Cast->getOpcode() == Instruction::SExt) {
1545  // Op is sign-extended from a smaller type, descale in the smaller type.
1546  unsigned SmallSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
1547  APInt SmallScale = Scale.trunc(SmallSize);
1548  // Suppose Op = sext X, and we descale X as Y * SmallScale. We want to
1549  // descale Op as (sext Y) * Scale. In order to have
1550  // sext (Y * SmallScale) = (sext Y) * Scale
1551  // some conditions need to hold however: SmallScale must sign-extend to
1552  // Scale and the multiplication Y * SmallScale should not overflow.
1553  if (SmallScale.sext(Scale.getBitWidth()) != Scale)
1554  // SmallScale does not sign-extend to Scale.
1555  return nullptr;
1556  assert(SmallScale.exactLogBase2() == logScale);
1557  // Require that Y * SmallScale must not overflow.
1558  RequireNoSignedWrap = true;
1559 
1560  // Drill down through the cast.
1561  Parent = std::make_pair(Cast, 0);
1562  Scale = SmallScale;
1563  continue;
1564  }
1565 
1566  if (Cast->getOpcode() == Instruction::Trunc) {
1567  // Op is truncated from a larger type, descale in the larger type.
1568  // Suppose Op = trunc X, and we descale X as Y * sext Scale. Then
1569  // trunc (Y * sext Scale) = (trunc Y) * Scale
1570  // always holds. However (trunc Y) * Scale may overflow even if
1571  // trunc (Y * sext Scale) does not, so nsw flags need to be cleared
1572  // from this point up in the expression (see later).
1573  if (RequireNoSignedWrap)
1574  return nullptr;
1575 
1576  // Drill down through the cast.
1577  unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
1578  Parent = std::make_pair(Cast, 0);
1579  Scale = Scale.sext(LargeSize);
1580  if (logScale + 1 == (int32_t)Cast->getType()->getPrimitiveSizeInBits())
1581  logScale = -1;
1582  assert(Scale.exactLogBase2() == logScale);
1583  continue;
1584  }
1585  }
1586 
1587  // Unsupported expression, bail out.
1588  return nullptr;
1589  }
1590 
1591  // If Op is zero then Val = Op * Scale.
1592  if (match(Op, m_Zero())) {
1593  NoSignedWrap = true;
1594  return Op;
1595  }
1596 
1597  // We know that we can successfully descale, so from here on we can safely
1598  // modify the IR. Op holds the descaled version of the deepest term in the
1599  // expression. NoSignedWrap is 'true' if multiplying Op by Scale is known
1600  // not to overflow.
1601 
1602  if (!Parent.first)
1603  // The expression only had one term.
1604  return Op;
1605 
1606  // Rewrite the parent using the descaled version of its operand.
1607  assert(Parent.first->hasOneUse() && "Drilled down when more than one use!");
1608  assert(Op != Parent.first->getOperand(Parent.second) &&
1609  "Descaling was a no-op?");
1610  replaceOperand(*Parent.first, Parent.second, Op);
1611  Worklist.push(Parent.first);
1612 
1613  // Now work back up the expression correcting nsw flags. The logic is based
1614  // on the following observation: if X * Y is known not to overflow as a signed
1615  // multiplication, and Y is replaced by a value Z with smaller absolute value,
1616  // then X * Z will not overflow as a signed multiplication either. As we work
1617  // our way up, having NoSignedWrap 'true' means that the descaled value at the
1618  // current level has strictly smaller absolute value than the original.
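  // For illustration (hypothetical i8 values): if 3 * 40 = 120 does not
  // overflow and 40 is replaced by 20 (smaller absolute value), then
  // 3 * 20 = 60 cannot overflow either.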
1619  Instruction *Ancestor = Parent.first;
1620  do {
1621  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Ancestor)) {
1622  // If the multiplication wasn't nsw then we can't say anything about the
1623  // value of the descaled multiplication, and we have to clear nsw flags
1624  // from this point on up.
1625  bool OpNoSignedWrap = BO->hasNoSignedWrap();
1626  NoSignedWrap &= OpNoSignedWrap;
1627  if (NoSignedWrap != OpNoSignedWrap) {
1628  BO->setHasNoSignedWrap(NoSignedWrap);
1629  Worklist.push(Ancestor);
1630  }
1631  } else if (Ancestor->getOpcode() == Instruction::Trunc) {
1632  // The fact that the descaled input to the trunc has smaller absolute
1633  // value than the original input doesn't tell us anything useful about
1634  // the absolute values of the truncations.
1635  NoSignedWrap = false;
1636  }
1637  assert((Ancestor->getOpcode() != Instruction::SExt || NoSignedWrap) &&
1638  "Failed to keep proper track of nsw flags while drilling down?");
1639 
1640  if (Ancestor == Val)
1641  // Got to the top, all done!
1642  return Val;
1643 
1644  // Move up one level in the expression.
1645  assert(Ancestor->hasOneUse() && "Drilled down when more than one use!");
1646  Ancestor = Ancestor->user_back();
1647  } while (true);
1648 }
1649 
1650 Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
1651  if (!isa<VectorType>(Inst.getType()))
1652  return nullptr;
1653 
1654  BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
1655  Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
1656  assert(cast<VectorType>(LHS->getType())->getElementCount() ==
1657  cast<VectorType>(Inst.getType())->getElementCount());
1658  assert(cast<VectorType>(RHS->getType())->getElementCount() ==
1659  cast<VectorType>(Inst.getType())->getElementCount());
1660 
1661  // If both operands of the binop are vector concatenations, then perform the
1662  // narrow binop on each pair of the source operands followed by concatenation
1663  // of the results.
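  // For illustration (hypothetical IR), with Mask = <0, 1, 2, 3>:
  //   %lhs = shufflevector <2 x i32> %a0, <2 x i32> %a1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  //   %rhs = shufflevector <2 x i32> %b0, <2 x i32> %b1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  //   %r   = add <4 x i32> %lhs, %rhs
  // -->
  //   %n0 = add <2 x i32> %a0, %b0
  //   %n1 = add <2 x i32> %a1, %b1
  //   %r  = shufflevector <2 x i32> %n0, <2 x i32> %n1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>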
1664  Value *L0, *L1, *R0, *R1;
1665  ArrayRef<int> Mask;
1666  if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
1667  match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
1668  LHS->hasOneUse() && RHS->hasOneUse() &&
1669  cast<ShuffleVectorInst>(LHS)->isConcat() &&
1670  cast<ShuffleVectorInst>(RHS)->isConcat()) {
1671  // This transform does not have the speculative execution constraint as
1672  // below because the shuffle is a concatenation. The new binops are
1673  // operating on exactly the same elements as the existing binop.
1674  // TODO: We could ease the mask requirement to allow different undef lanes,
1675  // but that requires an analysis of the binop-with-undef output value.
1676  Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
1677  if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
1678  BO->copyIRFlags(&Inst);
1679  Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
1680  if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
1681  BO->copyIRFlags(&Inst);
1682  return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
1683  }
1684 
1685  // It may not be safe to reorder shuffles and things like div, urem, etc.
1686  // because we may trap when executing those ops on unknown vector elements.
1687  // See PR20059.
1688  if (!isSafeToSpeculativelyExecute(&Inst))
1689  return nullptr;
1690 
1691  auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
1692  Value *XY = Builder.CreateBinOp(Opcode, X, Y);
1693  if (auto *BO = dyn_cast<BinaryOperator>(XY))
1694  BO->copyIRFlags(&Inst);
1695  return new ShuffleVectorInst(XY, M);
1696  };
1697 
1698  // If both arguments of the binary operation are shuffles that use the same
1699  // mask and shuffle within a single vector, move the shuffle after the binop.
1700  Value *V1, *V2;
1701  if (match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))) &&
1702  match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) &&
1703  V1->getType() == V2->getType() &&
1704  (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
1705  // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
1706  return createBinOpShuffle(V1, V2, Mask);
1707  }
1708 
1709  // If both arguments of a commutative binop are select-shuffles that use the
1710  // same mask with commuted operands, the shuffles are unnecessary.
1711  if (Inst.isCommutative() &&
1712  match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
1713  match(RHS,
1714  m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
1715  auto *LShuf = cast<ShuffleVectorInst>(LHS);
1716  auto *RShuf = cast<ShuffleVectorInst>(RHS);
1717  // TODO: Allow shuffles that contain undefs in the mask?
1718  // That is legal, but it reduces undef knowledge.
1719  // TODO: Allow arbitrary shuffles by shuffling after binop?
1720  // That might be legal, but we have to deal with poison.
1721  if (LShuf->isSelect() &&
1722  !is_contained(LShuf->getShuffleMask(), UndefMaskElem) &&
1723  RShuf->isSelect() &&
1724  !is_contained(RShuf->getShuffleMask(), UndefMaskElem)) {
1725  // Example:
1726  // LHS = shuffle V1, V2, <0, 5, 6, 3>
1727  // RHS = shuffle V2, V1, <0, 5, 6, 3>
1728  // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
1729  Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
1730  NewBO->copyIRFlags(&Inst);
1731  return NewBO;
1732  }
1733  }
1734 
1735  // If one argument is a shuffle within one vector and the other is a constant,
1736  // try moving the shuffle after the binary operation. This canonicalization
1737  // intends to move shuffles closer to other shuffles and binops closer to
1738  // other binops, so they can be folded. It may also enable demanded elements
1739  // transforms.
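  // For illustration (hypothetical IR), matching the NewC example below:
  //   %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
  //   %r = add <4 x i32> %s, <i32 5, i32 5, i32 6, i32 6>
  // -->
  //   %n = add <4 x i32> %v, <i32 undef, i32 5, i32 6, i32 undef>
  //   %r = shufflevector <4 x i32> %n, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>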
1740  Constant *C;
1741  auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
1742  if (InstVTy &&
1743  match(&Inst,
1744  m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))),
1745  m_ImmConstant(C))) &&
1746  cast<FixedVectorType>(V1->getType())->getNumElements() <=
1747  InstVTy->getNumElements()) {
1748  assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
1749  "Shuffle should not change scalar type");
1750 
1751  // Find constant NewC that has property:
1752  // shuffle(NewC, ShMask) = C
1753  // If such a constant does not exist (example: ShMask=<0,0> and C=<1,2>),
1754  // the reorder is not possible. A 1-to-1 mapping is not required. Example:
1755  // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
1756  bool ConstOp1 = isa<Constant>(RHS);
1757  ArrayRef<int> ShMask = Mask;
1758  unsigned SrcVecNumElts =
1759  cast<FixedVectorType>(V1->getType())->getNumElements();
1760  UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
1761  SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
1762  bool MayChange = true;
1763  unsigned NumElts = InstVTy->getNumElements();
1764  for (unsigned I = 0; I < NumElts; ++I) {
1765  Constant *CElt = C->getAggregateElement(I);
1766  if (ShMask[I] >= 0) {
1767  assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
1768  Constant *NewCElt = NewVecC[ShMask[I]];
1769  // Bail out if:
1770  // 1. The constant vector contains a constant expression.
1771  // 2. The shuffle needs an element of the constant vector that can't
1772  // be mapped to a new constant vector.
1773  // 3. This is a widening shuffle that copies elements of V1 into the
1774  // extended elements (extending with undef is allowed).
1775  if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt) ||
1776  I >= SrcVecNumElts) {
1777  MayChange = false;
1778  break;
1779  }
1780  NewVecC[ShMask[I]] = CElt;
1781  }
1782  // If this is a widening shuffle, we must be able to extend with undef
1783  // elements. If the original binop does not produce an undef in the high
1784  // lanes, then this transform is not safe.
1785  // Similarly for undef lanes due to the shuffle mask, we can only
1786  // transform binops that preserve undef.
1787  // TODO: We could shuffle those non-undef constant values into the
1788  // result by using a constant vector (rather than an undef vector)
1789  // as operand 1 of the new binop, but that might be too aggressive
1790  // for target-independent shuffle creation.
1791  if (I >= SrcVecNumElts || ShMask[I] < 0) {
1792  Constant *MaybeUndef =
1793  ConstOp1
1794  ? ConstantFoldBinaryOpOperands(Opcode, UndefScalar, CElt, DL)
1795  : ConstantFoldBinaryOpOperands(Opcode, CElt, UndefScalar, DL);
1796  if (!MaybeUndef || !match(MaybeUndef, m_Undef())) {
1797  MayChange = false;
1798  break;
1799  }
1800  }
1801  }
1802  if (MayChange) {
1803  Constant *NewC = ConstantVector::get(NewVecC);
1804  // It may not be safe to execute a binop on a vector with undef elements
1805  // because the entire instruction can be folded to undef or create poison
1806  // that did not exist in the original code.
1807  if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
1808  NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
1809 
1810  // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
1811  // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
1812  Value *NewLHS = ConstOp1 ? V1 : NewC;
1813  Value *NewRHS = ConstOp1 ? NewC : V1;
1814  return createBinOpShuffle(NewLHS, NewRHS, Mask);
1815  }
1816  }
1817 
1818  // Try to reassociate to sink a splat shuffle after a binary operation.
1819  if (Inst.isAssociative() && Inst.isCommutative()) {
1820  // Canonicalize shuffle operand as LHS.
1821  if (isa<ShuffleVectorInst>(RHS))
1822  std::swap(LHS, RHS);
1823 
1824  Value *X;
1825  ArrayRef<int> MaskC;
1826  int SplatIndex;
1827  Value *Y, *OtherOp;
1828  if (!match(LHS,
1829  m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
1830  !match(MaskC, m_SplatOrUndefMask(SplatIndex)) ||
1831  X->getType() != Inst.getType() ||
1832  !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
1833  return nullptr;
1834 
1835  // FIXME: This may not be safe if the analysis allows undef elements. By
1836  // moving 'Y' before the splat shuffle, we are implicitly assuming
1837  // that it is not undef/poison at the splat index.
1838  if (isSplatValue(OtherOp, SplatIndex)) {
1839  std::swap(Y, OtherOp);
1840  } else if (!isSplatValue(Y, SplatIndex)) {
1841  return nullptr;
1842  }
1843 
1844  // X and Y are splatted values, so perform the binary operation on those
1845  // values followed by a splat followed by the 2nd binary operation:
1846  // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
1847  Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
1848  SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
1849  Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
1850  Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
1851 
1852  // Intersect FMF on both new binops. Other (poison-generating) flags are
1853  // dropped to be safe.
1854  if (isa<FPMathOperator>(R)) {
1855  R->copyFastMathFlags(&Inst);
1856  R->andIRFlags(RHS);
1857  }
1858  if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
1859  NewInstBO->copyIRFlags(R);
1860  return R;
1861  }
1862 
1863  return nullptr;
1864 }
1865 
1866 /// Try to narrow the width of a binop if at least 1 operand is an extend
1867 /// of a value. This requires a potentially expensive known bits check to make
1868 /// sure the narrow op does not overflow.
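/// A minimal illustration (hypothetical IR), assuming the narrow add is known
/// not to overflow:
///   %x32 = zext i8 %x to i32
///   %y32 = zext i8 %y to i32
///   %r   = add i32 %x32, %y32
/// -->
///   %narrow = add nuw i8 %x, %y
///   %r      = zext i8 %narrow to i32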
1869 Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
1870  // We need at least one extended operand.
1871  Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
1872 
1873  // If this is a sub, we swap the operands since we always want an extension
1874  // on the RHS. The LHS can be an extension or a constant.
1875  if (BO.getOpcode() == Instruction::Sub)
1876  std::swap(Op0, Op1);
1877 
1878  Value *X;
1879  bool IsSext = match(Op0, m_SExt(m_Value(X)));
1880  if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
1881  return nullptr;
1882 
1883  // If both operands are the same extension from the same source type and we
1884  // can eliminate at least one (hasOneUse), this might work.
1885  CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
1886  Value *Y;
1887  if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
1888  cast<Operator>(Op1)->getOpcode() == CastOpc &&
1889  (Op0->hasOneUse() || Op1->hasOneUse()))) {
1890  // If that did not match, see if we have a suitable constant operand.
1891  // Truncating and extending must produce the same constant.
1892  Constant *WideC;
1893  if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
1894  return nullptr;
1895  Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
1896  if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
1897  return nullptr;
1898  Y = NarrowC;
1899  }
1900 
1901  // Swap back now that we found our operands.
1902  if (BO.getOpcode() == Instruction::Sub)
1903  std::swap(X, Y);
1904 
1905  // Both operands have narrow versions. Last step: the math must not overflow
1906  // in the narrow width.
1907  if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
1908  return nullptr;
1909 
1910  // bo (ext X), (ext Y) --> ext (bo X, Y)
1911  // bo (ext X), C --> ext (bo X, C')
1912  Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
1913  if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
1914  if (IsSext)
1915  NewBinOp->setHasNoSignedWrap();
1916  else
1917  NewBinOp->setHasNoUnsignedWrap();
1918  }
1919  return CastInst::Create(CastOpc, NarrowBO, BO.getType());
1920 }
1921 
1922 static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
1923  // At least one GEP must be inbounds.
1924  if (!GEP1.isInBounds() && !GEP2.isInBounds())
1925  return false;
1926 
1927  return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
1928  (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
1929 }
1930 
1931 /// Thread a GEP operation with constant indices through the constant true/false
1932 /// arms of a select.
1933 static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
1934  InstCombiner::BuilderTy &Builder) {
1935  if (!GEP.hasAllConstantIndices())
1936  return nullptr;
1937 
1938  Instruction *Sel;
1939  Value *Cond;
1940  Constant *TrueC, *FalseC;
1941  if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
1942  !match(Sel,
1943  m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
1944  return nullptr;
1945 
1946  // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
1947  // Propagate 'inbounds' and metadata from existing instructions.
1948  // Note: using IRBuilder to create the constants for efficiency.
1949  SmallVector<Value *, 4> IndexC(GEP.indices());
1950  bool IsInBounds = GEP.isInBounds();
1951  Type *Ty = GEP.getSourceElementType();
1952  Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", IsInBounds);
1953  Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", IsInBounds);
1954  return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
1955 }
1956 
1957 Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
1958  GEPOperator *Src) {
1959  // Combine Indices - If the source pointer to this getelementptr instruction
1960  // is a getelementptr instruction with matching element type, combine the
1961  // indices of the two getelementptr instructions into a single instruction.
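  // For illustration (hypothetical IR):
  //   %p = getelementptr i32, i32* %base, i64 %i
  //   %q = getelementptr i32, i32* %p, i64 %j
  // -->
  //   %sum = add i64 %i, %j
  //   %q   = getelementptr i32, i32* %base, i64 %sum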
1962  if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
1963  return nullptr;
1964 
1965  if (Src->getResultElementType() == GEP.getSourceElementType() &&
1966  Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
1967  Src->hasOneUse()) {
1968  Value *GO1 = GEP.getOperand(1);
1969  Value *SO1 = Src->getOperand(1);
1970 
1971  if (LI) {
1972  // Try to reassociate loop invariant GEP chains to enable LICM.
1973  if (Loop *L = LI->getLoopFor(GEP.getParent())) {
1974  // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is
1975  // invariant: this breaks the dependence between GEPs and allows LICM
1976  // to hoist the invariant part out of the loop.
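  // For illustration (hypothetical IR), with %inv loop-invariant and %var
  // loop-variant:
  //   %src = getelementptr i32, i32* %base, i64 %var
  //   %gep = getelementptr i32, i32* %src, i64 %inv
  // -->
  //   %newsrc = getelementptr i32, i32* %base, i64 %inv  ; LICM can hoist this
  //   %gep    = getelementptr i32, i32* %newsrc, i64 %var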
1977  if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) {
1978  // The swapped GEPs are inbounds if both original GEPs are inbounds
1979  // and the sign of the offsets is the same. For simplicity, only
1980  // handle both offsets being non-negative.
1981  bool IsInBounds = Src->isInBounds() && GEP.isInBounds() &&
1982  isKnownNonNegative(SO1, DL, 0, &AC, &GEP, &DT) &&
1983  isKnownNonNegative(GO1, DL, 0, &AC, &GEP, &DT);
1984  // Put NewSrc at same location as %src.
1985  Builder.SetInsertPoint(cast<Instruction>(Src));
1986  Value *NewSrc = Builder.CreateGEP(GEP.getSourceElementType(),
1987  Src->getPointerOperand(), GO1,
1988  Src->getName(), IsInBounds);
1989  GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
1990  GEP.getSourceElementType(), NewSrc, {SO1});
1991  NewGEP->setIsInBounds(IsInBounds);
1992  return NewGEP;
1993  }
1994  }
1995  }
1996  }
1997 
1998  // Note that if our source is a gep chain itself then we wait for that
1999  // chain to be resolved before we perform this transformation. This
2000  // avoids us creating a TON of code in some cases.
2001  if (auto *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0)))
2002  if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
2003  return nullptr; // Wait until our source is folded to completion.
2004 
2005  // For constant GEPs, use a more general offset-based folding approach.
2006  // Only do this for opaque pointers, as the result element type may change.
2007  Type *PtrTy = Src->getType()->getScalarType();
2008  if (PtrTy->isOpaquePointerTy() && GEP.hasAllConstantIndices() &&
2009  (Src->hasOneUse() || Src->hasAllConstantIndices())) {
2010  // Split Src into a variable part and a constant suffix.
2011  gep_type_iterator GTI = gep_type_begin(*Src);
2012  Type *BaseType = GTI.getIndexedType();
2013  bool IsFirstType = true;
2014  unsigned NumVarIndices = 0;
2015  for (auto Pair : enumerate(Src->indices())) {
2016  if (!isa<ConstantInt>(Pair.value())) {
2017  BaseType = GTI.getIndexedType();
2018  IsFirstType = false;
2019  NumVarIndices = Pair.index() + 1;
2020  }
2021  ++GTI;
2022  }
2023 
2024  // Determine the offset for the constant suffix of Src.
2025  APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
2026  if (NumVarIndices != Src->getNumIndices()) {
2027  // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
2028  if (isa<ScalableVectorType>(BaseType))
2029  return nullptr;
2030 
2031  SmallVector<Value *> ConstantIndices;
2032  if (!IsFirstType)
2033  ConstantIndices.push_back(
2034  Constant::getNullValue(Type::getInt32Ty(GEP.getContext())));
2035  append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices));
2036  Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
2037  }
2038 
2039  // Add the offset for GEP (which is fully constant).
2040  if (!GEP.accumulateConstantOffset(DL, Offset))
2041  return nullptr;
2042 
2043  APInt OffsetOld = Offset;
2044  // Convert the total offset back into indices.
2045  SmallVector<APInt> ConstIndices =
2046  DL.getGEPIndicesForOffset(BaseType, Offset);
2047  if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) {
2048  // If both GEP are constant-indexed, and cannot be merged in either way,
2049  // convert them to a GEP of i8.
2050  if (Src->hasAllConstantIndices())
2051  return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
2052  ? GetElementPtrInst::CreateInBounds(
2053  Builder.getInt8Ty(), Src->getOperand(0),
2054  Builder.getInt(OffsetOld), GEP.getName())
2055  : GetElementPtrInst::Create(
2056  Builder.getInt8Ty(), Src->getOperand(0),
2057  Builder.getInt(OffsetOld), GEP.getName());
2058  return nullptr;
2059  }
2060 
2061  bool IsInBounds = isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP));
2062  SmallVector<Value *> Indices;
2063  append_range(Indices, drop_end(Src->indices(),
2064  Src->getNumIndices() - NumVarIndices));
2065  for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) {
2066  Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
2067  // Even if the total offset is inbounds, we may end up representing it
2068  // by first performing a larger negative offset, and then a smaller
2069  // positive one. The large negative offset might go out of bounds. Only
2070  // preserve inbounds if all signs are the same.
2071  IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative();
2072  }
2073 
2074  return IsInBounds
2075  ? GetElementPtrInst::CreateInBounds(Src->getSourceElementType(),
2076  Src->getOperand(0), Indices,
2077  GEP.getName())
2078  : GetElementPtrInst::Create(Src->getSourceElementType(),
2079  Src->getOperand(0), Indices,
2080  GEP.getName());
2081  }
2082 
2083  if (Src->getResultElementType() != GEP.getSourceElementType())
2084  return nullptr;
2085 
2086  SmallVector<Value*, 8> Indices;
2087 
2088  // Find out whether the last index in the source GEP is a sequential idx.
2089  bool EndsWithSequential = false;
2090  for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
2091  I != E; ++I)
2092  EndsWithSequential = I.isSequential();
2093 
2094  // Can we combine the two pointer arithmetics offsets?
2095  if (EndsWithSequential) {
2096  // Replace: gep (gep %P, long B), long A, ...
2097  // With: T = long A+B; gep %P, T, ...
2098  Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
2099  Value *GO1 = GEP.getOperand(1);
2100 
2101  // If they aren't the same type, then the input hasn't been processed
2102  // by the loop above yet (which canonicalizes sequential index types to
2103  // intptr_t). Just avoid transforming this until the input has been
2104  // normalized.
2105  if (SO1->getType() != GO1->getType())
2106  return nullptr;
2107 
2108  Value *Sum =
2109  simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
2110  // Only do the combine when we are sure the cost after the
2111  // merge is never more than that before the merge.
2112  if (Sum == nullptr)
2113  return nullptr;
2114 
2115  // Update the GEP in place if possible.
2116  if (Src->getNumOperands() == 2) {
2117  GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
2118  replaceOperand(GEP, 0, Src->getOperand(0));
2119  replaceOperand(GEP, 1, Sum);
2120  return &GEP;
2121  }
2122  Indices.append(Src->op_begin()+1, Src->op_end()-1);
2123  Indices.push_back(Sum);
2124  Indices.append(GEP.op_begin()+2, GEP.op_end());
2125  } else if (isa<Constant>(*GEP.idx_begin()) &&
2126  cast<Constant>(*GEP.idx_begin())->isNullValue() &&
2127  Src->getNumOperands() != 1) {
2128  // Otherwise we can do the fold if the first index of the GEP is a zero
2129  Indices.append(Src->op_begin()+1, Src->op_end());
2130  Indices.append(GEP.idx_begin()+1, GEP.idx_end());
2131  }
2132 
2133  if (!Indices.empty())
2134  return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
2135  ? GetElementPtrInst::CreateInBounds(
2136  Src->getSourceElementType(), Src->getOperand(0), Indices,
2137  GEP.getName())
2138  : GetElementPtrInst::Create(Src->getSourceElementType(),
2139  Src->getOperand(0), Indices,
2140  GEP.getName());
2141 
2142  return nullptr;
2143 }
2144 
2145 // Note that we may have also stripped an address space cast in between.
2146 Instruction *InstCombinerImpl::visitGEPOfBitcast(BitCastInst *BCI,
2147  GetElementPtrInst &GEP) {
2148  // With opaque pointers, there is no pointer element type we can use to
2149  // adjust the GEP type.
2150  PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
2151  if (SrcType->isOpaque())
2152  return nullptr;
2153 
2154  Type *GEPEltType = GEP.getSourceElementType();
2155  Type *SrcEltType = SrcType->getNonOpaquePointerElementType();
2156  Value *SrcOp = BCI->getOperand(0);
2157 
2158  // GEP directly using the source operand if this GEP is accessing an element
2159  // of a bitcasted pointer to vector or array of the same dimensions:
2160  // gep (bitcast <c x ty>* X to [c x ty]*), Y, Z --> gep X, Y, Z
2161  // gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z
2162  auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy,
2163  const DataLayout &DL) {
2164  auto *VecVTy = cast<FixedVectorType>(VecTy);
2165  return ArrTy->getArrayElementType() == VecVTy->getElementType() &&
2166  ArrTy->getArrayNumElements() == VecVTy->getNumElements() &&
2167  DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy);
2168  };
2169  if (GEP.getNumOperands() == 3 &&
2170  ((GEPEltType->isArrayTy() && isa<FixedVectorType>(SrcEltType) &&
2171  areMatchingArrayAndVecTypes(GEPEltType, SrcEltType, DL)) ||
2172  (isa<FixedVectorType>(GEPEltType) && SrcEltType->isArrayTy() &&
2173  areMatchingArrayAndVecTypes(SrcEltType, GEPEltType, DL)))) {
2174 
2175  // Create a new GEP here, as using `setOperand()` followed by
2176  // `setSourceElementType()` won't actually update the type of the
2177  // existing GEP Value, causing issues if this Value is accessed when
2178  // constructing an AddrSpaceCastInst.
2179  SmallVector<Value *, 8> Indices(GEP.indices());
2180  Value *NGEP =
2181  Builder.CreateGEP(SrcEltType, SrcOp, Indices, "", GEP.isInBounds());
2182  NGEP->takeName(&GEP);
2183 
2184  // Preserve GEP address space to satisfy users
2185  if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
2186  return new AddrSpaceCastInst(NGEP, GEP.getType());
2187 
2188  return replaceInstUsesWith(GEP, NGEP);
2189  }
2190 
2191  // See if we can simplify:
2192  // X = bitcast A* to B*
2193  // Y = gep X, <...constant indices...>
2194  // into a gep of the original struct. This is important for SROA and alias
2195  // analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
2196  unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEP.getType());
2197  APInt Offset(OffsetBits, 0);
2198 
2199  // If the bitcast argument is an allocation, the bitcast is for conversion
2200  // to the actual type of the allocation. Removing such bitcasts results in
2201  // GEPs with i8* base and pure byte offsets. That means GEP is not aware of
2202  // struct or array hierarchy.
2203  // By avoiding such GEPs, phi translation and MemoryDependencyAnalysis have
2204  // a better chance to succeed.
2205  if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset) &&
2206  !isAllocationFn(SrcOp, &TLI)) {
2207  // If this GEP instruction doesn't move the pointer, just replace the GEP
2208  // with a bitcast of the real input to the dest type.
2209  if (!Offset) {
2210  // If the bitcast is of an allocation, and the allocation will be
2211  // converted to match the type of the cast, don't touch this.
2212  if (isa<AllocaInst>(SrcOp)) {
2213  // See if the bitcast simplifies, if so, don't nuke this GEP yet.
2214  if (Instruction *I = visitBitCast(*BCI)) {
2215  if (I != BCI) {
2216  I->takeName(BCI);
2217  BCI->getParent()->getInstList().insert(BCI->getIterator(), I);
2218  replaceInstUsesWith(*BCI, I);
2219  }
2220  return &GEP;
2221  }
2222  }
2223 
2224  if (SrcType->getPointerAddressSpace() != GEP.getAddressSpace())
2225  return new AddrSpaceCastInst(SrcOp, GEP.getType());
2226  return new BitCastInst(SrcOp, GEP.getType());
2227  }
2228 
2229  // Otherwise, if the offset is non-zero, we need to find out if there is a
2230  // field at Offset in 'A's type. If so, we can pull the cast through the
2231  // GEP.
2232  SmallVector<Value *, 8> NewIndices;
2233  if (findElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices, DL)) {
2234  Value *NGEP = Builder.CreateGEP(SrcEltType, SrcOp, NewIndices, "",
2235  GEP.isInBounds());
2236 
2237  if (NGEP->getType() == GEP.getType())
2238  return replaceInstUsesWith(GEP, NGEP);
2239  NGEP->takeName(&GEP);
2240 
2241  if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
2242  return new AddrSpaceCastInst(NGEP, GEP.getType());
2243  return new BitCastInst(NGEP, GEP.getType());
2244  }
2245  }
2246 
2247  return nullptr;
2248 }
2249 
2250 Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
2251  Value *PtrOp = GEP.getOperand(0);
2252  SmallVector<Value *, 8> Indices(GEP.indices());
2253  Type *GEPType = GEP.getType();
2254  Type *GEPEltType = GEP.getSourceElementType();
2255  bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
2256  if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
2257  SQ.getWithInstruction(&GEP)))
2258  return replaceInstUsesWith(GEP, V);
2259 
2260  // For vector geps, use the generic demanded vector support.
2261  // Skip if GEP return type is scalable. The number of elements is unknown at
2262  // compile-time.
2263  if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
2264  auto VWidth = GEPFVTy->getNumElements();
2265  APInt UndefElts(VWidth, 0);
2266  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2267  if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
2268  UndefElts)) {
2269  if (V != &GEP)
2270  return replaceInstUsesWith(GEP, V);
2271  return &GEP;
2272  }
2273 
2274  // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
2275  // possible (decide on canonical form for pointer broadcast), 3) exploit
2276  // undef elements to decrease demanded bits
2277  }
2278 
2279  // Eliminate unneeded casts for indices, and replace indices which displace
2280  // by multiples of a zero size type with zero.
2281  bool MadeChange = false;
2282 
2283  // Index width may not be the same width as pointer width.
2284  // Data layout chooses the right type based on supported integer types.
2285  Type *NewScalarIndexTy =
2286  DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
2287 
2289  for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
2290  ++I, ++GTI) {
2291  // Skip indices into struct types.
2292  if (GTI.isStruct())
2293  continue;
2294 
2295  Type *IndexTy = (*I)->getType();
2296  Type *NewIndexType =
2297  IndexTy->isVectorTy()
2298  ? VectorType::get(NewScalarIndexTy,
2299  cast<VectorType>(IndexTy)->getElementCount())
2300  : NewScalarIndexTy;
2301 
2302  // If the element type has zero size then any index over it is equivalent
2303  // to an index of zero, so replace it with zero if it is not zero already.
2304  Type *EltTy = GTI.getIndexedType();
2305  if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
2306  if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
2307  *I = Constant::getNullValue(NewIndexType);
2308  MadeChange = true;
2309  }
2310 
2311  if (IndexTy != NewIndexType) {
2312  // If we are using a wider index than needed for this platform, shrink
2313  // it to what we need. If narrower, sign-extend it to what we need.
2314  // This explicit cast can make subsequent optimizations more obvious.
2315  *I = Builder.CreateIntCast(*I, NewIndexType, true);
2316  MadeChange = true;
2317  }
2318  }
2319  if (MadeChange)
2320  return &GEP;
2321 
2322  // Check to see if the inputs to the PHI node are getelementptr instructions.
2323  if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
2324  auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
2325  if (!Op1)
2326  return nullptr;
2327 
2328  // Don't fold a GEP into itself through a PHI node. This can only happen
2329  // through the back-edge of a loop. Folding a GEP into itself means that
2330  // the value of the previous iteration needs to be stored in the meantime,
2331  // thus requiring an additional register variable to be live, but not
2332  // actually achieving anything (the GEP still needs to be executed once per
2333  // loop iteration).
2334  if (Op1 == &GEP)
2335  return nullptr;
2336 
2337  int DI = -1;
2338 
2339  for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
2340  auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
2341  if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
2342  Op1->getSourceElementType() != Op2->getSourceElementType())
2343  return nullptr;
2344 
2345  // As for Op1 above, don't try to fold a GEP into itself.
2346  if (Op2 == &GEP)
2347  return nullptr;
2348 
2349  // Keep track of the type as we walk the GEP.
2350  Type *CurTy = nullptr;
2351 
2352  for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
2353  if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
2354  return nullptr;
2355 
2356  if (Op1->getOperand(J) != Op2->getOperand(J)) {
2357  if (DI == -1) {
2358  // We have not seen any differences yet in the GEPs feeding the
2359  // PHI yet, so we record this one if it is allowed to be a
2360  // variable.
2361 
2362  // The first two arguments can vary for any GEP, the rest have to be
2363  // static for struct slots
2364  if (J > 1) {
2365  assert(CurTy && "No current type?");
2366  if (CurTy->isStructTy())
2367  return nullptr;
2368  }
2369 
2370  DI = J;
2371  } else {
2372  // The GEP is different by more than one input. While this could be
2373  // extended to support GEPs that vary by more than one variable it
2374  // doesn't make sense since it greatly increases the complexity and
2375  // would result in an R+R+R addressing mode which no backend
2376  // directly supports and would need to be broken into several
2377  // simpler instructions anyway.
2378  return nullptr;
2379  }
2380  }
2381 
2382  // Sink down a layer of the type for the next iteration.
2383  if (J > 0) {
2384  if (J == 1) {
2385  CurTy = Op1->getSourceElementType();
2386  } else {
2387  CurTy =
2388  GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
2389  }
2390  }
2391  }
2392  }
2393 
2394  // If not all GEPs are identical we'll have to create a new PHI node.
2395  // Check that the old PHI node has only one use so that it will get
2396  // removed.
2397  if (DI != -1 && !PN->hasOneUse())
2398  return nullptr;
2399 
2400  auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
2401  if (DI == -1) {
2402  // All the GEPs feeding the PHI are identical. Clone one down into our
2403  // BB so that it can be merged with the current GEP.
2404  } else {
2405  // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
2406  // into the current block so it can be merged, and create a new PHI to
2407  // set that index.
2408  PHINode *NewPN;
2409  {
2410  IRBuilderBase::InsertPointGuard Guard(Builder);
2411  Builder.SetInsertPoint(PN);
2412  NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
2413  PN->getNumOperands());
2414  }
2415 
2416  for (auto &I : PN->operands())
2417  NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
2418  PN->getIncomingBlock(I));
2419 
2420  NewGEP->setOperand(DI, NewPN);
2421  }
2422 
2423  GEP.getParent()->getInstList().insert(
2424  GEP.getParent()->getFirstInsertionPt(), NewGEP);
2425  replaceOperand(GEP, 0, NewGEP);
2426  PtrOp = NewGEP;
2427  }
2428 
2429  if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
2430  if (Instruction *I = visitGEPOfGEP(GEP, Src))
2431  return I;
2432 
2433  // Skip if GEP source element type is scalable. The type alloc size is unknown
2434  // at compile-time.
2435  if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
2436  unsigned AS = GEP.getPointerAddressSpace();
2437  if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
2438  DL.getIndexSizeInBits(AS)) {
2439  uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
2440 
2441  bool Matched = false;
2442  uint64_t C;
2443  Value *V = nullptr;
2444  if (TyAllocSize == 1) {
2445  V = GEP.getOperand(1);
2446  Matched = true;
2447  } else if (match(GEP.getOperand(1),
2448  m_AShr(m_Value(V), m_ConstantInt(C)))) {
2449  if (TyAllocSize == 1ULL << C)
2450  Matched = true;
2451  } else if (match(GEP.getOperand(1),
2452  m_SDiv(m_Value(V), m_ConstantInt(C)))) {
2453  if (TyAllocSize == C)
2454  Matched = true;
2455  }
2456 
2457  // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), but
2458  // only if both point to the same underlying object (otherwise provenance
2459  // is not necessarily retained).
2460  Value *Y;
2461  Value *X = GEP.getOperand(0);
2462  if (Matched &&
2463  match(V, m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
2464  getUnderlyingObject(X) == getUnderlyingObject(Y))
2465  return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
2466  }
2467  }
2468 
2469  // We do not handle pointer-vector geps here.
2470  if (GEPType->isVectorTy())
2471  return nullptr;
2472 
2473  // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
2474  Value *StrippedPtr = PtrOp->stripPointerCasts();
2475  PointerType *StrippedPtrTy = cast<PointerType>(StrippedPtr->getType());
2476 
2477  // TODO: The basic approach of these folds is not compatible with opaque
2478  // pointers, because we can't use bitcasts as a hint for a desirable GEP
2479  // type. Instead, we should perform canonicalization directly on the GEP
2480  // type. For now, skip these.
2481  if (StrippedPtr != PtrOp && !StrippedPtrTy->isOpaque()) {
2482  bool HasZeroPointerIndex = false;
2483  Type *StrippedPtrEltTy = StrippedPtrTy->getNonOpaquePointerElementType();
2484 
2485  if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
2486  HasZeroPointerIndex = C->isZero();
2487 
2488  // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
2489  // into : GEP [10 x i8]* X, i32 0, ...
2490  //
2491  // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
2492  // into : GEP i8* X, ...
2493  //
2494  // This occurs when the program declares an array extern like "int X[];"
2495  if (HasZeroPointerIndex) {
2496  if (auto *CATy = dyn_cast<ArrayType>(GEPEltType)) {
2497  // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
2498  if (CATy->getElementType() == StrippedPtrEltTy) {
2499  // -> GEP i8* X, ...
2500  SmallVector<Value *, 8> Idx(drop_begin(GEP.indices()));
2501  GetElementPtrInst *Res = GetElementPtrInst::Create(
2502  StrippedPtrEltTy, StrippedPtr, Idx, GEP.getName());
2503  Res->setIsInBounds(GEP.isInBounds());
2504  if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
2505  return Res;
2506  // Insert Res, and create an addrspacecast.
2507  // e.g.,
2508  // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ...
2509  // ->
2510  // %0 = GEP i8 addrspace(1)* X, ...
2511  // addrspacecast i8 addrspace(1)* %0 to i8*
2512  return new AddrSpaceCastInst(Builder.Insert(Res), GEPType);
2513  }
2514 
2515  if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrEltTy)) {
2516  // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
2517  if (CATy->getElementType() == XATy->getElementType()) {
2518  // -> GEP [10 x i8]* X, i32 0, ...
2519  // At this point, we know that the cast source type is a pointer
2520  // to an array of the same type as the destination pointer
2521  // array. Because the array type is never stepped over (there
2522  // is a leading zero) we can fold the cast into this GEP.
2523  if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
2524  GEP.setSourceElementType(XATy);
2525  return replaceOperand(GEP, 0, StrippedPtr);
2526  }
2527  // Cannot replace the base pointer directly because StrippedPtr's
2528  // address space is different. Instead, create a new GEP followed by
2529  // an addrspacecast.
2530  // e.g.,
2531  // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*),
2532  // i32 0, ...
2533  // ->
2534  // %0 = GEP [10 x i8] addrspace(1)* X, ...
2535  // addrspacecast i8 addrspace(1)* %0 to i8*
2536  SmallVector<Value *, 8> Idx(GEP.indices());
2537  Value *NewGEP =
2538  Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
2539  GEP.getName(), GEP.isInBounds());
2540  return new AddrSpaceCastInst(NewGEP, GEPType);
2541  }
2542  }
2543  }
2544  } else if (GEP.getNumOperands() == 2 && !IsGEPSrcEleScalable) {
2545  // Skip if GEP source element type is scalable. The type alloc size is
2546  // unknown at compile-time.
2547  // Transform things like: %t = getelementptr i32*
2548  // bitcast ([2 x i32]* %str to i32*), i32 %V into: %t1 = getelementptr [2
2549  // x i32]* %str, i32 0, i32 %V; bitcast
2550  if (StrippedPtrEltTy->isArrayTy() &&
2551  DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
2552  DL.getTypeAllocSize(GEPEltType)) {
2553  Type *IdxType = DL.getIndexType(GEPType);
2554  Value *Idx[2] = {Constant::getNullValue(IdxType), GEP.getOperand(1)};
2555  Value *NewGEP = Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
2556  GEP.getName(), GEP.isInBounds());
2557 
2558  // V and GEP are both pointer types --> BitCast
2559  return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType);
2560  }
2561 
2562  // Transform things like:
2563  // %V = mul i64 %N, 4
2564  // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
2565  // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
2566  if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
2567  // Check that changing the type amounts to dividing the index by a scale
2568  // factor.
2569  uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
2570  uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy).getFixedSize();
2571  if (ResSize && SrcSize % ResSize == 0) {
2572  Value *Idx = GEP.getOperand(1);
2573  unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
2574  uint64_t Scale = SrcSize / ResSize;
2575 
2576  // Earlier transforms ensure that the index has the right type
2577  // according to Data Layout, which considerably simplifies the
2578  // logic by eliminating implicit casts.
2579  assert(Idx->getType() == DL.getIndexType(GEPType) &&
2580  "Index type does not match the Data Layout preferences");
2581 
2582  bool NSW;
2583  if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
2584  // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
2585  // If the multiplication NewIdx * Scale may overflow then the new
2586  // GEP may not be "inbounds".
2587  Value *NewGEP =
2588  Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx,
2589  GEP.getName(), GEP.isInBounds() && NSW);
2590 
2591  // The NewGEP must be pointer typed, so must the old one -> BitCast
2592  return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
2593  GEPType);
2594  }
2595  }
2596  }
2597 
2598  // Similarly, transform things like:
2599  // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
2600  // (where tmp = 8*tmp2) into:
2601  // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
2602  if (GEPEltType->isSized() && StrippedPtrEltTy->isSized() &&
2603  StrippedPtrEltTy->isArrayTy()) {
2604  // Check that changing to the array element type amounts to dividing the
2605  // index by a scale factor.
2606  uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
2607  uint64_t ArrayEltSize =
2608  DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType())
2609  .getFixedSize();
2610  if (ResSize && ArrayEltSize % ResSize == 0) {
2611  Value *Idx = GEP.getOperand(1);
2612  unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
2613  uint64_t Scale = ArrayEltSize / ResSize;
2614 
2615  // Earlier transforms ensure that the index has the right type
2616  // according to the Data Layout, which considerably simplifies
2617  // the logic by eliminating implicit casts.
2618  assert(Idx->getType() == DL.getIndexType(GEPType) &&
2619  "Index type does not match the Data Layout preferences");
2620 
2621  bool NSW;
2622  if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
2623  // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
2624  // If the multiplication NewIdx * Scale may overflow then the new
2625  // GEP may not be "inbounds".
2626  Type *IndTy = DL.getIndexType(GEPType);
2627  Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx};
2628 
2629  Value *NewGEP =
2630  Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off,
2631  GEP.getName(), GEP.isInBounds() && NSW);
2632  // The NewGEP must be pointer typed, so must the old one -> BitCast
2633  return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
2634  GEPType);
2635  }
2636  }
2637  }
2638  }
2639  }
2640 
2641  // addrspacecast between types is canonicalized as a bitcast, then an
2642  // addrspacecast. To take advantage of the below bitcast + struct GEP, look
2643  // through the addrspacecast.
2644  Value *ASCStrippedPtrOp = PtrOp;
2645  if (auto *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) {
2646  // X = bitcast A addrspace(1)* to B addrspace(1)*
2647  // Y = addrspacecast A addrspace(1)* to B addrspace(2)*
2648  // Z = gep Y, <...constant indices...>
2649  // Into an addrspacecasted GEP of the struct.
2650  if (auto *BC = dyn_cast<BitCastInst>(ASC->getOperand(0)))
2651  ASCStrippedPtrOp = BC;
2652  }
2653 
2654  if (auto *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp))
2655  if (Instruction *I = visitGEPOfBitcast(BCI, GEP))
2656  return I;
2657 
2658  if (!GEP.isInBounds()) {
2659  unsigned IdxWidth =
2660  DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
2661  APInt BasePtrOffset(IdxWidth, 0);
2662  Value *UnderlyingPtrOp =
2663  PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
2664  BasePtrOffset);
2665  if (auto *AI = dyn_cast<AllocaInst>(UnderlyingPtrOp)) {
2666  if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
2667  BasePtrOffset.isNonNegative()) {
2668  APInt AllocSize(
2669  IdxWidth,
2670  DL.getTypeAllocSize(AI->getAllocatedType()).getKnownMinSize());
2671  if (BasePtrOffset.ule(AllocSize)) {
2672  return GetElementPtrInst::CreateInBounds(
2673  GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
2674  }
2675  }
2676  }
2677  }
2678 
2679  if (Instruction *R = foldSelectGEP(GEP, Builder))
2680  return R;
2681 
2682  return nullptr;
2683 }
2684 
2685 static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
2686  Instruction *AI) {
2687  if (isa<ConstantPointerNull>(V))
2688  return true;
2689  if (auto *LI = dyn_cast<LoadInst>(V))
2690  return isa<GlobalVariable>(LI->getPointerOperand());
2691  // Two distinct allocations will never be equal.
2692  return isAllocLikeFn(V, &TLI) && V != AI;
2693 }
2694 
2695 /// Given a call CB which uses an address UsedV, return true if we can prove
2696 /// the call's only possible effect is storing to UsedV.
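/// For illustration (hypothetical IR): given
///   %buf = alloca [16 x i8]
///   call void @fill([16 x i8]* %buf)
/// where @fill is nothrow and willreturn, its result is unused, and its only
/// possible memory effect is writing through %buf, the call is removable once
/// %buf itself is dead.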
2697 static bool isRemovableWrite(CallBase &CB, Value *UsedV,
2698  const TargetLibraryInfo &TLI) {
2699  if (!CB.use_empty())
2700  // TODO: add recursion if returned attribute is present
2701  return false;
2702 
2703  if (CB.isTerminator())
2704  // TODO: remove implementation restriction
2705  return false;
2706 
2707  if (!CB.willReturn() || !CB.doesNotThrow())
2708  return false;
2709 
2710  // If the only possible side effect of the call is writing to the alloca,
2711  // and the result isn't used, we can safely remove any reads implied by the
2712  // call including those which might read the alloca itself.
2713  std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
2714  return Dest && Dest->Ptr == UsedV;
2715 }
2716 
2717 static bool isAllocSiteRemovable(Instruction *AI,
2718  SmallVectorImpl<WeakTrackingVH> &Users,
2719  const TargetLibraryInfo &TLI) {
2720  SmallVector<Instruction*, 4> Worklist;
2721  const Optional<StringRef> Family = getAllocationFamily(AI, &TLI);
2722  Worklist.push_back(AI);
2723 
2724  do {
2725  Instruction *PI = Worklist.pop_back_val();
2726  for (User *U : PI->users()) {
2727  Instruction *I = cast<Instruction>(U);
2728  switch (I->getOpcode()) {
2729  default:
2730  // Give up the moment we see something we can't handle.
2731  return false;
2732 
2733  case Instruction::AddrSpaceCast:
2734  case Instruction::BitCast:
2735  case Instruction::GetElementPtr:
2736  Users.emplace_back(I);
2737  Worklist.push_back(I);
2738  continue;
2739 
2740  case Instruction::ICmp: {
2741  ICmpInst *ICI = cast<ICmpInst>(I);
2742  // We can fold eq/ne comparisons with null to false/true, respectively.
2743  // We also fold comparisons in some conditions provided the alloc has
2744  // not escaped (see isNeverEqualToUnescapedAlloc).
2745  if (!ICI->isEquality())
2746  return false;
2747  unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
2748  if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
2749  return false;
2750  Users.emplace_back(I);
2751  continue;
2752  }
2753 
2754  case Instruction::Call:
2755  // Ignore no-op and store intrinsics.
2756  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
2757  switch (II->getIntrinsicID()) {
2758  default:
2759  return false;
2760 
2761  case Intrinsic::memmove:
2762  case Intrinsic::memcpy:
2763  case Intrinsic::memset: {
2764  MemIntrinsic *MI = cast<MemIntrinsic>(II);
2765  if (MI->isVolatile() || MI->getRawDest() != PI)
2766  return false;
2767  [[fallthrough]];
2768  }
2769  case Intrinsic::assume:
2770  case Intrinsic::invariant_start:
2771  case Intrinsic::invariant_end:
2772  case Intrinsic::lifetime_start:
2773  case Intrinsic::lifetime_end:
2774  case Intrinsic::objectsize:
2775  Users.emplace_back(I);
2776  continue;
2777  case Intrinsic::launder_invariant_group:
2778  case Intrinsic::strip_invariant_group:
2779  Users.emplace_back(I);
2780  Worklist.push_back(I);
2781  continue;
2782  }
2783  }
2784 
2785  if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
2786  Users.emplace_back(I);
2787  continue;
2788  }
2789 
2790  if (getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
2791  getAllocationFamily(I, &TLI) == Family) {
2792  assert(Family);
2793  Users.emplace_back(I);
2794  continue;
2795  }
2796 
2797  if (getReallocatedOperand(cast<CallBase>(I), &TLI) == PI &&
2798  getAllocationFamily(I, &TLI) == Family) {
2799  assert(Family);
2800  Users.emplace_back(I);
2801  Worklist.push_back(I);
2802  continue;
2803  }
2804 
2805  return false;
2806 
2807  case Instruction::Store: {
2808  StoreInst *SI = cast<StoreInst>(I);
2809  if (SI->isVolatile() || SI->getPointerOperand() != PI)
2810  return false;
2811  Users.emplace_back(I);
2812  continue;
2813  }
2814  }
2815  llvm_unreachable("missing a return?");
2816  }
2817  } while (!Worklist.empty());
2818  return true;
2819 }
2820 
2821 Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
2822  assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
2823 
2824  // If we have a malloc call which is only used in any amount of comparisons to
2825  // null and free calls, delete the calls and replace the comparisons with true
2826  // or false as appropriate.
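  // For illustration (hypothetical IR):
  //   %p = call i8* @malloc(i64 4)
  //   %c = icmp eq i8* %p, null
  //   call void @free(i8* %p)
  // %c is replaced by false and both calls are erased.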
2827 
2828  // This is based on the principle that we can substitute our own allocation
2829  // function (which will never return null) rather than knowledge of the
2830  // specific function being called. In some sense this can change the permitted
2831  // outputs of a program (when we convert a malloc to an alloca, the fact that
2832  // the allocation is now on the stack is potentially visible, for example),
2833  // but we believe in a permissible manner.
2833  // but we believe in a permissible manner.
2834  SmallVector<WeakTrackingVH, 64> Users;
2835 
2836  // If we are removing an alloca with a dbg.declare, insert dbg.value calls
2837  // before each store.
2838  SmallVector<DbgVariableIntrinsic *, 8> DVIs;
2839  std::unique_ptr<DIBuilder> DIB;
2840  if (isa<AllocaInst>(MI)) {
2841  findDbgUsers(DVIs, &MI);
2842  DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
2843  }
2844 
2845  if (isAllocSiteRemovable(&MI, Users, TLI)) {
2846  for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2847  // Lowering all @llvm.objectsize calls first because they may
2848  // use a bitcast/GEP of the alloca we are removing.
2849  if (!Users[i])
2850  continue;
2851 
2852  Instruction *I = cast<Instruction>(&*Users[i]);
2853 
2854  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
2855  if (II->getIntrinsicID() == Intrinsic::objectsize) {
2856  Value *Result =
2857  lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/true);
2858  replaceInstUsesWith(*I, Result);
2859  eraseInstFromFunction(*I);
2860  Users[i] = nullptr; // Skip examining in the next loop.
2861  }
2862  }
2863  }
2864  for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2865  if (!Users[i])
2866  continue;
2867 
2868  Instruction *I = cast<Instruction>(&*Users[i]);
2869 
2870  if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
2871  replaceInstUsesWith(*C,
2872  ConstantInt::get(Type::getInt1Ty(C->getContext()),
2873  C->isFalseWhenEqual()));
2874  } else if (auto *SI = dyn_cast<StoreInst>(I)) {
2875  for (auto *DVI : DVIs)
2876  if (DVI->isAddressOfVariable())
2877  ConvertDebugDeclareToDebugValue(DVI, SI, *DIB);
2878  } else {
2879  // Casts, GEP, or anything else: we're about to delete this instruction,
2880  // so it can not have any valid uses.
2881  replaceInstUsesWith(*I, PoisonValue::get(I->getType()));
2882  }
2883  eraseInstFromFunction(*I);
2884  }
2885 
2886  if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
2887  // Replace invoke with a NOP intrinsic to maintain the original CFG
2888  Module *M = II->getModule();
2889  Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
2890  InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
2891  std::nullopt, "", II->getParent());
2892  }
2893 
2894  // Remove debug intrinsics which describe the value contained within the
2895  // alloca. In addition to removing dbg.{declare,addr} which simply point to
2896  // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
2897  //
2898  // ```
2899  // define void @foo(i32 %0) {
2900  // %a = alloca i32 ; Deleted.
2901  // store i32 %0, i32* %a
2902  // dbg.value(i32 %0, "arg0") ; Not deleted.
2903  // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
2904  // call void @trivially_inlinable_no_op(i32* %a)
2905  // ret void
2906  // }
2907  // ```
2908  //
2909  // This may not be required if we stop describing the contents of allocas
2910  // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
2911  // the LowerDbgDeclare utility.
2912  //
2913  // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
2914  // "arg0" dbg.value may be stale after the call. However, failing to remove
2915  // the DW_OP_deref dbg.value causes large gaps in location coverage.
2916  for (auto *DVI : DVIs)
2917  if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
2918  DVI->eraseFromParent();
2919 
2920  return eraseInstFromFunction(MI);
2921  }
2922  return nullptr;
2923 }
2924 
2925 /// Move the call to free before a NULL test.
2926 ///
2927 /// Check if this free is accessed after its argument has been tested
2928 /// against NULL (property 0).
2929 /// If yes, it is legal to move this call in its predecessor block.
2930 ///
2931 /// The move is performed only if the block containing the call to free
2932 /// will be removed, i.e.:
2933 /// 1. it has only one predecessor P, and P has two successors
2934 /// 2. it contains the call, noops, and an unconditional branch
2935 /// 3. its successor is the same as its predecessor's successor
2936 ///
2937 /// Profitability is not a concern here; this function should be called
2938 /// only if the caller knows this transformation would be profitable
2939 /// (e.g., for code size).
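/// For illustration (hypothetical IR):
///   entry:
///     %c = icmp eq i8* %p, null
///     br i1 %c, label %exit, label %do_free
///   do_free:
///     call void @free(i8* %p)
///     br label %exit
/// The call is hoisted into %entry (free of a null pointer is a no-op), so
/// %do_free becomes empty and SimplifyCFG can remove it.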
2940 static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
2941  const DataLayout &DL) {
2942  Value *Op = FI.getArgOperand(0);
2943  BasicBlock *FreeInstrBB = FI.getParent();
2944  BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
2945 
2946  // Validate part of constraint #1: Only one predecessor
2947  // FIXME: We can extend the number of predecessors, but in that case, we
2948  // would duplicate the call to free in each predecessor and it may
2949  // not be profitable even for code size.
2950  if (!PredBB)
2951  return nullptr;
2952 
2953  // Validate constraint #2: Does this block contain only the call to
2954  // free, noops, and an unconditional branch?
2955  BasicBlock *SuccBB;
2956  Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
2957  if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
2958  return nullptr;
2959 
2960  // If there are only 2 instructions in the block, at this point,
2961  // they are the call to free and the unconditional branch.
2962  // If there are more than 2 instructions, check that they are noops
2963  // i.e., they won't hurt the performance of the generated code.
2964  if (FreeInstrBB->size() != 2) {
2965  for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
2966  if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
2967  continue;
2968  auto *Cast = dyn_cast<CastInst>(&Inst);
2969  if (!Cast || !Cast->isNoopCast(DL))
2970  return nullptr;
2971  }
2972  }
2973  // Validate the rest of constraint #1 by matching on the pred branch.
2974  Instruction *TI = PredBB->getTerminator();
2975  BasicBlock *TrueBB, *FalseBB;
2976  ICmpInst::Predicate Pred;
2977  if (!match(TI, m_Br(m_ICmp(Pred,
2978  m_CombineOr(m_Specific(Op),
2979  m_Specific(Op->stripPointerCasts())),
2980  m_Zero()),
2981  TrueBB, FalseBB)))
2982  return nullptr;
2983  if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
2984  return nullptr;
2985 
2986  // Validate constraint #3: Ensure the null case just falls through.
2987  if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
2988  return nullptr;
2989  assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
2990  "Broken CFG: missing edge from predecessor to successor");
2991 
2992  // At this point, we know that everything in FreeInstrBB can be moved
2993  // before TI.
2994  for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
2995  if (&Instr == FreeInstrBBTerminator)
2996  break;
2997  Instr.moveBefore(TI);
2998  }
2999  assert(FreeInstrBB->size() == 1 &&
3000  "Only the branch instruction should remain");
3001 
3002  // Now that we've moved the call to free before the NULL check, we have to
3003  // remove any attributes on its parameter that imply it's non-null, because
3004  // those attributes might have only been valid because of the NULL check, and
3005  // we can get miscompiles if we keep them. This is conservative if non-null is
3006  // also implied by something other than the NULL check, but it's guaranteed to
3007  // be correct, and the conservativeness won't matter in practice, since the
3008  // attributes are irrelevant for the call to free itself and the pointer
3009  // shouldn't be used after the call.
3010  AttributeList Attrs = FI.getAttributes();
3011  Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
3012  Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
3013  if (Dereferenceable.isValid()) {
3014  uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
3015  Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
3016  Attribute::Dereferenceable);
3017  Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
3018  }
3019  FI.setAttributes(Attrs);
3020 
3021  return &FI;
3022 }
3023 
3024 Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
3025  // free undef -> unreachable.
3026  if (isa<UndefValue>(Op)) {
3027  // Leave a marker since we can't modify the CFG here.
3028  CreateNonTerminatorUnreachable(&FI);
3029  return eraseInstFromFunction(FI);
3030  }
3031 
3032  // If we have 'free null' delete the instruction. This can happen in stl code
3033  // when lots of inlining happens.
3034  if (isa<ConstantPointerNull>(Op))
3035  return eraseInstFromFunction(FI);
3036 
3037  // If we had free(realloc(...)) with no intervening uses, then eliminate the
3038  // realloc() entirely.
3039  CallInst *CI = dyn_cast<CallInst>(Op);
3040  if (CI && CI->hasOneUse())
3041  if (Value *ReallocatedOp = getReallocatedOperand(CI, &TLI))
3042  return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
3043 
3044  // If we optimize for code size, try to move the call to free before the null
3045  // test so that SimplifyCFG can remove the empty block and dead code
3046  // elimination can remove the branch. I.e., this helps to turn something like:
3047  // if (foo) free(foo);
3048  // into
3049  // free(foo);
3050  //
3051  // Note that we can only do this for 'free' and not for any flavor of
3052  // 'operator delete'; there is no 'operator delete' symbol for which we are
3053  // permitted to invent a call, even if we're passing in a null pointer.
3054  if (MinimizeSize) {
3055  LibFunc Func;
3056  if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
3057  if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
3058  return I;
3059  }
3060 
3061  return nullptr;
3062 }
3063 
3064 static bool isMustTailCall(Value *V) {
3065  if (auto *CI = dyn_cast<CallInst>(V))
3066  return CI->isMustTailCall();
3067  return false;
3068 }
3069 
3070 Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
3071  if (RI.getNumOperands() == 0) // ret void
3072  return nullptr;
3073 
3074  Value *ResultOp = RI.getOperand(0);
3075  Type *VTy = ResultOp->getType();
3076  if (!VTy->isIntegerTy() || isa<Constant>(ResultOp))
3077  return nullptr;
3078 
3079  // Don't replace result of musttail calls.
3080  if (isMustTailCall(ResultOp))
3081  return nullptr;
3082 
3083  // There might be assume intrinsics dominating this return that completely
3084  // determine the value. If so, constant fold it.
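  // Illustrative (hypothetical) IR: a dominating assume can pin the value,
  //   %cmp = icmp eq i32 %x, 42
  //   call void @llvm.assume(i1 %cmp)
  //   ret i32 %x
  // so the return is rewritten to 'ret i32 42'.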
3085  KnownBits Known = computeKnownBits(ResultOp, 0, &RI);
3086  if (Known.isConstant())
3087  return replaceOperand(RI, 0,
3088  Constant::getIntegerValue(VTy, Known.getConstant()));
3089 
3090  return nullptr;
3091 }
3092 
3093 // WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
3094 Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
3095  // Try to remove the previous instruction if it must lead to unreachable.
3096  // This includes instructions like stores and "llvm.assume" that may not get
3097  // removed by simple dead code elimination.
3098  while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
3099  // While we theoretically can erase EH, that would result in a block that
3100  // used to start with an EH pad no longer starting with one, which is invalid.
3101  // To make it valid, we'd need to fix up predecessors to no longer refer to
3102  // this block, but that changes the CFG, which is not allowed in InstCombine.
3103  if (Prev->isEHPad())
3104  return nullptr; // Cannot drop any more instructions. We're done here.
3105 
3106  if (!isGuaranteedToTransferExecutionToSuccessor(Prev))
3107  return nullptr; // Cannot drop any more instructions. We're done here.
3108  // Otherwise, this instruction can be freely erased,
3109  // even if it is not side-effect free.
3110 
3111  // A value may still have uses before we process it here (for example, in
3112  // another unreachable block), so convert those to poison.
3113  replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
3114  eraseInstFromFunction(*Prev);
3115  }
3116  assert(I.getParent()->sizeWithoutDebug() == 1 && "The block is now empty.");
3117  // FIXME: recurse into unconditional predecessors?
3118  return nullptr;
3119 }
3120 
3121 Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
3122  assert(BI.isUnconditional() && "Only for unconditional branches.");
3123 
3124  // If this store is the second-to-last instruction in the basic block
3125  // (excluding debug info and bitcasts of pointers) and if the block ends with
3126  // an unconditional branch, try to move the store to the successor block.
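  // Illustrative sketch (hypothetical blocks; not taken from this file): when
  // each predecessor of %end stores to the same pointer and then branches
  // unconditionally, mergeStoreIntoSuccessor() can turn
  //   then:  store i32 1, ptr %p   ; br label %end
  //   else:  store i32 2, ptr %p   ; br label %end
  // into a single PHI-fed store in %end:
  //   end:   %v = phi i32 [ 1, %then ], [ 2, %else ]
  //          store i32 %v, ptr %p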
3127 
3128  auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
3129  auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
3130  return BBI->isDebugOrPseudoInst() ||
3131  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
3132  };
3133 
3134  BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
3135  do {
3136  if (BBI != FirstInstr)
3137  --BBI;
3138  } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));
3139 
3140  return dyn_cast<StoreInst>(BBI);
3141  };
3142 
3143  if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
3144  if (mergeStoreIntoSuccessor(*SI))
3145  return &BI;
3146 
3147  return nullptr;
3148 }
3149 
3150 Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
3151  if (BI.isUnconditional())
3152  return visitUnconditionalBranchInst(BI);
3153 
3154  // Change br (not X), label True, label False to: br X, label False, True
3155  Value *Cond = BI.getCondition();
3156  Value *X;
3157  if (match(Cond, m_Not(m_Value(X))) && !isa<Constant>(X)) {
3158  // Swap Destinations and condition...
3159  BI.swapSuccessors();
3160  return replaceOperand(BI, 0, X);
3161  }
3162 
3163  // Canonicalize logical-and-with-invert as logical-or-with-invert.
3164  // This is done by inverting the condition and swapping successors:
3165  // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
3166  Value *Y;
3167  if (isa<SelectInst>(Cond) &&
3168  match(Cond,
3170  Value *NotX = Builder.CreateNot(X, "not." + X->getName());
3171  Value *Or = Builder.CreateLogicalOr(NotX, Y);
3172  BI.swapSuccessors();
3173  return replaceOperand(BI, 0, Or);
3174  }
3175 
3176  // If the condition is irrelevant, remove the use so that other
3177  // transforms on the condition become more effective.
3178  if (!isa<ConstantInt>(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1))
3179  return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType()));
3180 
3181  // Canonicalize, for example, fcmp_one -> fcmp_oeq.
3182  CmpInst::Predicate Pred;
3183  if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) &&
3184  !isCanonicalPredicate(Pred)) {
3185  // Swap destinations and condition.
3186  auto *Cmp = cast<CmpInst>(Cond);
3187  Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
3188  BI.swapSuccessors();
3189  Worklist.push(Cmp);
3190  return &BI;
3191  }
3192 
3193  return nullptr;
3194 }
3195 
3196 Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
3197  Value *Cond = SI.getCondition();
3198  Value *Op0;
3199  ConstantInt *AddRHS;
3200  if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
3201  // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
3202  for (auto Case : SI.cases()) {
3203  Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
3204  assert(isa<ConstantInt>(NewCase) &&
3205  "Result of expression should be constant");
3206  Case.setValue(cast<ConstantInt>(NewCase));
3207  }
3208  return replaceOperand(SI, 0, Op0);
3209  }
3210 
3211  KnownBits Known = computeKnownBits(Cond, 0, &SI);
3212  unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
3213  unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
3214 
3215  // Compute the number of leading bits we can ignore.
3216  // TODO: A better way to determine this would use ComputeNumSignBits().
3217  for (const auto &C : SI.cases()) {
3218  LeadingKnownZeros = std::min(
3219  LeadingKnownZeros, C.getCaseValue()->getValue().countLeadingZeros());
3220  LeadingKnownOnes = std::min(
3221  LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes());
3222  }
3223 
3224  unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
3225 
3226  // Shrink the condition operand if the new type is smaller than the old type.
3227  // But do not shrink to a non-standard type, because backend can't generate
3228  // good code for that yet.
3229  // TODO: We can make it aggressive again after fixing PR39569.
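  // Illustrative sketch (hypothetical IR, assuming the narrower type is
  // acceptable to shouldChangeType()): with the top 24 bits of %x known zero,
  //   switch i32 %x, label %def [ i32 1, label %a
  //                               i32 2, label %b ]
  // can be shrunk to
  //   %t = trunc i32 %x to i8
  //   switch i8 %t, label %def [ i8 1, label %a
  //                              i8 2, label %b ]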
3230  if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
3231  shouldChangeType(Known.getBitWidth(), NewWidth)) {
3232  IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
3233  Builder.SetInsertPoint(&SI);
3234  Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
3235 
3236  for (auto Case : SI.cases()) {
3237  APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
3238  Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
3239  }
3240  return replaceOperand(SI, 0, NewCond);
3241  }
3242 
3243  return nullptr;
3244 }
3245 
3246 Instruction *
3247 InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
3248  auto *WO = dyn_cast<WithOverflowInst>(EV.getAggregateOperand());
3249  if (!WO)
3250  return nullptr;
3251 
3252  Intrinsic::ID OvID = WO->getIntrinsicID();
3253  const APInt *C = nullptr;
3254  if (match(WO->getRHS(), m_APIntAllowUndef(C))) {
3255  if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
3256  OvID == Intrinsic::umul_with_overflow)) {
3257  // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
3258  if (C->isAllOnes())
3259  return BinaryOperator::CreateNeg(WO->getLHS());
3260  // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
3261  if (C->isPowerOf2()) {
3262  return BinaryOperator::CreateShl(
3263  WO->getLHS(),
3264  ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
3265  }
3266  }
3267  }
3268 
3269  // We're extracting from an overflow intrinsic. See if we're the only user.
3270  // That allows us to simplify multiple result intrinsics to simpler things
3271  // that just get one value.
3272  if (!WO->hasOneUse())
3273  return nullptr;
3274 
3275  // Check if we're grabbing only the result of a 'with overflow' intrinsic
3276  // and replace it with a traditional binary instruction.
3277  if (*EV.idx_begin() == 0) {
3278  Instruction::BinaryOps BinOp = WO->getBinaryOp();
3279  Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
3280  // Replace the old instruction's uses with poison.
3281  replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
3282  eraseInstFromFunction(*WO);
3283  return BinaryOperator::Create(BinOp, LHS, RHS);
3284  }
3285 
3286  assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
3287 
3288  // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
3289  if (OvID == Intrinsic::usub_with_overflow)
3290  return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
3291 
3292  // If only the overflow result is used, and the right hand side is a
3293  // constant (or constant splat), we can remove the intrinsic by directly
3294  // checking for overflow.
3295  if (C) {
3296  // Compute the no-wrap range for LHS given RHS=C, then construct an
3297  // equivalent icmp, potentially using an offset.
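    // Hypothetical example for illustration: for
    //   %r  = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %x, i8 1)
    //   %ov = extractvalue { i8, i1 } %r, 1
    // the no-wrap range yields something like
    //   %ov = icmp uge i8 %x, -1   ; i.e. overflow iff %x == 255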
3298  ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
3299  WO->getBinaryOp(), *C, WO->getNoWrapKind());
3300 
3301  CmpInst::Predicate Pred;
3302  APInt NewRHSC, Offset;
3303  NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
3304  auto *OpTy = WO->getRHS()->getType();
3305  auto *NewLHS = WO->getLHS();
3306  if (Offset != 0)
3307  NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
3308  return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
3309  ConstantInt::get(OpTy, NewRHSC));
3310  }
3311 
3312  return nullptr;
3313 }
3314 
3315 Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
3316  Value *Agg = EV.getAggregateOperand();
3317 
3318  if (!EV.hasIndices())
3319  return replaceInstUsesWith(EV, Agg);
3320 
3321  if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
3322  SQ.getWithInstruction(&EV)))
3323  return replaceInstUsesWith(EV, V);
3324 
3325  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
3326  // We're extracting from an insertvalue instruction, compare the indices
3327  const unsigned *exti, *exte, *insi, *inse;
3328  for (exti = EV.idx_begin(), insi = IV->idx_begin(),
3329  exte = EV.idx_end(), inse = IV->idx_end();
3330  exti != exte && insi != inse;
3331  ++exti, ++insi) {
3332  if (*insi != *exti)
3333  // The insert and extract both reference distinctly different elements.
3334  // This means the extract is not influenced by the insert, and we can
3335  // replace the aggregate operand of the extract with the aggregate
3336  // operand of the insert. i.e., replace
3337  // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3338  // %E = extractvalue { i32, { i32 } } %I, 0
3339  // with
3340  // %E = extractvalue { i32, { i32 } } %A, 0
3341  return ExtractValueInst::Create(IV->getAggregateOperand(),
3342  EV.getIndices());
3343  }
3344  if (exti == exte && insi == inse)
3345  // Both iterators are at the end: Index lists are identical. Replace
3346  // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3347  // %C = extractvalue { i32, { i32 } } %B, 1, 0
3348  // with "i32 42"
3349  return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
3350  if (exti == exte) {
3351  // The extract list is a prefix of the insert list. i.e. replace
3352  // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3353  // %E = extractvalue { i32, { i32 } } %I, 1
3354  // with
3355  // %X = extractvalue { i32, { i32 } } %A, 1
3356  // %E = insertvalue { i32 } %X, i32 42, 0
3357  // by switching the order of the insert and extract (though the
3358  // insertvalue should be left in, since it may have other uses).
3359  Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
3360  EV.getIndices());
3361  return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
3362  makeArrayRef(insi, inse));
3363  }
3364  if (insi == inse)
3365  // The insert list is a prefix of the extract list
3366  // We can simply remove the common indices from the extract and make it
3367  // operate on the inserted value instead of the insertvalue result.
3368  // i.e., replace
3369  // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3370  // %E = extractvalue { i32, { i32 } } %I, 1, 0
3371  // with
3372  // %E extractvalue { i32 } { i32 42 }, 0
3373  return ExtractValueInst::Create(IV->getInsertedValueOperand(),
3374  makeArrayRef(exti, exte));
3375  }
3376 
3377  if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
3378  return R;
3379 
3380  if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
3381  // If the (non-volatile) load only has one use, we can rewrite this to a
3382  // load from a GEP. This reduces the size of the load. If a load is used
3383  // only by extractvalue instructions then this either must have been
3384  // optimized before, or it is a struct with padding, in which case we
3385  // don't want to do the transformation as it loses padding knowledge.
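    // Illustrative sketch (hypothetical types and names):
    //   %agg = load { i32, i64 }, ptr %p
    //   %v   = extractvalue { i32, i64 } %agg, 1
    // becomes
    //   %gep = getelementptr inbounds { i32, i64 }, ptr %p, i32 0, i32 1
    //   %v   = load i64, ptr %gep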
3386  if (L->isSimple() && L->hasOneUse()) {
3387  // extractvalue has integer indices, getelementptr has Value*s. Convert.
3388  SmallVector<Value*, 4> Indices;
3389  // Prefix an i32 0 since we need the first element.
3390  Indices.push_back(Builder.getInt32(0));
3391  for (unsigned Idx : EV.indices())
3392  Indices.push_back(Builder.getInt32(Idx));
3393 
3394  // We need to insert these at the location of the old load, not at that of
3395  // the extractvalue.
3396  Builder.SetInsertPoint(L);
3397  Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
3398  L->getPointerOperand(), Indices);
3399  Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
3400  // Whatever aliasing information we had for the original load must also
3401  // hold for the smaller load, so propagate the annotations.
3402  NL->setAAMetadata(L->getAAMetadata());
3403  // Returning the load directly will cause the main loop to insert it in
3404  // the wrong spot, so use replaceInstUsesWith().
3405  return replaceInstUsesWith(EV, NL);
3406  }
3407  }
3408 
3409  if (auto *PN = dyn_cast<PHINode>(Agg))
3410  if (Instruction *Res = foldOpIntoPhi(EV, PN))
3411  return Res;
3412 
3413  // We could simplify extracts from other values. Note that nested extracts may
3414  // already be simplified implicitly by the above: extract (extract (insert) )
3415  // will be translated into extract ( insert ( extract ) ) first and then just
3416  // the value inserted, if appropriate. Similarly for extracts from single-use
3417  // loads: extract (extract (load)) will be translated to extract (load (gep))
3418  // and if again single-use then via load (gep (gep)) to load (gep).
3419  // However, double extracts from e.g. function arguments or return values
3420  // aren't handled yet.
3421  return nullptr;
3422 }
3423 
3424 /// Return 'true' if the given typeinfo will match anything.
3425 static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
3426  switch (Personality) {
3427  case EHPersonality::GNU_C:
3428  case EHPersonality::GNU_C_SjLj:
3429  case EHPersonality::Rust:
3430  // The GCC C EH and Rust personalities only exist to support cleanups, so
3431  // it's not clear what the semantics of catch clauses are.
3432  return false;
3433  case EHPersonality::Unknown:
3434  return false;
3435  case EHPersonality::GNU_Ada:
3436  // While __gnat_all_others_value will match any Ada exception, it doesn't
3437  // match foreign exceptions (or didn't, before gcc-4.7).
3438  return false;
3439  case EHPersonality::GNU_CXX:
3440  case EHPersonality::GNU_CXX_SjLj:
3441  case EHPersonality::GNU_ObjC:
3442  case EHPersonality::MSVC_X86SEH:
3443  case EHPersonality::MSVC_TableSEH:
3444  case EHPersonality::MSVC_CXX:
3445  case EHPersonality::CoreCLR:
3446  case EHPersonality::Wasm_CXX:
3447  case EHPersonality::XL_CXX:
3448  return TypeInfo->isNullValue();
3449  }
3450  llvm_unreachable("invalid enum");
3451 }
3452 
3453 static bool shorter_filter(const Value *LHS, const Value *RHS) {
3454  return
3455  cast<ArrayType>(LHS->getType())->getNumElements()
3456  <
3457  cast<ArrayType>(RHS->getType())->getNumElements();
3458 }
3459 
3460 Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
3461  // The logic here should be correct for any real-world personality function.
3462  // However if that turns out not to be true, the offending logic can always
3463  // be conditioned on the personality function, like the catch-all logic is.
3464  EHPersonality Personality =
3465      classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
3466 
3467  // Simplify the list of clauses, e.g. by removing repeated catch clauses
3468  // (these are often created by inlining).
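  // For illustration only: after inlining, a landingpad such as
  //   %lp = landingpad { ptr, i32 }
  //           catch ptr @_ZTIi
  //           catch ptr @_ZTIi
  // carries a redundant second catch clause, which the loop below drops.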
3469  bool MakeNewInstruction = false; // If true, recreate using the following:
3470  SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
3471  bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
3472 
3473  SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
3474  for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
3475  bool isLastClause = i + 1 == e;
3476  if (LI.isCatch(i)) {
3477  // A catch clause.
3478  Constant *CatchClause = LI.getClause(i);
3479  Constant *TypeInfo = CatchClause->stripPointerCasts();
3480 
3481  // If we already saw this clause, there is no point in having a second
3482  // copy of it.
3483  if (AlreadyCaught.insert(TypeInfo).second) {
3484  // This catch clause was not already seen.
3485  NewClauses.push_back(CatchClause);
3486  } else {
3487  // Repeated catch clause - drop the redundant copy.
3488  MakeNewInstruction = true;
3489  }
3490 
3491  // If this is a catch-all then there is no point in keeping any following
3492  // clauses or marking the landingpad as having a cleanup.
3493  if (isCatchAll(Personality, TypeInfo)) {
3494  if (!isLastClause)
3495  MakeNewInstruction = true;
3496  CleanupFlag = false;
3497  break;
3498  }
3499  } else {
3500  // A filter clause. If any of the filter elements were already caught
3501  // then they can be dropped from the filter. It is tempting to try to
3502  // exploit the filter further by saying that any typeinfo that does not
3503  // occur in the filter can't be caught later (and thus can be dropped).
3504  // However this would be wrong, since typeinfos can match without being
3505  // equal (for example if one represents a C++ class, and the other some
3506  // class derived from it).
3507  assert(LI.isFilter(i) && "Unsupported landingpad clause!");
3508  Constant *FilterClause = LI.getClause(i);
3509  ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
3510  unsigned NumTypeInfos = FilterType->getNumElements();
3511 
3512  // An empty filter catches everything, so there is no point in keeping any
3513  // following clauses or marking the landingpad as having a cleanup. By
3514  // dealing with this case here the following code is made a bit simpler.
3515  if (!NumTypeInfos) {
3516  NewClauses.push_back(FilterClause);
3517  if (!isLastClause)
3518  MakeNewInstruction = true;
3519  CleanupFlag = false;
3520  break;
3521  }
3522 
3523  bool MakeNewFilter = false; // If true, make a new filter.
3524  SmallVector<Constant *, 16> NewFilterElts; // New elements.
3525  if (isa<ConstantAggregateZero>(FilterClause)) {
3526  // Not an empty filter - it contains at least one null typeinfo.
3527  assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
3528  Constant *TypeInfo =
3529  Constant::getNullValue(FilterType->getElementType());
3530  // If this typeinfo is a catch-all then the filter can never match.
3531  if (isCatchAll(Personality, TypeInfo)) {
3532  // Throw the filter away.
3533  MakeNewInstruction = true;
3534  continue;
3535  }
3536 
3537  // There is no point in having multiple copies of this typeinfo, so
3538  // discard all but the first copy if there is more than one.
3539  NewFilterElts.push_back(TypeInfo);
3540  if (NumTypeInfos > 1)
3541  MakeNewFilter = true;
3542  } else {
3543  ConstantArray *Filter = cast<ConstantArray>(FilterClause);
3544  SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
3545  NewFilterElts.reserve(NumTypeInfos);
3546 
3547  // Remove any filter elements that were already caught or that already
3548  // occurred in the filter. While there, see if any of the elements are
3549  // catch-alls. If so, the filter can be discarded.
3550  bool SawCatchAll = false;
3551  for (unsigned j = 0; j != NumTypeInfos; ++j) {
3552  Constant *Elt = Filter->getOperand(j);
3553  Constant *TypeInfo = Elt->stripPointerCasts();
3554  if (isCatchAll(Personality, TypeInfo)) {
3555  // This element is a catch-all. Bail out, noting this fact.
3556  SawCatchAll = true;
3557  break;
3558  }
3559 
3560  // Even if we've seen a type in a catch clause, we don't want to
3561  // remove it from the filter. An unexpected type handler may be
3562  // set up for a call site which throws an exception of the same
3563  // type caught. In order for the exception thrown by the unexpected
3564  // handler to propagate correctly, the filter must be correctly
3565  // described for the call site.
3566  //
3567  // Example:
3568  //
3569  // void unexpected() { throw 1;}
3570  // void foo() throw (int) {
3571  // std::set_unexpected(unexpected);
3572  // try {
3573  // throw 2.0;
3574  // } catch (int i) {}
3575  // }
3576 
3577  // There is no point in having multiple copies of the same typeinfo in
3578  // a filter, so only add it if we didn't already.
3579  if (SeenInFilter.insert(TypeInfo).second)
3580  NewFilterElts.push_back(cast<Constant>(Elt));
3581  }
3582  // A filter containing a catch-all cannot match anything by definition.
3583  if (SawCatchAll) {
3584  // Throw the filter away.
3585  MakeNewInstruction = true;
3586  continue;
3587  }
3588 
3589  // If we dropped something from the filter, make a new one.
3590  if (NewFilterElts.size() < NumTypeInfos)
3591  MakeNewFilter = true;
3592  }
3593  if (MakeNewFilter) {
3594  FilterType = ArrayType::get(FilterType->getElementType(),
3595  NewFilterElts.size());
3596  FilterClause = ConstantArray::get(FilterType, NewFilterElts);
3597  MakeNewInstruction = true;
3598  }
3599 
3600  NewClauses.push_back(FilterClause);
3601 
3602  // If the new filter is empty then it will catch everything so there is
3603  // no point in keeping any following clauses or marking the landingpad
3604  // as having a cleanup. The case of the original filter being empty was
3605  // already handled above.
3606  if (MakeNewFilter && !NewFilterElts.size()) {
3607  assert(MakeNewInstruction && "New filter but not a new instruction!");
3608  CleanupFlag = false;
3609  break;
3610  }
3611  }
3612  }
3613 
3614  // If several filters occur in a row then reorder them so that the shortest
3615  // filters come first (those with the smallest number of elements). This is
3616  // advantageous because shorter filters are more likely to match, speeding up
3617  // unwinding, but mostly because it increases the effectiveness of the other
3618  // filter optimizations below.
3619  for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
3620  unsigned j;
3621  // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
3622  for (j = i; j != e; ++j)
3623  if (!isa<ArrayType>(NewClauses[j]->getType()))
3624  break;
3625 
3626  // Check whether the filters are already sorted by length. We need to know
3627  // if sorting them is actually going to do anything so that we only make a
3628  // new landingpad instruction if it does.
3629  for (unsigned k = i; k + 1 < j; ++k)
3630  if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
3631  // Not sorted, so sort the filters now. Doing an unstable sort would be
3632  // correct too but reordering filters pointlessly might confuse users.
3633  std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
3634  shorter_filter);
3635  MakeNewInstruction = true;
3636  break;
3637  }
3638 
3639  // Look for the next batch of filters.
3640  i = j + 1;
3641  }
3642 
3643  // If typeinfos matched if and only if equal, then the elements of a filter L
3644  // that occurs later than a filter F could be replaced by the intersection of
3645  // the elements of F and L. In reality two typeinfos can match without being
3646  // equal (for example if one represents a C++ class, and the other some class
3647  // derived from it) so it would be wrong to perform this transform in general.
3648  // However the transform is correct and useful if F is a subset of L. In that
3649  // case L can be replaced by F, and thus removed altogether since repeating a
3650  // filter is pointless. So here we look at all pairs of filters F and L where
3651  // L follows F in the list of clauses, and remove L if every element of F is
3652  // an element of L. This can occur when inlining C++ functions with exception
3653  // specifications.
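  // Hypothetical example: given the clauses
  //   filter [1 x ptr] [ptr @_ZTIi]
  //   filter [2 x ptr] [ptr @_ZTIi, ptr @_ZTId]
  // the first filter (F) is a subset of the second (L), so L is removed.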
3654  for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
3655  // Examine each filter in turn.
3656  Value *Filter = NewClauses[i];
3657  ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
3658  if (!FTy)
3659  // Not a filter - skip it.
3660  continue;
3661  unsigned FElts = FTy->getNumElements();
3662  // Examine each filter following this one. Doing this backwards means that
3663  // we don't have to worry about filters disappearing under us when removed.
3664  for (unsigned j = NewClauses.size() - 1; j != i; --j) {
3665  Value *LFilter = NewClauses[j];
3666  ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
3667  if (!LTy)
3668  // Not a filter - skip it.
3669  continue;
3670  // If Filter is a subset of LFilter, i.e. every element of Filter is also
3671  // an element of LFilter, then discard LFilter.
3672  SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
3673  // If Filter is empty then it is a subset of LFilter.
3674  if (!FElts) {
3675  // Discard LFilter.
3676  NewClauses.erase(J);
3677  MakeNewInstruction = true;
3678  // Move on to the next filter.
3679  continue;
3680  }
3681  unsigned LElts = LTy->getNumElements();
3682  // If Filter is longer than LFilter then it cannot be a subset of it.
3683  if (FElts > LElts)
3684  // Move on to the next filter.
3685  continue;
3686  // At this point we know that LFilter has at least one element.
3687  if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
3688  // Filter is a subset of LFilter iff Filter contains only zeros (as we
3689  // already know that Filter is not longer than LFilter).
3690  if (isa<ConstantAggregateZero>(Filter)) {
3691  assert(FElts <= LElts && "Should have handled this case earlier!");
3692  // Discard LFilter.
3693  NewClauses.erase(J);
3694  MakeNewInstruction = true;
3695  }
3696  // Move on to the next filter.
3697  continue;
3698  }
3699  ConstantArray *LArray = cast<ConstantArray>(LFilter);
3700  if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
3701  // Since Filter is non-empty and contains only zeros, it is a subset of
3702  // LFilter iff LFilter contains a zero.
3703  assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
3704  for (unsigned l = 0; l != LElts; ++l)
3705  if (LArray->getOperand(l)->isNullValue()) {
3706  // LFilter contains a zero - discard it.
3707  NewClauses.erase(J);
3708  MakeNewInstruction = true;
3709  break;
3710  }
3711  // Move on to the next filter.
3712  continue;
3713  }
3714  // At this point we know that both filters are ConstantArrays. Loop over
3715  // operands to see whether every element of Filter is also an element of
3716  // LFilter. Since filters tend to be short this is probably faster than
3717  // using a method that scales nicely.
3718  ConstantArray *FArray = cast<ConstantArray>(Filter);
3719  bool AllFound = true;
3720  for (unsigned f = 0; f != FElts; ++f) {
3721  Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
3722  AllFound = false;
3723  for (unsigned l = 0; l != LElts; ++l) {
3724  Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
3725  if (LTypeInfo == FTypeInfo) {
3726  AllFound = true;
3727  break;
3728  }
3729  }
3730  if (!AllFound)
3731  break;
3732  }
3733  if (AllFound) {
3734  // Discard LFilter.
3735  NewClauses.erase(J);
3736  MakeNewInstruction = true;
3737  }
3738  // Move on to the next filter.
3739  }
3740  }
3741 
3742  // If we changed any of the clauses, replace the old landingpad instruction
3743  // with a new one.
3744  if (MakeNewInstruction) {
3745  LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
3746  NewClauses.size());
3747  for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
3748  NLI->addClause(NewClauses[i]);
3749  // A landing pad with no clauses must have the cleanup flag set. It is
3750  // theoretically possible, though highly unlikely, that we eliminated all
3751  // clauses. If so, force the cleanup flag to true.
3752  if (NewClauses.empty())
3753  CleanupFlag = true;
3754  NLI->setCleanup(CleanupFlag);
3755  return NLI;
3756  }
3757 
3758  // Even if none of the clauses changed, we may nonetheless have understood
3759  // that the cleanup flag is pointless. Clear it if so.
3760  if (LI.isCleanup() != CleanupFlag) {
3761  assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
3762  LI.setCleanup(CleanupFlag);
3763  return &LI;
3764  }
3765 
3766  return nullptr;
3767 }
3768 
3769 Value *
3770 InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
3771  // Try to push freeze through instructions that propagate but don't produce
3772  // poison as far as possible. If an operand of freeze follows three
3773  // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
3774  // guaranteed-non-poison operands then push the freeze through to the one
3775  // operand that is not guaranteed non-poison. The actual transform is as
3776  // follows.
3777  // Op1 = ... ; Op1 can be poison
3778  // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and exactly
3779  //                                  ; one maybe-poison operand (Op1)
3780  // ... = Freeze(Op0)
3781  // =>
3782  // Op1 = ...
3783  // Op1.fr = Freeze(Op1)
3784  // ... = Inst(Op1.fr, NonPoisonOps...)
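  // A concrete (illustrative) instance of the schematic above:
  //   %op0 = add nsw i32 %op1, 1    ; single use; %op1 may be poison
  //   %fr  = freeze i32 %op0
  // becomes
  //   %op1.fr = freeze i32 %op1
  //   %op0    = add i32 %op1.fr, 1  ; nsw dropped; %fr is replaced by %op0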
3785  auto *OrigOp = OrigFI.getOperand(0);
3786  auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
3787 
3788  // While we could change the other users of OrigOp to use freeze(OrigOp), that
3789  // potentially reduces their optimization potential, so let's only do this iff
3790  // the OrigOp is only used by the freeze.
3791  if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
3792  return nullptr;
3793 
3794  // We can't push the freeze through an instruction which can itself create
3795  // poison. If the only source of new poison is flags, we can simply
3796  // strip them (since we know the only use is the freeze and nothing can
3797  // benefit from them.)
3798  if (canCreateUndefOrPoison(cast<Operator>(OrigOp), /*ConsiderFlags*/ false))
3799  return nullptr;
3800 
3801  // If operand is guaranteed not to be poison, there is no need to add freeze
3802  // to the operand. So we first find the operand that is not guaranteed to be
3803  // poison.
3804  Use *MaybePoisonOperand = nullptr;
3805  for (Use &U : OrigOpInst->operands()) {
3806  if (isGuaranteedNotToBeUndefOrPoison(U.get()))
3807  continue;
3808  if (!MaybePoisonOperand)
3809  MaybePoisonOperand = &U;
3810  else
3811  return nullptr;
3812  }
3813 
3814  OrigOpInst->dropPoisonGeneratingFlags();
3815 
3816  // If all operands are guaranteed to be non-poison, we can drop freeze.
3817  if (!MaybePoisonOperand)
3818  return OrigOp;
3819 
3820  Builder.SetInsertPoint(OrigOpInst);
3821  auto *FrozenMaybePoisonOperand = Builder.CreateFreeze(
3822  MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr");
3823 
3824  replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand);
3825  return OrigOp;
3826 }
3827 
3828 Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
3829                                                         PHINode *PN) {
3830  // Detect whether this is a recurrence with a start value and some number of
3831  // backedge values. We'll check whether we can push the freeze through the
3832  // backedge values (possibly dropping poison flags along the way) until we
3833  // reach the phi again. In that case, we can move the freeze to the start
3834  // value.
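  // Illustrative sketch (hypothetical IR): for
  //   loop:
  //     %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
  //     %fr = freeze i32 %iv
  // the freeze can instead be applied to %start in %entry, provided the
  // backedge value cannot reintroduce poison (flags are dropped if needed).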
3835  Use *StartU = nullptr;
3836  SmallVector<Value *> Worklist;
3837  for (Use &U : PN->incoming_values()) {
3838  if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
3839  // Add backedge value to worklist.
3840  Worklist.push_back(U.get());
3841  continue;
3842  }
3843 
3844  // Don't bother handling multiple start values.
3845  if (StartU)
3846  return nullptr;
3847  StartU = &U;
3848  }
3849 
3850  if (!StartU || Worklist.empty())
3851  return nullptr; // Not a recurrence.
3852 
3853  Value *StartV = StartU->get();
3854  BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
3855  bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
3856  // We can't insert freeze if the start value is the result of the
3857  // terminator (e.g. an invoke).
3858  if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
3859  return nullptr;
3860 
3861  SmallPtrSet<Value *, 32> Visited;
3862  SmallVector<Instruction *> DropFlags;
3863  while (!Worklist.empty()) {
3864  Value *V = Worklist.pop_back_val();
3865  if (!Visited.insert(V).second)
3866  continue;
3867 
3868  if (Visited.size() > 32)
3869  return nullptr; // Limit the total number of values we inspect.
3870 
3871  // Assume that PN is non-poison, because it will be after the transform.
3872  if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
3873  continue;
3874 
3875  Instruction *I = dyn_cast<Instruction>(V);
3876  if (!I || canCreateUndefOrPoison(cast<Operator>(I),
3877  /*ConsiderFlags*/ false))
3878  return nullptr;
3879 
3880  DropFlags.push_back(I);
3881  append_range(Worklist, I->operands());
3882  }
3883 
3884  for (Instruction *I : DropFlags)
3885  I->dropPoisonGeneratingFlags();
3886 
3887  if (StartNeedsFreeze) {
3888  Builder.SetInsertPoint(StartBB->getTerminator());
3889  Value *FrozenStartV = Builder.CreateFreeze(StartV,
3890  StartV->getName() + ".fr");
3891  replaceUse(*StartU, FrozenStartV);
3892  }
3893  return replaceInstUsesWith(FI, PN);
3894 }
3895 
3896 bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
3897  Value *Op = FI.getOperand(0);
3898 
3899  if (isa<Constant>(Op) || Op->hasOneUse())
3900  return false;
3901 
3902  // Move the freeze directly after the definition of its operand, so that
3903  // it dominates the maximum number of uses. Note that it may not dominate
3904  // *all* uses if the operand is an invoke/callbr and the use is in a phi on
3905  // the normal/default destination. This is why the domination check in the
3906  // replacement below is still necessary.
3907  Instruction *MoveBefore;
3908  if (isa<Argument>(Op)) {
3909  MoveBefore =
3910      &*FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
3911  } else {
3912  MoveBefore = cast<Instruction>(Op)->getInsertionPointAfterDef();
3913  if (!MoveBefore)
3914  return false;
3915  }
3916 
3917  bool Changed = false;
3918  if (&FI != MoveBefore) {
3919  FI.moveBefore(MoveBefore);
3920  Changed = true;
3921  }
3922 
3923  Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
3924  bool Dominates = DT.dominates(&FI, U);
3925  Changed |= Dominates;
3926  return Dominates;
3927  });
3928 
3929  return Changed;
3930 }
3931 
3932 Instruction *InstCombinerImpl::visitFreezeInst(FreezeInst &I) {
3933  Value *Op0 = I.getOperand(0);
3934 
3935  if (Value *V = simplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
3936  return replaceInstUsesWith(I, V);
3937 
3938  // freeze (phi const, x) --> phi const, (freeze x)
3939  if (auto *PN = dyn_cast<PHINode>(Op0)) {
3940  if (Instruction *NV = foldOpIntoPhi(I, PN))
3941  return NV;
3942  if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
3943  return NV;
3944  }
3945 
3946  if (Value *NI = pushFreezeToPreventPoisonFromPropagating(I))
3947  return replaceInstUsesWith(I, NI);
3948 
3949  // If I is freeze(undef), check its uses and fold it to a fixed constant.
3950  // - or: pick -1
3951  // - select's condition: if the true value is constant, choose it by making
3952  // the condition true.
3953  // - default: pick 0
3954  //
3955  // Note that this transform is intentionally done here rather than
3956  // via an analysis in InstSimplify or at individual user sites. That is
3957  // because we must produce the same value for all uses of the freeze -
3958  // it's the reason "freeze" exists!
3959  //
3960  // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
3961  // duplicating logic for binops at least.
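  // Illustrative (hypothetical) example:
  //   %f = freeze i32 undef
  //   %a = or i32 %f, %x
  // Every use here is an 'or', so %f is replaced by -1; if the uses disagreed
  // on a preferred constant, the default 0 would be chosen instead.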
3962  auto getUndefReplacement = [&I](Type *Ty) {
3963  Constant *BestValue = nullptr;
3964  Constant *NullValue = Constant::getNullValue(Ty);
3965  for (const auto *U : I.users()) {
3966  Constant *C = NullValue;
3967  if (match(U, m_Or(m_Value(), m_Value())))
3968  C = ConstantInt::getAllOnesValue(Ty);
3969  else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
3970  C = ConstantInt::getTrue(Ty);
3971 
3972  if (!BestValue)
3973  BestValue = C;
3974  else if (BestValue != C)
3975  BestValue = NullValue;
3976  }
3977  assert(BestValue && "Must have at least one use");
3978  return BestValue;
3979  };
3980 
3981  if (match(Op0, m_Undef()))
3982  return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
3983 
3984  Constant *C;
3985  if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) {
3986  Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType());
3987  return replaceInstUsesWith(I, Constant::replaceUndefsWith(C, ReplaceC));
3988  }
3989 
3990  // Replace uses of Op with freeze(Op).
3991  if (freezeOtherUses(I))
3992  return &I;
3993 
3994  return nullptr;
3995 }
3996 
3997 /// Check for case where the call writes to an otherwise dead alloca. This
3998 /// shows up for unused out-params in idiomatic C/C++ code. Note that this
3999 /// helper *only* analyzes the write; doesn't check any other legality aspect.
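/// Illustrative C-level shape (hypothetical function name):
///   int unused;            // otherwise-dead local
///   get_value(&unused);    // call writes only to 'unused'; result is ignored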
4000 static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
4001  auto *CB = dyn_cast<CallBase>(I);
4002  if (!CB)
4003  // TODO: handle e.g. store to alloca here - only worth doing if we extend
4004  // to allow reload along used path as described below. Otherwise, this
4005  // is simply a store to a dead allocation which will be removed.
4006  return false;
4007  std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
4008  if (!Dest)
4009  return false;
4010  auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
4011  if (!AI)
4012  // TODO: allow malloc?
4013  return false;
4014  // TODO: allow memory access dominated by move point? Note that since AI
4015  // could have a reference to itself captured by the call, we would need to
4016  // account for cycles in doing so.
4017  SmallVector<const User *> AllocaUsers;
4018  SmallPtrSet<const User *, 4> Visited;
4019  auto pushUsers = [&](const Instruction &I) {
4020  for (const User *U : I.users()) {
4021  if (Visited.insert(U).second)
4022  AllocaUsers.push_back(U);
4023  }
4024  };
4025  pushUsers(*AI);
4026  while (!AllocaUsers.empty()) {
4027  auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
4028  if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
4029  isa<AddrSpaceCastInst>(UserI)) {
4030  pushUsers(*UserI);
4031  continue;
4032  }
4033  if (UserI == CB)
4034  continue;
4035  // TODO: support lifetime.start/end here
4036  return false;
4037  }
4038  return true;
4039 }
4040 
4041 /// Try to move the specified instruction from its current block into the
4042 /// beginning of DestBlock, which can only happen if it's safe to move the
4043 /// instruction past all of the instructions between it and the end of its
4044 /// block.
4045 static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock,
4046  TargetLibraryInfo &TLI) {
4047  BasicBlock *SrcBlock = I->getParent();
4048 
4049  // Cannot move control-flow-involving, volatile loads, vaarg, etc.
4050  if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
4051  I->isTerminator())
4052  return false;
4053 
4054  // Do not sink static or dynamic alloca instructions. Static allocas must
4055  // remain in the entry block, and dynamic allocas must not be sunk in between
4056  // a stacksave / stackrestore pair, which would incorrectly shorten its
4057  // lifetime.
4058  if (isa<AllocaInst>(I))
4059  return false;
4060 
4061  // Do not sink into catchswitch blocks.
4062  if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
4063  return false;
4064 
4065  // Do not sink convergent call instructions.
4066  if (auto *CI = dyn_cast<CallInst>(I)) {
4067  if (CI->isConvergent())
4068  return false;
4069  }
4070 
4071  // Unless we can prove that the memory write isn't visible except on the
4072  // path we're sinking to, we must bail.
4073  if (I->mayWriteToMemory()) {
4074  if (!SoleWriteToDeadLocal(I, TLI))
4075  return false;
4076  }
4077 
4078  // We can only sink load instructions if there is nothing between the load and
4079  // the end of block that could change the value.
4080  if (I->mayReadFromMemory()) {
4081  // We don't want to do any sophisticated alias analysis, so we only check
4082  // the instructions after I in I's parent block if we try to sink to its
4083  // successor block.
4084  if (DestBlock->getUniquePredecessor() != I->getParent())
4085  return false;
4086  for (BasicBlock::iterator Scan = std::next(I->getIterator()),
4087  E = I->getParent()->end();
4088  Scan != E; ++Scan)
4089  if (Scan->mayWriteToMemory())
4090  return false;
4091  }
4092 
4093  I->dropDroppableUses([DestBlock](const Use *U) {
4094  if (auto *I = dyn_cast<Instruction>(U->getUser()))
4095  return I->getParent() != DestBlock;
4096  return true;
4097  });
4098  /// FIXME: We could remove droppable uses that are not dominated by
4099  /// the new position.
4100 
4101  BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
4102  I->moveBefore(&*InsertPos);
4103  ++NumSunkInst;
4104 
4105  // Also sink all related debug uses from the source basic block. Otherwise we
4106  // get debug use before the def. Attempt to salvage debug uses first, to
4107  // maximise the range variables have location for. If we cannot salvage, then
4108  // mark the location undef: we know it was supposed to receive a new location
4109  // here, but that computation has been sunk.
4110  SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
4111  findDbgUsers(DbgUsers, I);
4112  // Process the sinking DbgUsers in reverse order, as we only want to clone the
4113  // last appearing debug intrinsic for each given variable.
4114  SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink;
4115  for (DbgVariableIntrinsic *DVI : DbgUsers)
4116  if (DVI->getParent() == SrcBlock)
4117  DbgUsersToSink.push_back(DVI);
4118  llvm::sort(DbgUsersToSink,
4119  [](auto *A, auto *B) { return B->comesBefore(A); });
4120 
4121  SmallVector<std::unique_ptr<DbgVariableIntrinsic>, 2> DIIClones;
4122  SmallSet<DebugVariable, 4> SunkVariables;
4123  for (auto *User : DbgUsersToSink) {
4124  // A dbg.declare instruction should not be cloned, since there can only be
4125  // one per variable fragment. It should be left in the original place
4126  // because the sunk instruction is not an alloca (otherwise we could not be
4127  // here).
4128  if (isa<DbgDeclareInst>(User))
4129  continue;
4130 
4131  DebugVariable DbgUserVariable =
4132  DebugVariable(User->getVariable(), User->getExpression(),
4133  User->getDebugLoc()->getInlinedAt());
4134 
4135  if (!SunkVariables.insert(DbgUserVariable).second)
4136  continue;
4137 
4138  // Leave dbg.assign intrinsics in their original positions and there should
4139  // be no need to insert a clone.
4140  if (isa<DbgAssignIntrinsic>(User))
4141  continue;
4142 
4143  DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
4144  if (isa<DbgDeclareInst>(User) && isa<CastInst>(I))
4145  DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0));
4146  LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
4147  }
4148 
4149  // Perform salvaging without the clones, then sink the clones.
4150  if (!DIIClones.empty()) {
4151  salvageDebugInfoForDbgValues(*I, DbgUsers);
4152  // The clones are in reverse order of original appearance, reverse again to
4153  // maintain the original order.
4154  for (auto &DIIClone : llvm::reverse(DIIClones)) {
4155  DIIClone->insertBefore(&*InsertPos);
4156  LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
4157  }
4158  }
4159 
4160  return true;
4161 }
4162 
4163 bool InstCombinerImpl::run() {
4164  while (!Worklist.isEmpty()) {
4165  // Walk deferred instructions in reverse order, and push them to the
4166  // worklist, which means they'll end up popped from the worklist in-order.
4167  while (Instruction *I = Worklist.popDeferred()) {
4168  // Check to see if we can DCE the instruction. We do this already here to
4169  // reduce the number of uses and thus allow other folds to trigger.
4170  // Note that eraseInstFromFunction() may push additional instructions on
4171  // the deferred worklist, so this will DCE whole instruction chains.
4172  if (isInstructionTriviallyDead(I, &TLI)) {
4173  eraseInstFromFunction(*I);
4174  ++NumDeadInst;
4175  continue;
4176  }
4177 
4178  Worklist.push(I);
4179  }
4180 
4181  Instruction *I = Worklist.removeOne();
4182  if (I == nullptr) continue; // skip null values.
4183 
4184  // Check to see if we can DCE the instruction.
4185  if (isInstructionTriviallyDead(I, &TLI)) {
4186  eraseInstFromFunction(*I);
4187  ++NumDeadInst;
4188  continue;
4189  }
4190 
4191  if (!DebugCounter::shouldExecute(VisitCounter))
4192  continue;
4193 
4194  // Instruction isn't dead, see if we can constant propagate it.
4195  if (!I->use_empty() &&
4196  (I->getNumOperands() == 0 || isa<Constant>(I->getOperand(0)))) {
4197  if (Constant *C = ConstantFoldInstruction(I, DL, &TLI)) {
4198  LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I
4199  << '\n');
4200 
4201  // Add operands to the worklist.
4202  replaceInstUsesWith(*I, C);
4203  ++NumConstProp;
4204  if (isInstructionTriviallyDead(I, &TLI))
4205  eraseInstFromFunction(*I);
4206  MadeIRChange = true;
4207  continue;
4208  }
4209  }
4210 
4211  // See if we can trivially sink this instruction to its user if we can
4212  // prove that the successor is not executed more frequently than our block.
4213  // Return the UserBlock if successful.
4214  auto getOptionalSinkBlockForInst =
4215  [this](Instruction *I) -> std::optional<BasicBlock *> {
4216  if (!EnableCodeSinking)
4217  return std::nullopt;
4218 
4219  BasicBlock *BB = I->getParent();
4220  BasicBlock *UserParent = nullptr;
4221  unsigned NumUsers = 0;
4222 
4223  for (auto *U : I->users()) {
4224  if (U->isDroppable())
4225  continue;
4226  if (NumUsers > MaxSinkNumUsers)
4227  return std::nullopt;
4228 
4229  Instruction *UserInst = cast<Instruction>(U);
4230  // Special handling for Phi nodes - get the block the use occurs in.
4231  if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
4232  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
4233  if (PN->getIncomingValue(i) == I) {
4234  // Bail out if we have uses in different blocks. We don't do any
4235  // sophisticated analysis (i.e finding NearestCommonDominator of
4236  // these use blocks).
4237  if (UserParent && UserParent != PN->getIncomingBlock(i))
4238  return std::nullopt;
4239  UserParent = PN->getIncomingBlock(i);
4240  }
4241  }
4242  assert(UserParent && "expected to find user block!");
4243  } else {
4244  if (UserParent && UserParent != UserInst->getParent())
4245  return std::nullopt;
4246  UserParent = UserInst->getParent();
4247  }
4248 
4249  // Make sure these checks are done only once; naturally, we do them the
4250  // first time we find the user parent, which saves compile time.
4251  if (NumUsers == 0) {
4252  // Try sinking to another block. If that block is unreachable, then do
4253  // not bother. SimplifyCFG should handle it.
4254  if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
4255  return std::nullopt;
4256 
4257  auto *Term = UserParent->getTerminator();
4258  // See if the user is one of our successors that has only one
4259  // predecessor, so that we don't have to split the critical edge.
4260  // Another option where we can sink is a block that ends with a
4261  // terminator that does not pass control to other block (such as
4262  // return or unreachable or resume). In this case:
4263  // - I dominates the User (by SSA form);
4264  // - the User will be executed at most once.
4265  // So sinking I down to User is always profitable or neutral.
4266  if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
4267  return std::nullopt;
4268 
4269  assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
4270  }
4271 
4272  NumUsers++;
4273  }
4274 
4275  // No user or only has droppable users.
4276  if (!UserParent)
4277  return std::nullopt;
4278 
4279  return UserParent;
4280  };
4281 
4282  auto OptBB = getOptionalSinkBlockForInst(I);
4283  if (OptBB) {
4284  auto *UserParent = *OptBB;
4285  // Okay, the CFG is simple enough, try to sink this instruction.
4286  if (TryToSinkInstruction(I, UserParent, TLI)) {
4287  LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
4288  MadeIRChange = true;
4289  // We'll add uses of the sunk instruction below, but since
4290  // sinking can expose opportunities for its *operands*, add
4291  // them to the worklist.
4292  for (Use &U : I->operands())
4293  if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
4294  Worklist.push(OpI);
4295  }
4296  }
4297 
4298  // Now that we have an instruction, try combining it to simplify it.
4299  Builder.SetInsertPoint(I);
4300  Builder.CollectMetadataToCopy(
4301  I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
4302 
4303 #ifndef NDEBUG
4304  std::string OrigI;
4305 #endif
4306  LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
4307  LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
4308 
4309  if (Instruction *Result = visit(*I)) {
4310  ++NumCombined;
4311  // Should we replace the old instruction with a new one?
4312  if (Result != I) {
4313  LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
4314  << " New = " << *Result << '\n');
4315 
4316  Result->copyMetadata(*I,
4317  {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
4318  // Everything uses the new instruction now.
4319  I->replaceAllUsesWith(Result);
4320 
4321  // Move the name to the new instruction first.
4322  Result->takeName(I);
4323 
4324  // Insert the new instruction into the basic block...
4325  BasicBlock *InstParent = I->getParent();
4326  BasicBlock::iterator InsertPos = I->getIterator();
4327 
4328  // Are we replacing a PHI with something that isn't a PHI, or vice versa?
4329  if (isa<PHINode>(Result) != isa<PHINode>(I)) {
4330  // We need to fix up the insertion point.
4331  if (isa<PHINode>(I)) // PHI -> Non-PHI
4332  InsertPos = InstParent->getFirstInsertionPt();
4333  else // Non-PHI -> PHI
4334  InsertPos = InstParent->getFirstNonPHI()->getIterator();
4335  }
4336 
4337  InstParent->getInstList().insert(InsertPos, Result);
4338 
4339  // Push the new instruction and any users onto the worklist.
4340  Worklist.pushUsersToWorkList(*Result);
4341  Worklist.push(Result);
4342 
4343  eraseInstFromFunction(*I);
4344  } else {
4345  LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
4346  << " New = " << *I << '\n');
4347 
4348  // If the instruction was modified, it's possible that it is now dead.
4349  // if so, remove it.
4350  if (isInstructionTriviallyDead(I, &TLI)) {
4351  eraseInstFromFunction(*I);
4352  } else {
4353  Worklist.pushUsersToWorkList(*I);
4354  Worklist.push(I);
4355  }
4356  }
4357  MadeIRChange = true;
4358  }
4359  }
4360 
4361  Worklist.zap();
4362  return MadeIRChange;
4363 }
4364 
4365 // Track the scopes used by !alias.scope and !noalias. In a function, a
4366 // @llvm.experimental.noalias.scope.decl is only useful if that scope is used
4367 // by both sets. If not, the declaration of the scope can be safely omitted.
4368 // The MDNode of the scope can be omitted as well for the instructions that are
4369 // part of this function. We do not do that at this point, as this might become
4370 // too time consuming to do.
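// Illustrative (hypothetical) case: a lone
//   call void @llvm.experimental.noalias.scope.decl(metadata !2)
// whose scope is never referenced by both an !alias.scope and a !noalias
// attachment elsewhere in the function can be discarded.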
4371 class AliasScopeTracker {
4372  SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
4373  SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
4374 
4375 public:
4376  void analyse(Instruction *I) {
4377  // This seems to be faster than checking 'mayReadOrWriteMemory()'.
4378  if (!I->hasMetadataOtherThanDebugLoc())
4379  return;
4380 
4381  auto Track = [](Metadata *ScopeList, auto &Container) {
4382  const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
4383  if (!MDScopeList || !Container.insert(MDScopeList).second)
4384  return;
4385  for (const auto &MDOperand : MDScopeList->operands())
4386  if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
4387  Container.insert(MDScope);
4388  };
4389 
4390  Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
4391  Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
4392  }
4393 
4394  bool isNoAliasScopeDeclDead(Instruction *Inst) {
4395  NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
4396  if (!Decl)
4397  return false;
4398 
4399  assert(Decl->use_empty() &&
4400  "llvm.experimental.noalias.scope.decl in use ?");
4401  const MDNode *MDSL = Decl->getScopeList();
4402  assert(MDSL->getNumOperands() == 1 &&
4403  "llvm.experimental.noalias.scope should refer to a single scope");
4404  auto &MDOperand = MDSL->getOperand(0);
4405  if (auto *MD = dyn_cast<MDNode>(MDOperand))
4406  return !UsedAliasScopesAndLists.contains(MD) ||
4407  !UsedNoAliasScopesAndLists.contains(MD);
4408 
4409  // Not an MDNode ? throw away.
4410  return true;
4411  }
4412 };
4413 
4414 /// Populate the IC worklist from a function, by walking it in depth-first
4415 /// order and adding all reachable code to the worklist.
4416 ///
4417 /// This has a couple of tricks to make the code faster and more powerful. In
4418 /// particular, we constant fold and DCE instructions as we go, to avoid adding
4419 /// them to the worklist (this significantly speeds up instcombine on code where
4420 /// many instructions are dead or constant). Additionally, if we find a branch
4421 /// whose condition is a known constant, we only visit the reachable successors.
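 /// Editorial illustration (not part of the original source): given
 ///
 ///   br i1 true, label %taken, label %untaken
 ///
 /// only %taken is pushed onto the block worklist here; %untaken, if it is not
 /// reachable some other way, is cleaned up by the sweep over unvisited blocks
 /// further down in this function.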
4422 static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
4423  const TargetLibraryInfo *TLI,
4424  InstructionWorklist &ICWorklist) {
4425  bool MadeIRChange = false;
4426  SmallPtrSet<BasicBlock *, 32> Visited;
4427  SmallVector<BasicBlock *, 256> Worklist;
4428  Worklist.push_back(&F.front());
4429 
4430  SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
4431  DenseMap<Constant *, Constant *> FoldedConstants;
4432  AliasScopeTracker SeenAliasScopes;
4433 
4434  do {
4435  BasicBlock *BB = Worklist.pop_back_val();
4436 
4437  // We have now visited this block! If we've already been here, ignore it.
4438  if (!Visited.insert(BB).second)
4439  continue;
4440 
4441  for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
4442  // Constant-fold the instruction if it is trivially constant.
4443  if (!Inst.use_empty() &&
4444  (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
4445  if (Constant *C = ConstantFoldInstruction(&Inst, DL, TLI)) {
4446  LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
4447  << '\n');
4448  Inst.replaceAllUsesWith(C);
4449  ++NumConstProp;
4450  if (isInstructionTriviallyDead(&Inst, TLI))
4451  Inst.eraseFromParent();
4452  MadeIRChange = true;
4453  continue;
4454  }
4455 
4456  // See if we can constant fold its operands.
4457  for (Use &U : Inst.operands()) {
4458  if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
4459  continue;
4460 
4461  auto *C = cast<Constant>(U);
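 // FoldedConstants memoizes ConstantFoldConstant results, so each distinct
 // constant expression is folded at most once per call to this function.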
4462  Constant *&FoldRes = FoldedConstants[C];
4463  if (!FoldRes)
4464  FoldRes = ConstantFoldConstant(C, DL, TLI);
4465 
4466  if (FoldRes != C) {
4467  LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
4468  << "\n Old = " << *C
4469  << "\n New = " << *FoldRes << '\n');
4470  U = FoldRes;
4471  MadeIRChange = true;
4472  }
4473  }
4474 
4475  // Skip processing debug and pseudo intrinsics in InstCombine. Processing
4476  // these call instructions consumes a non-trivial amount of time and
4477  // provides no value for the optimization.
4478  if (!Inst.isDebugOrPseudoInst()) {
4479  InstrsForInstructionWorklist.push_back(&Inst);
4480  SeenAliasScopes.analyse(&Inst);
4481  }
4482  }
4483 
4484  // Recursively visit successors. If this is a branch or switch on a
4485  // constant, only visit the reachable successor.
4486  Instruction *TI = BB->getTerminator();
4487  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
4488  if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
4489  bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
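 // getSuccessor(0) is the destination taken when the condition is true, so
 // indexing with !CondVal selects the single reachable successor.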
4490  BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
4491  Worklist.push_back(ReachableBB);
4492  continue;
4493  }
4494  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
4495  if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
4496  Worklist.push_back(SI->findCaseValue(Cond)->getCaseSuccessor());
4497  continue;
4498  }
4499  }
4500 
4501  append_range(Worklist, successors(TI));
4502  } while (!Worklist.empty());
4503 
4504  // Remove instructions inside unreachable blocks. This prevents the
4505  // instcombine code from having to deal with some bad special cases, and
4506  // reduces use counts of instructions.
4507  for (BasicBlock &BB : F) {
4508  if (Visited.count(&BB))
4509  continue;
4510 
4511  unsigned NumDeadInstInBB;
4512  unsigned NumDeadDbgInstInBB;
4513  std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
4514  removeAllNonTerminatorAndEHPadInstructions(&BB);
4515 
4516  MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
4517  NumDeadInst += NumDeadInstInBB;
4518  }
4519 
4520  // Once we've found all of the instructions to add to instcombine's worklist,
4521  // add them in reverse order. This way instcombine will visit from the top
4522  // of the function down. This jibes well with the way that it adds all uses
4523  // of instructions to the worklist after doing a transformation, thus avoiding
4524  // some N^2 behavior in pathological cases.
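 // Editorial illustration (not part of the original source): for the chain
 //
 //   %a = ...
 //   %b = op %a
 //   %c = op %b
 //
 // pushing in reverse program order leaves %a on top of the LIFO worklist, so
 // it is visited first; if %a is simplified, its users %b and %c are re-added
 // and are seen after their operand instead of being visited with a stale
 // operand first.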
4525  ICWorklist.reserve(InstrsForInstructionWorklist.size());
4526  for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
4527  // DCE instruction if trivially dead. As we iterate in reverse program
4528  // order here, we will clean up whole chains of dead instructions.
4529  if (isInstructionTriviallyDead(Inst, TLI) ||
4530  SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
4531  ++NumDeadInst;
4532  LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
4533  salvageDebugInfo(*Inst);
4534  Inst->eraseFromParent();
4535  MadeIRChange = true;
4536  continue;
4537  }
4538 
4539  ICWorklist.push(Inst);
4540  }
4541 
4542  return MadeIRChange;
4543 }
4544 
4545 static bool combineInstructionsOverFunction(
4546  Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
4547  AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
4548  DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
4549  ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {
4550  auto &DL = F.getParent()->getDataLayout();
4551  MaxIterations = std::min(MaxIterations, LimitMaxIterations.getValue());
4552 
4553  /// Builder - This is an IRBuilder that automatically inserts new
4554  /// instructions into the worklist when they are created.
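 /// The callback inserter below also registers any newly created llvm.assume
 /// with the AssumptionCache, so it is visible to later queries in this run.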
4555  InstCombiner::BuilderTy Builder(
4556  F.getContext(), TargetFolder(DL),
4557  IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
4558  Worklist.add(I);
4559  if (auto *Assume = dyn_cast<AssumeInst>(I))
4560  AC.registerAssumption(Assume);
4561  }));
4562 
4563  // Lower dbg.declare intrinsics; otherwise their value may be clobbered by
4564  // the instcombiner.
4565  bool MadeIRChange = false;
4566  if (ShouldLowerDbgDeclare)
4567  MadeIRChange = LowerDbgDeclare(F);
4568  // LowerDbgDeclare calls RemoveRedundantDbgInstrs, but LowerDbgDeclare will
4569  // almost never return true when running an assignment tracking build. Take
4570  // this opportunity to do some clean up for assignment tracking builds too.
4571  if (!MadeIRChange && getEnableAssignmentTracking()) {
4572  for (auto &BB : F)
4573  RemoveRedundantDbgInstrs(&BB);
4574  }
4575 
4576  // Iterate while there is work to do.
4577  unsigned Iteration = 0;
4578  while (true) {
4579  ++NumWorklistIterations;
4580  ++Iteration;
4581 
4582  if (Iteration > InfiniteLoopDetectionThreshold) {
4583  report_fatal_error(
4584  "Instruction Combining seems stuck in an infinite loop after " +
4585  Twine(InfiniteLoopDetectionThreshold) + " iterations.");
4586  }
4587 
4588  if (Iteration > MaxIterations) {
4589  LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << MaxIterations
4590  << " on " << F.getName()
4591  << " reached; stopping before reaching a fixpoint\n");
4592  break;
4593  }
4594 
4595  LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
4596  << F.getName() << "\n");
4597 
4598  MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
4599 
4600  InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
4601  ORE, BFI, PSI, DL, LI);
4602  IC.MaxArraySizeForCombine = MaxArraySize;
4603 
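 // run() returns true when this iteration changed the IR; the loop repeats
 // until a fixpoint is reached or one of the limits above triggers.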
4604  if (!IC.run())
4605  break;
4606 
4607  MadeIRChange = true;
4608  }
4609 
4610  return MadeIRChange;
4611 }
4612 
4614 
4615 InstCombinePass::InstCombinePass(unsigned MaxIterations)
4616  : MaxIterations(MaxIterations) {}
4617 
4618 PreservedAnalyses InstCombinePass::run(Function &F,
4619  FunctionAnalysisManager &AM) {
4620  auto &AC = AM.getResult<AssumptionAnalysis>(F);
4621  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
4622  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
4623  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
4624  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
4625 
4626  auto *LI = AM.getCachedResult<LoopAnalysis>(F);
4627 
4628  auto *AA = &AM.getResult<AAManager>(F);
4629  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
4630  ProfileSummaryInfo *PSI =
4631  MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
4632  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
4633  &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
4634 
4635  if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
4636  BFI, PSI, MaxIterations, LI))
4637  // No changes, all analyses are preserved.
4638  return PreservedAnalyses::all();
4639 
4640  // Mark all the analyses that instcombine updates as preserved.
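 // InstCombine does not modify the CFG, so the CFGAnalyses set can be marked
 // preserved; everything else is conservatively invalidated.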
4641  PreservedAnalyses PA;
4642  PA.preserveSet<CFGAnalyses>();
4643  return PA;
4644 }
4645 
4646 void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
4647  AU.setPreservesCFG();
4660 }
4661 
4662 bool InstructionCombiningPass::runOnFunction(Function &F) {
4663  if (skipFunction(F))
4664  return false;
4665 
4666  // Required analyses.
4667  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
4668  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
4669  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
4670  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
4671  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
4672  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
4673 
4674  // Optional analyses.
4675  auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
4676  auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
4677  ProfileSummaryInfo *PSI =
4678  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
4679  BlockFrequencyInfo *BFI =
4680  (PSI && PSI->hasProfileSummary()) ?
4681  &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
4682  nullptr;
4683 
4684  return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
4685  BFI, PSI, MaxIterations, LI);
4686 }
4687 
4688 char InstructionCombiningPass::ID = 0;
4689 
4690 InstructionCombiningPass::InstructionCombiningPass()
4691  : FunctionPass(ID), MaxIterations(InstCombineDefaultMaxIterations) {
4692  initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
4693 }
4694 
4695 InstructionCombiningPass::InstructionCombiningPass(unsigned MaxIterations)
4696  : FunctionPass(ID), MaxIterations(MaxIterations) {
4697  initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
4698 }
4699 
4700 INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
4701  "Combine redundant instructions", false, false)
4713 
4714 // Initialization Routines
4715 void llvm::initializeInstCombine(PassRegistry &Registry) {
4716  initializeInstructionCombiningPassPass(Registry);
4717 }
4718 
4719 void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
4720  initializeInstructionCombiningPassPass(*unwrap(R));
4721 }
4722 
4723 FunctionPass *llvm::createInstructionCombiningPass() {
4724  return new InstructionCombiningPass();
4725 }
4726 
4727 FunctionPass *llvm::createInstructionCombiningPass(unsigned MaxIterations) {
4728  return new InstructionCombiningPass(MaxIterations);
4729 }
4730 
4731 void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) {
4732  unwrap(PM)->add(createInstructionCombiningPass());
4733 }