LLVM  13.0.0git
Reassociate.cpp
1 //===- Reassociate.cpp - Reassociate binary expressions -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass reassociates commutative expressions in an order that is designed
10 // to promote better constant propagation, GCSE, LICM, PRE, etc.
11 //
12 // For example: 4 + (x + 5) -> x + (4 + 5)
13 //
14 // In the implementation of this algorithm, constants are assigned rank = 0,
15 // function arguments are rank = 1, and other values are assigned ranks
16 // corresponding to the reverse post order traversal of the current function
17 // (starting at 2), which effectively gives values in deep loops higher rank
18 // than values not in loops.
19 //
20 //===----------------------------------------------------------------------===//
21 
22 #include "llvm/Transforms/Scalar/Reassociate.h"
23 #include "llvm/ADT/APFloat.h"
24 #include "llvm/ADT/APInt.h"
25 #include "llvm/ADT/DenseMap.h"
26 #include "llvm/ADT/PostOrderIterator.h"
27 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/BasicAliasAnalysis.h"
33 #include "llvm/Analysis/GlobalsModRef.h"
34 #include "llvm/Analysis/ValueTracking.h"
35 #include "llvm/IR/Argument.h"
36 #include "llvm/IR/BasicBlock.h"
37 #include "llvm/IR/CFG.h"
38 #include "llvm/IR/Constant.h"
39 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/Function.h"
41 #include "llvm/IR/IRBuilder.h"
42 #include "llvm/IR/InstrTypes.h"
43 #include "llvm/IR/Instruction.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/IntrinsicInst.h"
46 #include "llvm/IR/Operator.h"
47 #include "llvm/IR/PassManager.h"
48 #include "llvm/IR/PatternMatch.h"
49 #include "llvm/IR/Type.h"
50 #include "llvm/IR/User.h"
51 #include "llvm/IR/Value.h"
52 #include "llvm/IR/ValueHandle.h"
53 #include "llvm/InitializePasses.h"
54 #include "llvm/Pass.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/Debug.h"
57 #include "llvm/Support/ErrorHandling.h"
58 #include "llvm/Support/raw_ostream.h"
59 #include "llvm/Transforms/Scalar.h"
60 #include "llvm/Transforms/Utils/Local.h"
61 #include <algorithm>
62 #include <cassert>
63 #include <utility>
64 
65 using namespace llvm;
66 using namespace reassociate;
67 using namespace PatternMatch;
68 
69 #define DEBUG_TYPE "reassociate"
70 
71 STATISTIC(NumChanged, "Number of insts reassociated");
72 STATISTIC(NumAnnihil, "Number of expr tree annihilated");
73 STATISTIC(NumFactor , "Number of multiplies factored");
74 
75 #ifndef NDEBUG
76 /// Print out the expression identified in the Ops list.
77 static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
78   Module *M = I->getModule();
79  dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
80  << *Ops[0].Op->getType() << '\t';
81  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
82  dbgs() << "[ ";
83  Ops[i].Op->printAsOperand(dbgs(), false, M);
84  dbgs() << ", #" << Ops[i].Rank << "] ";
85  }
86 }
87 #endif
88 
89 /// Utility class representing a non-constant Xor-operand. We classify
90 /// non-constant Xor-Operands into two categories:
91 /// C1) The operand is in the form "X & C", where C is a constant and C != ~0
92 /// C2)
93 /// C2.1) The operand is in the form of "X | C", where C is a non-zero
94 /// constant.
95 /// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this
96 /// operand as "E | 0"
97 class llvm::reassociate::XorOpnd {
98 public:
99  XorOpnd(Value *V);
100 
101  bool isInvalid() const { return SymbolicPart == nullptr; }
102  bool isOrExpr() const { return isOr; }
103  Value *getValue() const { return OrigVal; }
104  Value *getSymbolicPart() const { return SymbolicPart; }
105  unsigned getSymbolicRank() const { return SymbolicRank; }
106  const APInt &getConstPart() const { return ConstPart; }
107 
108  void Invalidate() { SymbolicPart = OrigVal = nullptr; }
109  void setSymbolicRank(unsigned R) { SymbolicRank = R; }
110 
111 private:
112  Value *OrigVal;
113  Value *SymbolicPart;
114  APInt ConstPart;
115  unsigned SymbolicRank;
116  bool isOr;
117 };
118 
119 XorOpnd::XorOpnd(Value *V) {
120   assert(!isa<ConstantInt>(V) && "No ConstantInt");
121  OrigVal = V;
122  Instruction *I = dyn_cast<Instruction>(V);
123  SymbolicRank = 0;
124 
125  if (I && (I->getOpcode() == Instruction::Or ||
126  I->getOpcode() == Instruction::And)) {
127  Value *V0 = I->getOperand(0);
128  Value *V1 = I->getOperand(1);
129  const APInt *C;
130  if (match(V0, m_APInt(C)))
131  std::swap(V0, V1);
132 
133  if (match(V1, m_APInt(C))) {
134  ConstPart = *C;
135  SymbolicPart = V0;
136  isOr = (I->getOpcode() == Instruction::Or);
137  return;
138  }
139  }
140 
141  // view the operand as "V | 0"
142  SymbolicPart = V;
143  ConstPart = APInt::getNullValue(V->getType()->getScalarSizeInBits());
144  isOr = true;
145 }
146 
147 /// Return true if V is an instruction of the specified opcode and if it
148 /// only has one use.
149 static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
150  auto *I = dyn_cast<Instruction>(V);
151  if (I && I->hasOneUse() && I->getOpcode() == Opcode)
152  if (!isa<FPMathOperator>(I) || I->isFast())
153  return cast<BinaryOperator>(I);
154  return nullptr;
155 }
156 
157 static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
158  unsigned Opcode2) {
159  auto *I = dyn_cast<Instruction>(V);
160  if (I && I->hasOneUse() &&
161  (I->getOpcode() == Opcode1 || I->getOpcode() == Opcode2))
162  if (!isa<FPMathOperator>(I) || I->isFast())
163  return cast<BinaryOperator>(I);
164  return nullptr;
165 }
166 
167 void ReassociatePass::BuildRankMap(Function &F,
168                                    ReversePostOrderTraversal<Function *> &RPOT) {
169   unsigned Rank = 2;
170 
171  // Assign distinct ranks to function arguments.
172  for (auto &Arg : F.args()) {
173  ValueRankMap[&Arg] = ++Rank;
174  LLVM_DEBUG(dbgs() << "Calculated Rank[" << Arg.getName() << "] = " << Rank
175  << "\n");
176  }
177 
178  // Traverse basic blocks in ReversePostOrder.
179  for (BasicBlock *BB : RPOT) {
180  unsigned BBRank = RankMap[BB] = ++Rank << 16;
181 
182  // Walk the basic block, adding precomputed ranks for any instructions that
183  // we cannot move. This ensures that the ranks for these instructions are
184  // all different in the block.
185  for (Instruction &I : *BB)
186  if (mayBeMemoryDependent(I))
187  ValueRankMap[&I] = ++BBRank;
188  }
189 }
190 
191 unsigned ReassociatePass::getRank(Value *V) {
192  Instruction *I = dyn_cast<Instruction>(V);
193  if (!I) {
194  if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
195  return 0; // Otherwise it's a global or constant, rank 0.
196  }
197 
198  if (unsigned Rank = ValueRankMap[I])
199  return Rank; // Rank already known?
200 
201  // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
202  // we can reassociate expressions for code motion! Since we do not recurse
203  // for PHI nodes, we cannot have infinite recursion here, because there
204  // cannot be loops in the value graph that do not go through PHI nodes.
205  unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
206  for (unsigned i = 0, e = I->getNumOperands(); i != e && Rank != MaxRank; ++i)
207  Rank = std::max(Rank, getRank(I->getOperand(i)));
208 
209  // If this is a 'not' or 'neg' instruction, do not count it for rank. This
210  // assures us that X and ~X will have the same rank.
211  if (!match(I, m_Not(m_Value())) && !match(I, m_Neg(m_Value())) &&
212  !match(I, m_FNeg(m_Value())))
213  ++Rank;
214 
215  LLVM_DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank
216  << "\n");
217 
218  return ValueRankMap[I] = Rank;
219 }
220 
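// Illustrative note (not part of the original source): with these rules a
// constant always ranks 0, a function argument gets a small fixed rank, and an
// instruction ranks (roughly) one higher than its highest-ranked operand. So
// for
//   %t = add i32 %a, 5
//   %u = mul i32 %t, %a
// rank(%u) > rank(%t) > rank(%a) > rank(5) = 0, which is the ordering that
// canonicalizeOperands and the operand sorting below rely on to push constants
// to the right where they can be folded.
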
221 // Canonicalize constants to RHS. Otherwise, sort the operands by rank.
222 void ReassociatePass::canonicalizeOperands(Instruction *I) {
223  assert(isa<BinaryOperator>(I) && "Expected binary operator.");
224  assert(I->isCommutative() && "Expected commutative operator.");
225 
226  Value *LHS = I->getOperand(0);
227  Value *RHS = I->getOperand(1);
228  if (LHS == RHS || isa<Constant>(RHS))
229  return;
230  if (isa<Constant>(LHS) || getRank(RHS) < getRank(LHS))
231  cast<BinaryOperator>(I)->swapOperands();
232 }
233 
234 static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
235  Instruction *InsertBefore, Value *FlagsOp) {
236  if (S1->getType()->isIntOrIntVectorTy())
237  return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
238  else {
239  BinaryOperator *Res =
240  BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore);
241  Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
242  return Res;
243  }
244 }
245 
246 static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
247  Instruction *InsertBefore, Value *FlagsOp) {
248  if (S1->getType()->isIntOrIntVectorTy())
249  return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
250  else {
251  BinaryOperator *Res =
252  BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore);
253  Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
254  return Res;
255  }
256 }
257 
258 static Instruction *CreateNeg(Value *S1, const Twine &Name,
259  Instruction *InsertBefore, Value *FlagsOp) {
260  if (S1->getType()->isIntOrIntVectorTy())
261  return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
262 
263  if (auto *FMFSource = dyn_cast<Instruction>(FlagsOp))
264  return UnaryOperator::CreateFNegFMF(S1, FMFSource, Name, InsertBefore);
265 
266  return UnaryOperator::CreateFNeg(S1, Name, InsertBefore);
267 }
268 
269 /// Replace 0-X with X*-1.
270 static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
271   assert((isa<UnaryOperator>(Neg) || isa<BinaryOperator>(Neg)) &&
272  "Expected a Negate!");
273  // FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0.
274  unsigned OpNo = isa<BinaryOperator>(Neg) ? 1 : 0;
275  Type *Ty = Neg->getType();
276   Constant *NegOne = Ty->isIntOrIntVectorTy() ?
277                          ConstantInt::get(Ty, -1) : ConstantFP::get(Ty, -1.0);
278 
279  BinaryOperator *Res = CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg, Neg);
280  Neg->setOperand(OpNo, Constant::getNullValue(Ty)); // Drop use of op.
281  Res->takeName(Neg);
282  Neg->replaceAllUsesWith(Res);
283  Res->setDebugLoc(Neg->getDebugLoc());
284  return Res;
285 }
286 
287 /// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael
288 /// function. This means that x^(2^k) === 1 mod 2^Bitwidth for
289 /// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
290 /// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every
291 /// even x in Bitwidth-bit arithmetic.
292 static unsigned CarmichaelShift(unsigned Bitwidth) {
293  if (Bitwidth < 3)
294  return Bitwidth - 1;
295  return Bitwidth - 2;
296 }
297 
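// Worked example (illustrative, not from the original source):
// CarmichaelShift(8) == 6, i.e. lambda(2^8) = 2^6 = 64, so x^64 == 1 (mod 256)
// for every odd 8-bit x, and x^64 == 0 (mod 256) for every even x (the product
// carries at least 64 factors of two). This is the bound IncorporateWeight
// below uses to keep multiplication weights representable in Bitwidth bits.
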
298 /// Add the extra weight 'RHS' to the existing weight 'LHS',
299 /// reducing the combined weight using any special properties of the operation.
300 /// The existing weight LHS represents the computation X op X op ... op X where
301 /// X occurs LHS times. The combined weight represents X op X op ... op X with
302 /// X occurring LHS + RHS times. If op is "Xor" for example then the combined
303 /// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even;
304 /// the routine returns 1 in LHS in the first case, and 0 in LHS in the second.
305 static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
306  // If we were working with infinite precision arithmetic then the combined
307  // weight would be LHS + RHS. But we are using finite precision arithmetic,
308  // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct
309  // for nilpotent operations and addition, but not for idempotent operations
310  // and multiplication), so it is important to correctly reduce the combined
311  // weight back into range if wrapping would be wrong.
312 
313  // If RHS is zero then the weight didn't change.
314  if (RHS.isMinValue())
315  return;
316  // If LHS is zero then the combined weight is RHS.
317  if (LHS.isMinValue()) {
318  LHS = RHS;
319  return;
320  }
321  // From this point on we know that neither LHS nor RHS is zero.
322 
323  if (Instruction::isIdempotent(Opcode)) {
324  // Idempotent means X op X === X, so any non-zero weight is equivalent to a
325  // weight of 1. Keeping weights at zero or one also means that wrapping is
326  // not a problem.
327  assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
328  return; // Return a weight of 1.
329  }
330  if (Instruction::isNilpotent(Opcode)) {
331  // Nilpotent means X op X === 0, so reduce weights modulo 2.
332  assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
333  LHS = 0; // 1 + 1 === 0 modulo 2.
334  return;
335  }
336  if (Opcode == Instruction::Add || Opcode == Instruction::FAdd) {
337  // TODO: Reduce the weight by exploiting nsw/nuw?
338  LHS += RHS;
339  return;
340  }
341 
342  assert((Opcode == Instruction::Mul || Opcode == Instruction::FMul) &&
343  "Unknown associative operation!");
344  unsigned Bitwidth = LHS.getBitWidth();
345  // If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth
346  // can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth
347  // bit number x, since either x is odd in which case x^CM = 1, or x is even in
348  // which case both x^W and x^(W - CM) are zero. By subtracting off multiples
349  // of CM like this weights can always be reduced to the range [0, CM+Bitwidth)
350  // which by a happy accident means that they can always be represented using
351  // Bitwidth bits.
352  // TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than
353  // the Carmichael number).
354  if (Bitwidth > 3) {
355  /// CM - The value of Carmichael's lambda function.
356  APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth));
357  // Any weight W >= Threshold can be replaced with W - CM.
358  APInt Threshold = CM + Bitwidth;
359  assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!");
360  // For Bitwidth 4 or more the following sum does not overflow.
361  LHS += RHS;
362  while (LHS.uge(Threshold))
363  LHS -= CM;
364  } else {
365  // To avoid problems with overflow do everything the same as above but using
366  // a larger type.
367  unsigned CM = 1U << CarmichaelShift(Bitwidth);
368  unsigned Threshold = CM + Bitwidth;
369  assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold &&
370  "Weights not reduced!");
371  unsigned Total = LHS.getZExtValue() + RHS.getZExtValue();
372  while (Total >= Threshold)
373  Total -= CM;
374  LHS = Total;
375  }
376 }
377 
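// Worked example (illustrative, not from the original source): for an 8-bit
// multiply, CM = 64 and Threshold = CM + Bitwidth = 72, so weights 50 and 30
// combine to 80 and reduce to 80 - 64 = 16; x^80 == x^16 (mod 256) for every x
// (odd x because x^64 == 1, even x because both sides are 0). For xor the
// reduction is simply modulo 2: weights 1 and 1 combine to 0, matching
// X ^ X == 0.
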
378 using RepeatedValue = std::pair<Value*, APInt>;
379 
380 /// Given an associative binary expression, return the leaf
381 /// nodes in Ops along with their weights (how many times the leaf occurs). The
382 /// original expression is the same as
383 /// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
384 /// op
385 /// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
386 /// op
387 /// ...
388 /// op
389 /// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
390 ///
391 /// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
392 ///
393 /// This routine may modify the function, in which case it returns 'true'. The
394 /// changes it makes may well be destructive, changing the value computed by 'I'
395 /// to something completely different. Thus if the routine returns 'true' then
396 /// you MUST either replace I with a new expression computed from the Ops array,
397 /// or use RewriteExprTree to put the values back in.
398 ///
399 /// A leaf node is either not a binary operation of the same kind as the root
400 /// node 'I' (i.e. is not a binary operator at all, or is, but with a different
401 /// opcode), or is the same kind of binary operator but has a use which either
402 /// does not belong to the expression, or does belong to the expression but is
403 /// a leaf node. Every leaf node has at least one use that is a non-leaf node
404 /// of the expression, while for non-leaf nodes (except for the root 'I') every
405 /// use is a non-leaf node of the expression.
406 ///
407 /// For example:
408 ///                 expression graph         node names
409 ///
410 ///                     +        |        I
411 ///                    / \       |
412 ///                   +   +      |      A,  B
413 ///                  / \ / \     |
414 ///                 *   +   *    |    C,  D,  E
415 ///                / \ / \ / \   |
416 ///                   +   *      |      F,  G
417 ///
418 /// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
419 /// that order) (C, 1), (E, 1), (F, 2), (G, 2).
420 ///
421 /// The expression is maximal: if some instruction is a binary operator of the
422 /// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
423 /// then the instruction also belongs to the expression, is not a leaf node of
424 /// it, and its operands also belong to the expression (but may be leaf nodes).
425 ///
426 /// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
427 /// order to ensure that every non-root node in the expression has *exactly one*
428 /// use by a non-leaf node of the expression. This destruction means that the
429 /// caller MUST either replace 'I' with a new expression or use something like
430 /// RewriteExprTree to put the values back in if the routine indicates that it
431 /// made a change by returning 'true'.
432 ///
433 /// In the above example either the right operand of A or the left operand of B
434 /// will be replaced by undef. If it is B's operand then this gives:
435 ///
436 ///                     +        |        I
437 ///                    / \       |
438 ///                   +   +      |      A,  B - operand of B replaced with undef
439 ///                  / \   \     |
440 ///                 *   +   *    |    C,  D,  E
441 ///                / \ / \ / \   |
442 ///                   +   *      |      F,  G
443 ///
444 /// Note that such undef operands can only be reached by passing through 'I'.
445 /// For example, if you visit operands recursively starting from a leaf node
446 /// then you will never see such an undef operand unless you get back to 'I',
447 /// which requires passing through a phi node.
448 ///
449 /// Note that this routine may also mutate binary operators of the wrong type
450 /// that have all uses inside the expression (i.e. only used by non-leaf nodes
451 /// of the expression) if it can turn them into binary operators of the right
452 /// type and thus make the expression bigger.
453 static bool LinearizeExprTree(Instruction *I,
454                               SmallVectorImpl<RepeatedValue> &Ops) {
455   assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
456  "Expected a UnaryOperator or BinaryOperator!");
457  LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
458  unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
459  unsigned Opcode = I->getOpcode();
460  assert(I->isAssociative() && I->isCommutative() &&
461  "Expected an associative and commutative operation!");
462 
463  // Visit all operands of the expression, keeping track of their weight (the
464  // number of paths from the expression root to the operand, or if you like
465  // the number of times that operand occurs in the linearized expression).
466  // For example, if I = X + A, where X = A + B, then I, X and B have weight 1
467  // while A has weight two.
468 
469  // Worklist of non-leaf nodes (their operands are in the expression too) along
470  // with their weights, representing a certain number of paths to the operator.
471  // If an operator occurs in the worklist multiple times then we found multiple
472  // ways to get to it.
473  SmallVector<std::pair<Instruction*, APInt>, 8> Worklist; // (Op, Weight)
474  Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
475  bool Changed = false;
476 
477  // Leaves of the expression are values that either aren't the right kind of
478  // operation (eg: a constant, or a multiply in an add tree), or are, but have
479  // some uses that are not inside the expression. For example, in I = X + X,
480  // X = A + B, the value X has two uses (by I) that are in the expression. If
481  // X has any other uses, for example in a return instruction, then we consider
482  // X to be a leaf, and won't analyze it further. When we first visit a value,
483  // if it has more than one use then at first we conservatively consider it to
484  // be a leaf. Later, as the expression is explored, we may discover some more
485  // uses of the value from inside the expression. If all uses turn out to be
486  // from within the expression (and the value is a binary operator of the right
487  // kind) then the value is no longer considered to be a leaf, and its operands
488  // are explored.
489 
490  // Leaves - Keeps track of the set of putative leaves as well as the number of
491  // paths to each leaf seen so far.
492  using LeafMap = DenseMap<Value *, APInt>;
493  LeafMap Leaves; // Leaf -> Total weight so far.
494  SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order.
495 
496 #ifndef NDEBUG
497  SmallPtrSet<Value *, 8> Visited; // For sanity checking the iteration scheme.
498 #endif
499  while (!Worklist.empty()) {
500  std::pair<Instruction*, APInt> P = Worklist.pop_back_val();
501  I = P.first; // We examine the operands of this binary operator.
502 
503  for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
504  Value *Op = I->getOperand(OpIdx);
505  APInt Weight = P.second; // Number of paths to this operand.
506  LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
507  assert(!Op->use_empty() && "No uses, so how did we get to it?!");
508 
509  // If this is a binary operation of the right kind with only one use then
510  // add its operands to the expression.
511  if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
512  assert(Visited.insert(Op).second && "Not first visit!");
513  LLVM_DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
514  Worklist.push_back(std::make_pair(BO, Weight));
515  continue;
516  }
517 
518  // Appears to be a leaf. Is the operand already in the set of leaves?
519  LeafMap::iterator It = Leaves.find(Op);
520  if (It == Leaves.end()) {
521  // Not in the leaf map. Must be the first time we saw this operand.
522  assert(Visited.insert(Op).second && "Not first visit!");
523  if (!Op->hasOneUse()) {
524  // This value has uses not accounted for by the expression, so it is
525  // not safe to modify. Mark it as being a leaf.
526  LLVM_DEBUG(dbgs()
527  << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
528  LeafOrder.push_back(Op);
529  Leaves[Op] = Weight;
530  continue;
531  }
532  // No uses outside the expression, try morphing it.
533  } else {
534  // Already in the leaf map.
535  assert(It != Leaves.end() && Visited.count(Op) &&
536  "In leaf map but not visited!");
537 
538  // Update the number of paths to the leaf.
539  IncorporateWeight(It->second, Weight, Opcode);
540 
541 #if 0 // TODO: Re-enable once PR13021 is fixed.
542  // The leaf already has one use from inside the expression. As we want
543  // exactly one such use, drop this new use of the leaf.
544  assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
545  I->setOperand(OpIdx, UndefValue::get(I->getType()));
546  Changed = true;
547 
548  // If the leaf is a binary operation of the right kind and we now see
549  // that its multiple original uses were in fact all by nodes belonging
550  // to the expression, then no longer consider it to be a leaf and add
551  // its operands to the expression.
552  if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
553  LLVM_DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n");
554  Worklist.push_back(std::make_pair(BO, It->second));
555  Leaves.erase(It);
556  continue;
557  }
558 #endif
559 
560  // If we still have uses that are not accounted for by the expression
561  // then it is not safe to modify the value.
562  if (!Op->hasOneUse())
563  continue;
564 
565  // No uses outside the expression, try morphing it.
566  Weight = It->second;
567  Leaves.erase(It); // Since the value may be morphed below.
568  }
569 
570  // At this point we have a value which, first of all, is not a binary
571  // expression of the right kind, and secondly, is only used inside the
572  // expression. This means that it can safely be modified. See if we
573  // can usefully morph it into an expression of the right kind.
574  assert((!isa<Instruction>(Op) ||
575  cast<Instruction>(Op)->getOpcode() != Opcode
576  || (isa<FPMathOperator>(Op) &&
577  !cast<Instruction>(Op)->isFast())) &&
578  "Should have been handled above!");
579  assert(Op->hasOneUse() && "Has uses outside the expression tree!");
580 
581  // If this is a multiply expression, turn any internal negations into
582  // multiplies by -1 so they can be reassociated.
583  if (Instruction *Tmp = dyn_cast<Instruction>(Op))
584  if ((Opcode == Instruction::Mul && match(Tmp, m_Neg(m_Value()))) ||
585  (Opcode == Instruction::FMul && match(Tmp, m_FNeg(m_Value())))) {
586  LLVM_DEBUG(dbgs()
587  << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
588  Tmp = LowerNegateToMultiply(Tmp);
589  LLVM_DEBUG(dbgs() << *Tmp << '\n');
590  Worklist.push_back(std::make_pair(Tmp, Weight));
591  Changed = true;
592  continue;
593  }
594 
595  // Failed to morph into an expression of the right type. This really is
596  // a leaf.
597  LLVM_DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
598  assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
599  LeafOrder.push_back(Op);
600  Leaves[Op] = Weight;
601  }
602  }
603 
604  // The leaves, repeated according to their weights, represent the linearized
605  // form of the expression.
606  for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) {
607  Value *V = LeafOrder[i];
608  LeafMap::iterator It = Leaves.find(V);
609  if (It == Leaves.end())
610  // Node initially thought to be a leaf wasn't.
611  continue;
612  assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
613  APInt Weight = It->second;
614  if (Weight.isMinValue())
615  // Leaf already output or weight reduction eliminated it.
616  continue;
617  // Ensure the leaf is only output once.
618  It->second = 0;
619  Ops.push_back(std::make_pair(V, Weight));
620  }
621 
622  // For nilpotent operations or addition there may be no operands, for example
623  // because the expression was "X xor X" or consisted of 2^Bitwidth additions:
624  // in both cases the weight reduces to 0 causing the value to be skipped.
625  if (Ops.empty()) {
626  Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
627  assert(Identity && "Associative operation without identity!");
628  Ops.emplace_back(Identity, APInt(Bitwidth, 1));
629  }
630 
631  return Changed;
632 }
633 
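// Illustrative example (not part of the original source): linearizing
//   %t = mul i32 %a, %a
//   %r = mul i32 %t, %b
// with %t used only by %r yields the leaves (%a, 2) and (%b, 1), i.e.
// %r == %a * %a * %b; each weight counts the number of paths from the root to
// that leaf.
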
634 /// Now that the operands for this expression tree are
635 /// linearized and optimized, emit them in-order.
636 void ReassociatePass::RewriteExprTree(BinaryOperator *I,
637                                       SmallVectorImpl<ValueEntry> &Ops) {
638   assert(Ops.size() > 1 && "Single values should be used directly!");
639 
640  // Since our optimizations should never increase the number of operations, the
641  // new expression can usually be written reusing the existing binary operators
642  // from the original expression tree, without creating any new instructions,
643  // though the rewritten expression may have a completely different topology.
644  // We take care to not change anything if the new expression will be the same
645  // as the original. If more than trivial changes (like commuting operands)
646  // were made then we are obliged to clear out any optional subclass data like
647  // nsw flags.
648 
649  /// NodesToRewrite - Nodes from the original expression available for writing
650  /// the new expression into.
651  SmallVector<BinaryOperator*, 8> NodesToRewrite;
652  unsigned Opcode = I->getOpcode();
653  BinaryOperator *Op = I;
654 
655  /// NotRewritable - The operands being written will be the leaves of the new
656  /// expression and must not be used as inner nodes (via NodesToRewrite) by
657  /// mistake. Inner nodes are always reassociable, and usually leaves are not
658  /// (if they were they would have been incorporated into the expression and so
659  /// would not be leaves), so most of the time there is no danger of this. But
660  /// in rare cases a leaf may become reassociable if an optimization kills uses
661  /// of it, or it may momentarily become reassociable during rewriting (below)
662  /// due to it being removed as an operand of one of its uses. Ensure that misuse
663  /// of leaf nodes as inner nodes cannot occur by remembering all of the future
664  /// leaves and refusing to reuse any of them as inner nodes.
665  SmallPtrSet<Value*, 8> NotRewritable;
666  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
667  NotRewritable.insert(Ops[i].Op);
668 
669  // ExpressionChanged - Non-null if the rewritten expression differs from the
670  // original in some non-trivial way, requiring the clearing of optional flags.
671  // Flags are cleared from the operator in ExpressionChanged up to I inclusive.
672  BinaryOperator *ExpressionChanged = nullptr;
673  for (unsigned i = 0; ; ++i) {
674  // The last operation (which comes earliest in the IR) is special as both
675  // operands will come from Ops, rather than just one with the other being
676  // a subexpression.
677  if (i+2 == Ops.size()) {
678  Value *NewLHS = Ops[i].Op;
679  Value *NewRHS = Ops[i+1].Op;
680  Value *OldLHS = Op->getOperand(0);
681  Value *OldRHS = Op->getOperand(1);
682 
683  if (NewLHS == OldLHS && NewRHS == OldRHS)
684  // Nothing changed, leave it alone.
685  break;
686 
687  if (NewLHS == OldRHS && NewRHS == OldLHS) {
688  // The order of the operands was reversed. Swap them.
689  LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
690  Op->swapOperands();
691  LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
692  MadeChange = true;
693  ++NumChanged;
694  break;
695  }
696 
697  // The new operation differs non-trivially from the original. Overwrite
698  // the old operands with the new ones.
699  LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
700  if (NewLHS != OldLHS) {
701  BinaryOperator *BO = isReassociableOp(OldLHS, Opcode);
702  if (BO && !NotRewritable.count(BO))
703  NodesToRewrite.push_back(BO);
704  Op->setOperand(0, NewLHS);
705  }
706  if (NewRHS != OldRHS) {
707  BinaryOperator *BO = isReassociableOp(OldRHS, Opcode);
708  if (BO && !NotRewritable.count(BO))
709  NodesToRewrite.push_back(BO);
710  Op->setOperand(1, NewRHS);
711  }
712  LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
713 
714  ExpressionChanged = Op;
715  MadeChange = true;
716  ++NumChanged;
717 
718  break;
719  }
720 
721  // Not the last operation. The left-hand side will be a sub-expression
722  // while the right-hand side will be the current element of Ops.
723  Value *NewRHS = Ops[i].Op;
724  if (NewRHS != Op->getOperand(1)) {
725  LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
726  if (NewRHS == Op->getOperand(0)) {
727  // The new right-hand side was already present as the left operand. If
728  // we are lucky then swapping the operands will sort out both of them.
729  Op->swapOperands();
730  } else {
731  // Overwrite with the new right-hand side.
732  BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
733  if (BO && !NotRewritable.count(BO))
734  NodesToRewrite.push_back(BO);
735  Op->setOperand(1, NewRHS);
736  ExpressionChanged = Op;
737  }
738  LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
739  MadeChange = true;
740  ++NumChanged;
741  }
742 
743  // Now deal with the left-hand side. If this is already an operation node
744  // from the original expression then just rewrite the rest of the expression
745  // into it.
746  BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
747  if (BO && !NotRewritable.count(BO)) {
748  Op = BO;
749  continue;
750  }
751 
752  // Otherwise, grab a spare node from the original expression and use that as
753  // the left-hand side. If there are no nodes left then the optimizers made
754  // an expression with more nodes than the original! This usually means that
755  // they did something stupid but it might mean that the problem was just too
756     // hard (finding the minimal number of multiplications needed to realize a
757  // multiplication expression is NP-complete). Whatever the reason, smart or
758  // stupid, create a new node if there are none left.
759  BinaryOperator *NewOp;
760  if (NodesToRewrite.empty()) {
761  Constant *Undef = UndefValue::get(I->getType());
762       NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
763                                      Undef, Undef, "", I);
764  if (NewOp->getType()->isFPOrFPVectorTy())
765  NewOp->setFastMathFlags(I->getFastMathFlags());
766  } else {
767  NewOp = NodesToRewrite.pop_back_val();
768  }
769 
770  LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
771  Op->setOperand(0, NewOp);
772  LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
773  ExpressionChanged = Op;
774  MadeChange = true;
775  ++NumChanged;
776  Op = NewOp;
777  }
778 
779  // If the expression changed non-trivially then clear out all subclass data
780  // starting from the operator specified in ExpressionChanged, and compactify
781  // the operators to just before the expression root to guarantee that the
782  // expression tree is dominated by all of Ops.
783  if (ExpressionChanged)
784  do {
785  // Preserve FastMathFlags.
786  if (isa<FPMathOperator>(I)) {
787  FastMathFlags Flags = I->getFastMathFlags();
788  ExpressionChanged->clearSubclassOptionalData();
789  ExpressionChanged->setFastMathFlags(Flags);
790  } else
791  ExpressionChanged->clearSubclassOptionalData();
792 
793  if (ExpressionChanged == I)
794  break;
795 
796       // Discard any debug info related to the expression that has changed (we
797       // can leave debug info related to the root, since the result of the
798       // expression tree should be the same even after reassociation).
799  replaceDbgUsesWithUndef(ExpressionChanged);
800 
801  ExpressionChanged->moveBefore(I);
802  ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->user_begin());
803  } while (true);
804 
805  // Throw away any left over nodes from the original expression.
806  for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
807  RedoInsts.insert(NodesToRewrite[i]);
808 }
809 
810 /// Insert instructions before the instruction pointed to by BI,
811 /// that computes the negative version of the value specified. The negative
812 /// version of the value is returned, and BI is left pointing at the instruction
813 /// that should be processed next by the reassociation pass.
814 /// Also add intermediate instructions to the redo list that are modified while
815 /// pushing the negates through adds. These will be revisited to see if
816 /// additional opportunities have been exposed.
817 static Value *NegateValue(Value *V, Instruction *BI,
818                           ReassociatePass::OrderedSet &ToRedo) {
819   if (auto *C = dyn_cast<Constant>(V))
820     return C->getType()->isFPOrFPVectorTy() ? ConstantExpr::getFNeg(C)
821                                             : ConstantExpr::getNeg(C);
822 
823  // We are trying to expose opportunity for reassociation. One of the things
824  // that we want to do to achieve this is to push a negation as deep into an
825  // expression chain as possible, to expose the add instructions. In practice,
826  // this means that we turn this:
827  // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
828  // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
829  // the constants. We assume that instcombine will clean up the mess later if
830  // we introduce tons of unnecessary negation instructions.
831  //
832  if (BinaryOperator *I =
833  isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
834  // Push the negates through the add.
835  I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
836  I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
837  if (I->getOpcode() == Instruction::Add) {
838  I->setHasNoUnsignedWrap(false);
839  I->setHasNoSignedWrap(false);
840  }
841 
842  // We must move the add instruction here, because the neg instructions do
843  // not dominate the old add instruction in general. By moving it, we are
844  // assured that the neg instructions we just inserted dominate the
845  // instruction we are about to insert after them.
846  //
847  I->moveBefore(BI);
848  I->setName(I->getName()+".neg");
849 
850  // Add the intermediate negates to the redo list as processing them later
851  // could expose more reassociating opportunities.
852  ToRedo.insert(I);
853  return I;
854  }
855 
856  // Okay, we need to materialize a negated version of V with an instruction.
857  // Scan the use lists of V to see if we have one already.
858  for (User *U : V->users()) {
859  if (!match(U, m_Neg(m_Value())) && !match(U, m_FNeg(m_Value())))
860  continue;
861 
862  // We found one! Now we have to make sure that the definition dominates
863  // this use. We do this by moving it to the entry block (if it is a
864  // non-instruction value) or right after the definition. These negates will
865  // be zapped by reassociate later, so we don't need much finesse here.
866  Instruction *TheNeg = cast<Instruction>(U);
867 
868  // Verify that the negate is in this function, V might be a constant expr.
869  if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
870  continue;
871 
872  bool FoundCatchSwitch = false;
873 
874  BasicBlock::iterator InsertPt;
875  if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
876  if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
877  InsertPt = II->getNormalDest()->begin();
878  } else {
879  InsertPt = ++InstInput->getIterator();
880  }
881 
882  const BasicBlock *BB = InsertPt->getParent();
883 
884  // Make sure we don't move anything before PHIs or exception
885  // handling pads.
886  while (InsertPt != BB->end() && (isa<PHINode>(InsertPt) ||
887  InsertPt->isEHPad())) {
888  if (isa<CatchSwitchInst>(InsertPt))
889  // A catchswitch cannot have anything in the block except
890  // itself and PHIs. We'll bail out below.
891  FoundCatchSwitch = true;
892  ++InsertPt;
893  }
894  } else {
895  InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
896  }
897 
898  // We found a catchswitch in the block where we want to move the
899  // neg. We cannot move anything into that block. Bail and just
900  // create the neg before BI, as if we hadn't found an existing
901  // neg.
902  if (FoundCatchSwitch)
903  break;
904 
905  TheNeg->moveBefore(&*InsertPt);
906  if (TheNeg->getOpcode() == Instruction::Sub) {
907  TheNeg->setHasNoUnsignedWrap(false);
908  TheNeg->setHasNoSignedWrap(false);
909  } else {
910  TheNeg->andIRFlags(BI);
911  }
912  ToRedo.insert(TheNeg);
913  return TheNeg;
914  }
915 
916  // Insert a 'neg' instruction that subtracts the value from zero to get the
917  // negation.
918  Instruction *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
919  ToRedo.insert(NewNeg);
920  return NewNeg;
921 }
922 
923 // See if this `or` looks like a load widening reduction, i.e. that it
924 // consists of `or`/`shl`/`zext`/`load` nodes only. Note that we don't
925 // ensure that the pattern is *really* a load widening reduction,
926 // we do not ensure that it can really be replaced with a widened load,
927 // only that it mostly looks like one.
928 static bool isLoadCombineCandidate(Instruction *Or) {
929   SmallVector<Instruction *, 8> Worklist;
930   SmallSet<Instruction *, 8> Visited;
931 
932  auto Enqueue = [&](Value *V) {
933  auto *I = dyn_cast<Instruction>(V);
934  // Each node of an `or` reduction must be an instruction,
935  if (!I)
936  return false; // Node is certainly not part of an `or` load reduction.
937  // Only process instructions we have never processed before.
938  if (Visited.insert(I).second)
939  Worklist.emplace_back(I);
940  return true; // Will need to look at parent nodes.
941  };
942 
943  if (!Enqueue(Or))
944  return false; // Not an `or` reduction pattern.
945 
946  while (!Worklist.empty()) {
947  auto *I = Worklist.pop_back_val();
948 
949  // Okay, which instruction is this node?
950  switch (I->getOpcode()) {
951  case Instruction::Or:
952     // Got an `or` node. That's fine, just recurse into its operands.
953  for (Value *Op : I->operands())
954  if (!Enqueue(Op))
955  return false; // Not an `or` reduction pattern.
956  continue;
957 
958  case Instruction::Shl:
959  case Instruction::ZExt:
960  // `shl`/`zext` nodes are fine, just recurse into their base operand.
961  if (!Enqueue(I->getOperand(0)))
962  return false; // Not an `or` reduction pattern.
963  continue;
964 
965  case Instruction::Load:
966  // Perfect, `load` node means we've reached an edge of the graph.
967  continue;
968 
969  default: // Unknown node.
970  return false; // Not an `or` reduction pattern.
971  }
972  }
973 
974  return true;
975 }
976 
977 /// Return true if it may be profitable to convert this (X|Y) into (X+Y).
978 static bool ShouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
979   // Don't bother to convert this up unless either the LHS is an associable add
980  // or subtract or mul or if this is only used by one of the above.
981  // This is only a compile-time improvement, it is not needed for correctness!
982  auto isInteresting = [](Value *V) {
983  for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul,
984  Instruction::Shl})
985  if (isReassociableOp(V, Op))
986  return true;
987  return false;
988  };
989 
990  if (any_of(Or->operands(), isInteresting))
991  return true;
992 
993  Value *VB = Or->user_back();
994  if (Or->hasOneUse() && isInteresting(VB))
995  return true;
996 
997  return false;
998 }
999 
1000 /// If we have (X|Y), and iff X and Y have no common bits set,
1001 /// transform this into (X+Y) to allow arithmetic reassociation.
1002 static BinaryOperator *ConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
1003   // Convert an or into an add.
1004  BinaryOperator *New =
1005  CreateAdd(Or->getOperand(0), Or->getOperand(1), "", Or, Or);
1006  New->setHasNoSignedWrap();
1007  New->setHasNoUnsignedWrap();
1008  New->takeName(Or);
1009 
1010  // Everyone now refers to the add instruction.
1011  Or->replaceAllUsesWith(New);
1012  New->setDebugLoc(Or->getDebugLoc());
1013 
1014  LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << '\n');
1015  return New;
1016 }
1017 
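// Illustrative example (assumption, not from the original source): if %x is
// known to have its low bit clear, "or i32 %x, 1" shares no set bits with 1
// and can be rewritten as "add nuw nsw i32 %x, 1", which then participates in
// the add reassociation below.
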
1018 /// Return true if we should break up this subtract of X-Y into (X + -Y).
1019 static bool ShouldBreakUpSubtract(Instruction *Sub) {
1020   // If this is a negation, we can't split it up!
1021  if (match(Sub, m_Neg(m_Value())) || match(Sub, m_FNeg(m_Value())))
1022  return false;
1023 
1024  // Don't breakup X - undef.
1025  if (isa<UndefValue>(Sub->getOperand(1)))
1026  return false;
1027 
1028  // Don't bother to break this up unless either the LHS is an associable add or
1029  // subtract or if this is only used by one.
1030  Value *V0 = Sub->getOperand(0);
1031  if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) ||
1032  isReassociableOp(V0, Instruction::Sub, Instruction::FSub))
1033  return true;
1034  Value *V1 = Sub->getOperand(1);
1035  if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) ||
1036  isReassociableOp(V1, Instruction::Sub, Instruction::FSub))
1037  return true;
1038  Value *VB = Sub->user_back();
1039  if (Sub->hasOneUse() &&
1040  (isReassociableOp(VB, Instruction::Add, Instruction::FAdd) ||
1041  isReassociableOp(VB, Instruction::Sub, Instruction::FSub)))
1042  return true;
1043 
1044  return false;
1045 }
1046 
1047 /// If we have (X-Y), and if either X is an add, or if this is only used by an
1048 /// add, transform this into (X+(0-Y)) to promote better reassociation.
1049 static BinaryOperator *BreakUpSubtract(Instruction *Sub,
1050                                        ReassociatePass::OrderedSet &ToRedo) {
1051  // Convert a subtract into an add and a neg instruction. This allows sub
1052  // instructions to be commuted with other add instructions.
1053  //
1054  // Calculate the negative value of Operand 1 of the sub instruction,
1055  // and set it as the RHS of the add instruction we just made.
1056  Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
1057  BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
1058  Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
1059  Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
1060  New->takeName(Sub);
1061 
1062  // Everyone now refers to the add instruction.
1063  Sub->replaceAllUsesWith(New);
1064  New->setDebugLoc(Sub->getDebugLoc());
1065 
1066  LLVM_DEBUG(dbgs() << "Negated: " << *New << '\n');
1067  return New;
1068 }
1069 
1070 /// If this is a shift of a reassociable multiply or is used by one, change
1071 /// this into a multiply by a constant to assist with further reassociation.
1072 static Instruction *ConvertShiftToMul(Instruction *Shl) {
1073   Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
1074  auto *SA = cast<ConstantInt>(Shl->getOperand(1));
1075  MulCst = ConstantExpr::getShl(MulCst, SA);
1076 
1077  BinaryOperator *Mul =
1078  BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
1079  Shl->setOperand(0, UndefValue::get(Shl->getType())); // Drop use of op.
1080  Mul->takeName(Shl);
1081 
1082  // Everyone now refers to the mul instruction.
1083  Shl->replaceAllUsesWith(Mul);
1084  Mul->setDebugLoc(Shl->getDebugLoc());
1085 
1086  // We can safely preserve the nuw flag in all cases. It's also safe to turn a
1087  // nuw nsw shl into a nuw nsw mul. However, nsw in isolation requires special
1088  // handling. It can be preserved as long as we're not left shifting by
1089  // bitwidth - 1.
1090  bool NSW = cast<BinaryOperator>(Shl)->hasNoSignedWrap();
1091  bool NUW = cast<BinaryOperator>(Shl)->hasNoUnsignedWrap();
1092  unsigned BitWidth = Shl->getType()->getIntegerBitWidth();
1093  if (NSW && (NUW || SA->getValue().ult(BitWidth - 1)))
1094  Mul->setHasNoSignedWrap(true);
1095  Mul->setHasNoUnsignedWrap(NUW);
1096  return Mul;
1097 }
1098 
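// Illustrative example (not part of the original source): "shl i32 %x, 3"
// becomes "mul i32 %x, 8" (MulCst = 1 << 3); nuw is always preserved, while
// nsw is kept only if nuw is also present or the shift amount is less than
// bitwidth - 1, per the comment above.
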
1099 /// Scan backwards and forwards among values with the same rank as element i
1100 /// to see if X exists. If X does not exist, return i. This is useful when
1101 /// scanning for 'x' when we see '-x' because they both get the same rank.
1102 static unsigned FindInOperandList(const SmallVectorImpl<ValueEntry> &Ops,
1103                                   unsigned i, Value *X) {
1104  unsigned XRank = Ops[i].Rank;
1105  unsigned e = Ops.size();
1106  for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) {
1107  if (Ops[j].Op == X)
1108  return j;
1109  if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
1110  if (Instruction *I2 = dyn_cast<Instruction>(X))
1111  if (I1->isIdenticalTo(I2))
1112  return j;
1113  }
1114  // Scan backwards.
1115  for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) {
1116  if (Ops[j].Op == X)
1117  return j;
1118  if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
1119  if (Instruction *I2 = dyn_cast<Instruction>(X))
1120  if (I1->isIdenticalTo(I2))
1121  return j;
1122  }
1123  return i;
1124 }
1125 
1126 /// Emit a tree of add instructions, summing Ops together
1127 /// and returning the result. Insert the tree before I.
1128 static Value *EmitAddTreeOfValues(Instruction *I,
1129                                   SmallVectorImpl<WeakTrackingVH> &Ops) {
1130   if (Ops.size() == 1) return Ops.back();
1131 
1132  Value *V1 = Ops.pop_back_val();
1133  Value *V2 = EmitAddTreeOfValues(I, Ops);
1134  return CreateAdd(V2, V1, "reass.add", I, I);
1135 }
1136 
1137 /// If V is an expression tree that is a multiplication sequence,
1138 /// and if this sequence contains a multiply by Factor,
1139 /// remove Factor from the tree and return the new tree.
1140 Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
1141  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1142  if (!BO)
1143  return nullptr;
1144 
1145   SmallVector<RepeatedValue, 8> Tree;
1146   MadeChange |= LinearizeExprTree(BO, Tree);
1147   SmallVector<ValueEntry, 8> Factors;
1148   Factors.reserve(Tree.size());
1149  for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
1150  RepeatedValue E = Tree[i];
1151  Factors.append(E.second.getZExtValue(),
1152  ValueEntry(getRank(E.first), E.first));
1153  }
1154 
1155  bool FoundFactor = false;
1156  bool NeedsNegate = false;
1157  for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
1158  if (Factors[i].Op == Factor) {
1159  FoundFactor = true;
1160  Factors.erase(Factors.begin()+i);
1161  break;
1162  }
1163 
1164  // If this is a negative version of this factor, remove it.
1165  if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) {
1166  if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
1167  if (FC1->getValue() == -FC2->getValue()) {
1168  FoundFactor = NeedsNegate = true;
1169  Factors.erase(Factors.begin()+i);
1170  break;
1171  }
1172  } else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) {
1173  if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Factors[i].Op)) {
1174  const APFloat &F1 = FC1->getValueAPF();
1175  APFloat F2(FC2->getValueAPF());
1176  F2.changeSign();
1177  if (F1 == F2) {
1178  FoundFactor = NeedsNegate = true;
1179  Factors.erase(Factors.begin() + i);
1180  break;
1181  }
1182  }
1183  }
1184  }
1185 
1186  if (!FoundFactor) {
1187  // Make sure to restore the operands to the expression tree.
1188  RewriteExprTree(BO, Factors);
1189  return nullptr;
1190  }
1191 
1192  BasicBlock::iterator InsertPt = ++BO->getIterator();
1193 
1194  // If this was just a single multiply, remove the multiply and return the only
1195  // remaining operand.
1196  if (Factors.size() == 1) {
1197  RedoInsts.insert(BO);
1198  V = Factors[0].Op;
1199  } else {
1200  RewriteExprTree(BO, Factors);
1201  V = BO;
1202  }
1203 
1204  if (NeedsNegate)
1205  V = CreateNeg(V, "neg", &*InsertPt, BO);
1206 
1207  return V;
1208 }
1209 
1210 /// If V is a single-use multiply, recursively add its operands as factors,
1211 /// otherwise add V to the list of factors.
1212 ///
1213 /// Ops is the top-level list of add operands we're trying to factor.
1214 static void FindSingleUseMultiplyFactors(Value *V,
1215                                          SmallVectorImpl<Value *> &Factors) {
1216  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1217  if (!BO) {
1218  Factors.push_back(V);
1219  return;
1220  }
1221 
1222  // Otherwise, add the LHS and RHS to the list of factors.
1223  FindSingleUseMultiplyFactors(BO->getOperand(1), Factors);
1224  FindSingleUseMultiplyFactors(BO->getOperand(0), Factors);
1225 }
1226 
1227 /// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
1228 /// This optimizes based on identities. If it can be reduced to a single Value,
1229 /// it is returned, otherwise the Ops list is mutated as necessary.
1230 static Value *OptimizeAndOrXor(unsigned Opcode,
1231                                SmallVectorImpl<ValueEntry> &Ops) {
1232   // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
1233  // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
1234  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1235  // First, check for X and ~X in the operand list.
1236  assert(i < Ops.size());
1237  Value *X;
1238  if (match(Ops[i].Op, m_Not(m_Value(X)))) { // Cannot occur for ^.
1239  unsigned FoundX = FindInOperandList(Ops, i, X);
1240  if (FoundX != i) {
1241  if (Opcode == Instruction::And) // ...&X&~X = 0
1242  return Constant::getNullValue(X->getType());
1243 
1244  if (Opcode == Instruction::Or) // ...|X|~X = -1
1245  return Constant::getAllOnesValue(X->getType());
1246  }
1247  }
1248 
1249  // Next, check for duplicate pairs of values, which we assume are next to
1250  // each other, due to our sorting criteria.
1251  assert(i < Ops.size());
1252  if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
1253  if (Opcode == Instruction::And || Opcode == Instruction::Or) {
1254  // Drop duplicate values for And and Or.
1255  Ops.erase(Ops.begin()+i);
1256  --i; --e;
1257  ++NumAnnihil;
1258  continue;
1259  }
1260 
1261  // Drop pairs of values for Xor.
1262  assert(Opcode == Instruction::Xor);
1263  if (e == 2)
1264  return Constant::getNullValue(Ops[0].Op->getType());
1265 
1266  // Y ^ X^X -> Y
1267  Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
1268  i -= 1; e -= 2;
1269  ++NumAnnihil;
1270  }
1271  }
1272  return nullptr;
1273 }
1274 
1275 /// Helper function of CombineXorOpnd(). It creates a bitwise-and
1276 /// instruction with the given two operands, and return the resulting
1277 /// instruction. There are two special cases: 1) if the constant operand is 0,
1278 /// it will return NULL. 2) if the constant is ~0, the symbolic operand will
1279 /// be returned.
1280 static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
1281  const APInt &ConstOpnd) {
1282  if (ConstOpnd.isNullValue())
1283  return nullptr;
1284 
1285  if (ConstOpnd.isAllOnesValue())
1286  return Opnd;
1287 
1288  Instruction *I = BinaryOperator::CreateAnd(
1289  Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra",
1290  InsertBefore);
1291  I->setDebugLoc(InsertBefore->getDebugLoc());
1292  return I;
1293 }
1294 
1295 // Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
1296 // into "R ^ C", where C would be 0, and R is a symbolic value.
1297 //
1298 // If it was successful, true is returned, and the "R" and "C" are returned
1299 // via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
1300 // and both "Res" and "ConstOpnd" remain unchanged.
1301 bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
1302  APInt &ConstOpnd, Value *&Res) {
1303  // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
1304  // = ((x | c1) ^ c1) ^ (c1 ^ c2)
1305  // = (x & ~c1) ^ (c1 ^ c2)
1306  // It is useful only when c1 == c2.
1307  if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isNullValue())
1308  return false;
1309 
1310  if (!Opnd1->getValue()->hasOneUse())
1311  return false;
1312 
1313  const APInt &C1 = Opnd1->getConstPart();
1314  if (C1 != ConstOpnd)
1315  return false;
1316 
1317  Value *X = Opnd1->getSymbolicPart();
1318  Res = createAndInstr(I, X, ~C1);
1319  // ConstOpnd was C2, now C1 ^ C2.
1320  ConstOpnd ^= C1;
1321 
1322  if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1323  RedoInsts.insert(T);
1324  return true;
1325 }
1326 
1327 // Helper function of OptimizeXor(). It tries to simplify
1328 // "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
1329 // symbolic value.
1330 //
1331 // If it was successful, true is returned, and the "R" and "C" are returned
1332 // via "Res" and "ConstOpnd", respectively (If the entire expression is
1333 // evaluated to a constant, the Res is set to NULL); otherwise, false is
1334 // returned, and both "Res" and "ConstOpnd" remain unchanged.
1335 bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
1336  XorOpnd *Opnd2, APInt &ConstOpnd,
1337  Value *&Res) {
1338  Value *X = Opnd1->getSymbolicPart();
1339  if (X != Opnd2->getSymbolicPart())
1340  return false;
1341 
1342   // This many instructions become dead. (At least "Opnd1 ^ Opnd2" will die.)
1343  int DeadInstNum = 1;
1344  if (Opnd1->getValue()->hasOneUse())
1345  DeadInstNum++;
1346  if (Opnd2->getValue()->hasOneUse())
1347  DeadInstNum++;
1348 
1349  // Xor-Rule 2:
1350  // (x | c1) ^ (x & c2)
1351  // = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
1352  // = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1
1353  // = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
1354  //
1355  if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
1356  if (Opnd2->isOrExpr())
1357  std::swap(Opnd1, Opnd2);
1358 
1359  const APInt &C1 = Opnd1->getConstPart();
1360  const APInt &C2 = Opnd2->getConstPart();
1361  APInt C3((~C1) ^ C2);
1362 
1363  // Do not increase code size!
1364  if (!C3.isNullValue() && !C3.isAllOnesValue()) {
1365  int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1366  if (NewInstNum > DeadInstNum)
1367  return false;
1368  }
1369 
1370  Res = createAndInstr(I, X, C3);
1371  ConstOpnd ^= C1;
1372  } else if (Opnd1->isOrExpr()) {
1373  // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
1374  //
1375  const APInt &C1 = Opnd1->getConstPart();
1376  const APInt &C2 = Opnd2->getConstPart();
1377  APInt C3 = C1 ^ C2;
1378 
1379  // Do not increase code size
1380  if (!C3.isNullValue() && !C3.isAllOnesValue()) {
1381  int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1382  if (NewInstNum > DeadInstNum)
1383  return false;
1384  }
1385 
1386  Res = createAndInstr(I, X, C3);
1387  ConstOpnd ^= C3;
1388  } else {
1389  // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
1390  //
1391  const APInt &C1 = Opnd1->getConstPart();
1392  const APInt &C2 = Opnd2->getConstPart();
1393  APInt C3 = C1 ^ C2;
1394  Res = createAndInstr(I, X, C3);
1395  }
1396 
1397  // Put the original operands in the Redo list; hope they will be deleted
1398  // as dead code.
1399  if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1400  RedoInsts.insert(T);
1401  if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
1402  RedoInsts.insert(T);
1403 
1404  return true;
1405 }
1406 
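// Worked example of Xor-Rule 3 (illustrative, not from the original source),
// with i8 operands: (x | 12) ^ (x | 10) == (x & 6) ^ 6, since c3 = 12 ^ 10 = 6.
// Spot check with x = 1: (1 | 12) ^ (1 | 10) = 13 ^ 11 = 6, and
// (1 & 6) ^ 6 = 0 ^ 6 = 6.
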
1407 /// Optimize a series of operands to an 'xor' instruction. If it can be reduced
1408 /// to a single Value, it is returned, otherwise the Ops list is mutated as
1409 /// necessary.
1410 Value *ReassociatePass::OptimizeXor(Instruction *I,
1411                                     SmallVectorImpl<ValueEntry> &Ops) {
1412  if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
1413  return V;
1414 
1415  if (Ops.size() == 1)
1416  return nullptr;
1417 
1418   SmallVector<XorOpnd, 8> Opnds;
1419   SmallVector<XorOpnd*, 8> OpndPtrs;
1420  Type *Ty = Ops[0].Op->getType();
1421  APInt ConstOpnd(Ty->getScalarSizeInBits(), 0);
1422 
1423  // Step 1: Convert ValueEntry to XorOpnd
1424  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1425  Value *V = Ops[i].Op;
1426  const APInt *C;
1427  // TODO: Support non-splat vectors.
1428  if (match(V, m_APInt(C))) {
1429  ConstOpnd ^= *C;
1430  } else {
1431  XorOpnd O(V);
1432  O.setSymbolicRank(getRank(O.getSymbolicPart()));
1433  Opnds.push_back(O);
1434  }
1435  }
1436 
1437  // NOTE: From this point on, do *NOT* add or delete elements in "Opnds".
1438  // Doing so would invalidate the iterators into "Opnds", and hence invalidate
1439  // "OpndPtrs" as well. For a similar reason, do not fuse this loop with the
1440  // previous loop --- the iterators of "Opnds" may be invalidated when new
1441  // elements are added to the vector.
1442  for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
1443  OpndPtrs.push_back(&Opnds[i]);
1444 
1445  // Step 2: Sort the Xor-Operands in a way such that the operands containing
1446  // the same symbolic value cluster together. For instance, the input operand
1447  // sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
1448  // ("x | 123", "x & 789", "y & 456").
1449  //
1450  // The purpose is twofold:
1451  // 1) Cluster together the operands sharing the same symbolic-value.
1452  // 2) Operands with a smaller symbolic-value rank are permuted earlier, which
1453  // can potentially shorten the critical path and expose more loop-invariants.
1454  // Note that values' ranks are basically defined in RPO order (FIXME).
1455  // So, if Rank(X) < Rank(Y) < Rank(Z), X is defined earlier than Y, which is
1456  // defined earlier than Z. Permuting "x | 1", "y & 2", "z" in the order
1457  // X-Y-Z is better than any other order.
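  // (Illustrative: if 'a' is a function argument (low rank) and 't' is computed
  // inside a loop (high rank), "a ^ (a | 3) ^ t" keeps the two 'a' operands
  // adjacent so they can be combined without touching 't'.)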
1458  llvm::stable_sort(OpndPtrs, [](XorOpnd *LHS, XorOpnd *RHS) {
1459  return LHS->getSymbolicRank() < RHS->getSymbolicRank();
1460  });
1461 
1462  // Step 3: Combine adjacent operands
1463  XorOpnd *PrevOpnd = nullptr;
1464  bool Changed = false;
1465  for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
1466  XorOpnd *CurrOpnd = OpndPtrs[i];
1467  // The combined value
1468  Value *CV;
1469 
1470  // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
1471  if (!ConstOpnd.isNullValue() &&
1472  CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
1473  Changed = true;
1474  if (CV)
1475  *CurrOpnd = XorOpnd(CV);
1476  else {
1477  CurrOpnd->Invalidate();
1478  continue;
1479  }
1480  }
1481 
1482  if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
1483  PrevOpnd = CurrOpnd;
1484  continue;
1485  }
1486 
1487  // Step 3.2: When the previous and current operands share the same symbolic
1488  // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd".
1489  if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
1490  // Remove previous operand
1491  PrevOpnd->Invalidate();
1492  if (CV) {
1493  *CurrOpnd = XorOpnd(CV);
1494  PrevOpnd = CurrOpnd;
1495  } else {
1496  CurrOpnd->Invalidate();
1497  PrevOpnd = nullptr;
1498  }
1499  Changed = true;
1500  }
1501  }
1502 
1503  // Step 4: Reassemble the Ops
1504  if (Changed) {
1505  Ops.clear();
1506  for (unsigned int i = 0, e = Opnds.size(); i < e; i++) {
1507  XorOpnd &O = Opnds[i];
1508  if (O.isInvalid())
1509  continue;
1510  ValueEntry VE(getRank(O.getValue()), O.getValue());
1511  Ops.push_back(VE);
1512  }
1513  if (!ConstOpnd.isNullValue()) {
1514  Value *C = ConstantInt::get(Ty, ConstOpnd);
1515  ValueEntry VE(getRank(C), C);
1516  Ops.push_back(VE);
1517  }
1518  unsigned Sz = Ops.size();
1519  if (Sz == 1)
1520  return Ops.back().Op;
1521  if (Sz == 0) {
1522  assert(ConstOpnd.isNullValue());
1523  return ConstantInt::get(Ty, ConstOpnd);
1524  }
1525  }
1526 
1527  return nullptr;
1528 }
1529 
1530 /// Optimize a series of operands to an 'add' instruction. This
1531 /// optimizes based on identities. If it can be reduced to a single Value, it
1532 /// is returned, otherwise the Ops list is mutated as necessary.
1533 Value *ReassociatePass::OptimizeAdd(Instruction *I,
1534  SmallVectorImpl<ValueEntry> &Ops) {
1535  // Scan the operand lists looking for X and -X pairs. If we find any, we
1536  // can simplify expressions like X+-X == 0 and X+~X == -1. While we're at
1537  // it, scan for any duplicates: we want to canonicalize
1538  // Y+Y+Y+Z -> 3*Y+Z.
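  // For example (illustrative), "a + a + a + b" becomes "a*3 + b" here, and the
  // new multiply is queued in RedoInsts for further reassociation.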
1539 
1540  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1541  Value *TheOp = Ops[i].Op;
1542  // Check to see if we've seen this operand before. If so, we factor all
1543  // instances of the operand together. Due to our sorting criteria, we know
1544  // that these need to be next to each other in the vector.
1545  if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
1546  // Rescan the list, remove all instances of this operand from the expr.
1547  unsigned NumFound = 0;
1548  do {
1549  Ops.erase(Ops.begin()+i);
1550  ++NumFound;
1551  } while (i != Ops.size() && Ops[i].Op == TheOp);
1552 
1553  LLVM_DEBUG(dbgs() << "\nFACTORING [" << NumFound << "]: " << *TheOp
1554  << '\n');
1555  ++NumFactor;
1556 
1557  // Insert a new multiply.
1558  Type *Ty = TheOp->getType();
1559  Constant *C = Ty->isIntOrIntVectorTy() ?
1560  ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
1561  Instruction *Mul = CreateMul(TheOp, C, "factor", I, I);
1562 
1563  // Now that we have inserted a multiply, optimize it. This allows us to
1564  // handle cases that require multiple factoring steps, such as this:
1565  // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
1566  RedoInsts.insert(Mul);
1567 
1568  // If every add operand was a duplicate, return the multiply.
1569  if (Ops.empty())
1570  return Mul;
1571 
1572  // Otherwise, we had some input that didn't have the dupe, such as
1573  // "A + A + B" -> "A*2 + B". Add the new multiply to the list of
1574  // things being added by this operation.
1575  Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
1576 
1577  --i;
1578  e = Ops.size();
1579  continue;
1580  }
1581 
1582  // Check for X and -X or X and ~X in the operand list.
1583  Value *X;
1584  if (!match(TheOp, m_Neg(m_Value(X))) && !match(TheOp, m_Not(m_Value(X))) &&
1585  !match(TheOp, m_FNeg(m_Value(X))))
1586  continue;
1587 
1588  unsigned FoundX = FindInOperandList(Ops, i, X);
1589  if (FoundX == i)
1590  continue;
1591 
1592  // Remove X and -X from the operand list.
1593  if (Ops.size() == 2 &&
1594  (match(TheOp, m_Neg(m_Value())) || match(TheOp, m_FNeg(m_Value()))))
1595  return Constant::getNullValue(X->getType());
1596 
1597  // Remove X and ~X from the operand list.
1598  if (Ops.size() == 2 && match(TheOp, m_Not(m_Value())))
1599  return Constant::getAllOnesValue(X->getType());
1600 
1601  Ops.erase(Ops.begin()+i);
1602  if (i < FoundX)
1603  --FoundX;
1604  else
1605  --i; // Need to back up an extra one.
1606  Ops.erase(Ops.begin()+FoundX);
1607  ++NumAnnihil;
1608  --i; // Revisit element.
1609  e -= 2; // Removed two elements.
1610 
1611  // If we found X and ~X, append -1 to the operand list.
1612  if (match(TheOp, m_Not(m_Value()))) {
1613  Value *V = Constant::getAllOnesValue(X->getType());
1614  Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
1615  e += 1;
1616  }
1617  }
1618 
1619  // Scan the operand list, checking to see if there are any common factors
1620  // between operands. Consider something like A*A+A*B*C+D. We would like to
1621  // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
1622  // To efficiently find this, we count the number of times a factor occurs
1623  // for any ADD operands that are MULs.
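  // For example (illustrative), with A*A + A*B*C + D the per-operand sets of
  // unique factors are {A} and {A, B, C}, so A is counted twice and becomes
  // the best candidate to factor out below.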
1624  DenseMap<Value*, unsigned> FactorOccurrences;
1625 
1626  // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
1627  // where they are actually the same multiply.
1628  unsigned MaxOcc = 0;
1629  Value *MaxOccVal = nullptr;
1630  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1631  BinaryOperator *BOp =
1632  isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1633  if (!BOp)
1634  continue;
1635 
1636  // Compute all of the factors of this added value.
1637  SmallVector<Value*, 8> Factors;
1638  FindSingleUseMultiplyFactors(BOp, Factors);
1639  assert(Factors.size() > 1 && "Bad linearize!");
1640 
1641  // Add one to FactorOccurrences for each unique factor in this op.
1642  SmallPtrSet<Value*, 8> Duplicates;
1643  for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
1644  Value *Factor = Factors[i];
1645  if (!Duplicates.insert(Factor).second)
1646  continue;
1647 
1648  unsigned Occ = ++FactorOccurrences[Factor];
1649  if (Occ > MaxOcc) {
1650  MaxOcc = Occ;
1651  MaxOccVal = Factor;
1652  }
1653 
1654  // If Factor is a negative constant, add the negated value as a factor
1655  // because we can percolate the negate out. Watch for minint, which
1656  // cannot be negated without overflowing.
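  // e.g. a factor of -4 is also counted as 4, so something like
  // (x * -4) + (y * 4) can still share the common factor 4 (illustrative).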
1657  if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
1658  if (CI->isNegative() && !CI->isMinValue(true)) {
1659  Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
1660  if (!Duplicates.insert(Factor).second)
1661  continue;
1662  unsigned Occ = ++FactorOccurrences[Factor];
1663  if (Occ > MaxOcc) {
1664  MaxOcc = Occ;
1665  MaxOccVal = Factor;
1666  }
1667  }
1668  } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Factor)) {
1669  if (CF->isNegative()) {
1670  APFloat F(CF->getValueAPF());
1671  F.changeSign();
1672  Factor = ConstantFP::get(CF->getContext(), F);
1673  if (!Duplicates.insert(Factor).second)
1674  continue;
1675  unsigned Occ = ++FactorOccurrences[Factor];
1676  if (Occ > MaxOcc) {
1677  MaxOcc = Occ;
1678  MaxOccVal = Factor;
1679  }
1680  }
1681  }
1682  }
1683  }
1684 
1685  // If any factor occurred more than one time, we can pull it out.
1686  if (MaxOcc > 1) {
1687  LLVM_DEBUG(dbgs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal
1688  << '\n');
1689  ++NumFactor;
1690 
1691  // Create a new instruction that uses the MaxOccVal twice. If we don't do
1692  // this, we could otherwise run into situations where removing a factor
1693  // from an expression will drop a use of maxocc, and this can cause
1694  // RemoveFactorFromExpression on successive values to behave differently.
1695  Instruction *DummyInst =
1696  I->getType()->isIntOrIntVectorTy()
1697  ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
1698  : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
1699 
1701  for (unsigned i = 0; i != Ops.size(); ++i) {
1702  // Only try to remove factors from expressions we're allowed to.
1703  BinaryOperator *BOp =
1704  isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1705  if (!BOp)
1706  continue;
1707 
1708  if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
1709  // The factorized operand may occur several times. Convert them all in
1710  // one fell swoop.
1711  for (unsigned j = Ops.size(); j != i;) {
1712  --j;
1713  if (Ops[j].Op == Ops[i].Op) {
1714  NewMulOps.push_back(V);
1715  Ops.erase(Ops.begin()+j);
1716  }
1717  }
1718  --i;
1719  }
1720  }
1721 
1722  // No need for extra uses anymore.
1723  DummyInst->deleteValue();
1724 
1725  unsigned NumAddedValues = NewMulOps.size();
1726  Value *V = EmitAddTreeOfValues(I, NewMulOps);
1727 
1728  // Now that we have inserted the add tree, optimize it. This allows us to
1729  // handle cases that require multiple factoring steps, such as this:
1730  // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
1731  assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
1732  (void)NumAddedValues;
1733  if (Instruction *VI = dyn_cast<Instruction>(V))
1734  RedoInsts.insert(VI);
1735 
1736  // Create the multiply.
1737  Instruction *V2 = CreateMul(V, MaxOccVal, "reass.mul", I, I);
1738 
1739  // Rerun associate on the multiply in case the inner expression turned into
1740  // a multiply. We want to make sure that we keep things in canonical form.
1741  RedoInsts.insert(V2);
1742 
1743  // If every add operand included the factor (e.g. "A*B + A*C"), then the
1744  // entire result expression is just the multiply "A*(B+C)".
1745  if (Ops.empty())
1746  return V2;
1747 
1748  // Otherwise, we had some input that didn't have the factor, such as
1749  // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
1750  // things being added by this operation.
1751  Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
1752  }
1753 
1754  return nullptr;
1755 }
1756 
1757 /// Build up a vector of value/power pairs factoring a product.
1758 ///
1759 /// Given a series of multiplication operands, build a vector of factors and
1760 /// the powers each is raised to when forming the final product. Sort them in
1761 /// the order of descending power.
1762 ///
1763 /// (x*x) -> [(x, 2)]
1764 /// ((x*x)*x) -> [(x, 3)]
1765 /// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
1766 ///
1767 /// \returns Whether any factors have a power greater than one.
1768 static bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
1769  SmallVectorImpl<Factor> &Factors) {
1770  // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
1771  // Compute the sum of powers of simplifiable factors.
1772  unsigned FactorPowerSum = 0;
1773  for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
1774  Value *Op = Ops[Idx-1].Op;
1775 
1776  // Count the number of occurrences of this value.
1777  unsigned Count = 1;
1778  for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
1779  ++Count;
1780  // Track for simplification all factors which occur 2 or more times.
1781  if (Count > 1)
1782  FactorPowerSum += Count;
1783  }
1784 
1785  // We can only simplify factors if the sum of the powers of our simplifiable
1786  // factors is 4 or higher. When that is the case, we will *always* have
1787  // a simplification. This is an important invariant to prevent cyclically
1788  // trying to simplify already minimal formations.
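  // For instance (illustrative), x*x*y*y has FactorPowerSum == 4 and can be
  // rebuilt as t = x*y; t*t, while x*x*y (sum == 2) is already minimal.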
1789  if (FactorPowerSum < 4)
1790  return false;
1791 
1792  // Now gather the simplifiable factors, removing them from Ops.
1793  FactorPowerSum = 0;
1794  for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
1795  Value *Op = Ops[Idx-1].Op;
1796 
1797  // Count the number of occurrences of this value.
1798  unsigned Count = 1;
1799  for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
1800  ++Count;
1801  if (Count == 1)
1802  continue;
1803  // Move an even number of occurrences to Factors.
1804  Count &= ~1U;
1805  Idx -= Count;
1806  FactorPowerSum += Count;
1807  Factors.push_back(Factor(Op, Count));
1808  Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
1809  }
1810 
1811  // None of the adjustments above should have reduced the sum of factor powers
1812  // below our minimum of '4'.
1813  assert(FactorPowerSum >= 4);
1814 
1815  llvm::stable_sort(Factors, [](const Factor &LHS, const Factor &RHS) {
1816  return LHS.Power > RHS.Power;
1817  });
1818  return true;
1819 }
1820 
1821 /// Build a tree of multiplies, computing the product of Ops.
1822 static Value *buildMultiplyTree(IRBuilderBase &Builder,
1823  SmallVectorImpl<Value*> &Ops) {
1824  if (Ops.size() == 1)
1825  return Ops.back();
1826 
1827  Value *LHS = Ops.pop_back_val();
1828  do {
1829  if (LHS->getType()->isIntOrIntVectorTy())
1830  LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
1831  else
1832  LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
1833  } while (!Ops.empty());
1834 
1835  return LHS;
1836 }
1837 
1838 /// Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
1839 ///
1840 /// Given a vector of values raised to various powers, where no two values are
1841 /// equal and the powers are sorted in decreasing order, compute the minimal
1842 /// DAG of multiplies to compute the final product, and return that product
1843 /// value.
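 /// For example (illustrative), [(a, 3), (b, 2)] is emitted roughly as
 /// t = a*b; result = a*t*t, using three multiplies instead of four.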
1844 Value *
1845 ReassociatePass::buildMinimalMultiplyDAG(IRBuilderBase &Builder,
1846  SmallVectorImpl<Factor> &Factors) {
1847  assert(Factors[0].Power);
1848  SmallVector<Value *, 4> OuterProduct;
1849  for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
1850  Idx < Size && Factors[Idx].Power > 0; ++Idx) {
1851  if (Factors[Idx].Power != Factors[LastIdx].Power) {
1852  LastIdx = Idx;
1853  continue;
1854  }
1855 
1856  // We want to multiply across all the factors with the same power so that
1857  // we can raise them to that power as a single entity. Build a mini tree
1858  // for that.
1859  SmallVector<Value *, 4> InnerProduct;
1860  InnerProduct.push_back(Factors[LastIdx].Base);
1861  do {
1862  InnerProduct.push_back(Factors[Idx].Base);
1863  ++Idx;
1864  } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);
1865 
1866  // Reset the base value of the first factor to the new expression tree.
1867  // We'll remove all the factors with the same power in a second pass.
1868  Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
1869  if (Instruction *MI = dyn_cast<Instruction>(M))
1870  RedoInsts.insert(MI);
1871 
1872  LastIdx = Idx;
1873  }
1874  // Unique factors with equal powers -- we've folded them into the first one's
1875  // base.
1876  Factors.erase(std::unique(Factors.begin(), Factors.end(),
1877  [](const Factor &LHS, const Factor &RHS) {
1878  return LHS.Power == RHS.Power;
1879  }),
1880  Factors.end());
1881 
1882  // Iteratively collect the base of each factor with an odd power into the
1883  // outer product, and halve each power in preparation for squaring the
1884  // expression.
1885  for (unsigned Idx = 0, Size = Factors.size(); Idx != Size; ++Idx) {
1886  if (Factors[Idx].Power & 1)
1887  OuterProduct.push_back(Factors[Idx].Base);
1888  Factors[Idx].Power >>= 1;
1889  }
1890  if (Factors[0].Power) {
1891  Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
1892  OuterProduct.push_back(SquareRoot);
1893  OuterProduct.push_back(SquareRoot);
1894  }
1895  if (OuterProduct.size() == 1)
1896  return OuterProduct.front();
1897 
1898  Value *V = buildMultiplyTree(Builder, OuterProduct);
1899  return V;
1900 }
1901 
1902 Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
1903  SmallVectorImpl<ValueEntry> &Ops) {
1904  // We can only optimize the multiplies when there is a chain of more than
1905  // three, such that a balanced tree might require fewer total multiplies.
1906  if (Ops.size() < 4)
1907  return nullptr;
1908 
1909  // Try to turn linear trees of multiplies without other uses of the
1910  // intermediate stages into minimal multiply DAGs with perfect sub-expression
1911  // re-use.
1912  SmallVector<Factor, 4> Factors;
1913  if (!collectMultiplyFactors(Ops, Factors))
1914  return nullptr; // All distinct factors, so nothing left for us to do.
1915  IRBuilder<> Builder(I);
1915 
1917  // The reassociate transformation for FP operations is performed only
1918  // if unsafe algebra is permitted by FastMathFlags. Propagate those flags
1919  // to the newly generated operations.
1920  if (auto FPI = dyn_cast<FPMathOperator>(I))
1921  Builder.setFastMathFlags(FPI->getFastMathFlags());
1922 
1923  Value *V = buildMinimalMultiplyDAG(Builder, Factors);
1924  if (Ops.empty())
1925  return V;
1926 
1927  ValueEntry NewEntry = ValueEntry(getRank(V), V);
1928  Ops.insert(llvm::lower_bound(Ops, NewEntry), NewEntry);
1929  return nullptr;
1930 }
1931 
1932 Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
1933  SmallVectorImpl<ValueEntry> &Ops) {
1934  // Now that we have the linearized expression tree, try to optimize it.
1935  // Start by folding any constants that we found.
1936  Constant *Cst = nullptr;
1937  unsigned Opcode = I->getOpcode();
1938  while (!Ops.empty() && isa<Constant>(Ops.back().Op)) {
1939  Constant *C = cast<Constant>(Ops.pop_back_val().Op);
1940  Cst = Cst ? ConstantExpr::get(Opcode, C, Cst) : C;
1941  }
1942  // If there was nothing but constants then we are done.
1943  if (Ops.empty())
1944  return Cst;
1945 
1946  // Put the combined constant back at the end of the operand list, except if
1947  // there is no point. For example, an add of 0 gets dropped here, while a
1948  // multiplication by zero turns the whole expression into zero.
1949  if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) {
1950  if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
1951  return Cst;
1952  Ops.push_back(ValueEntry(0, Cst));
1953  }
1954 
1955  if (Ops.size() == 1) return Ops[0].Op;
1956 
1957  // Handle destructive annihilation due to identities between elements in the
1958  // argument list here.
1959  unsigned NumOps = Ops.size();
1960  switch (Opcode) {
1961  default: break;
1962  case Instruction::And:
1963  case Instruction::Or:
1964  if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
1965  return Result;
1966  break;
1967 
1968  case Instruction::Xor:
1969  if (Value *Result = OptimizeXor(I, Ops))
1970  return Result;
1971  break;
1972 
1973  case Instruction::Add:
1974  case Instruction::FAdd:
1975  if (Value *Result = OptimizeAdd(I, Ops))
1976  return Result;
1977  break;
1978 
1979  case Instruction::Mul:
1980  case Instruction::FMul:
1981  if (Value *Result = OptimizeMul(I, Ops))
1982  return Result;
1983  break;
1984  }
1985 
1986  if (Ops.size() != NumOps)
1987  return OptimizeExpression(I, Ops);
1988  return nullptr;
1989 }
1990 
1991 // Remove dead instructions and if any operands are trivially dead add them to
1992 // Insts so they will be removed as well.
1993 void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I,
1994  OrderedSet &Insts) {
1995  assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
1996  SmallVector<Value *, 4> Ops(I->operands());
1997  ValueRankMap.erase(I);
1998  Insts.remove(I);
1999  RedoInsts.remove(I);
2000  llvm::salvageDebugInfo(*I);
2001  I->eraseFromParent();
2002  for (auto Op : Ops)
2003  if (Instruction *OpInst = dyn_cast<Instruction>(Op))
2004  if (OpInst->use_empty())
2005  Insts.insert(OpInst);
2006 }
2007 
2008 /// Zap the given instruction, adding interesting operands to the work list.
2009 void ReassociatePass::EraseInst(Instruction *I) {
2010  assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
2011  LLVM_DEBUG(dbgs() << "Erasing dead inst: "; I->dump());
2012 
2013  SmallVector<Value *, 8> Ops(I->operands());
2014  // Erase the dead instruction.
2015  ValueRankMap.erase(I);
2016  RedoInsts.remove(I);
2017  llvm::salvageDebugInfo(*I);
2018  I->eraseFromParent();
2019  // Optimize its operands.
2020  SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
2021  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2022  if (Instruction *Op = dyn_cast<Instruction>(Ops[i])) {
2023  // If this is a node in an expression tree, climb to the expression root
2024  // and add that since that's where optimization actually happens.
2025  unsigned Opcode = Op->getOpcode();
2026  while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
2027  Visited.insert(Op).second)
2028  Op = Op->user_back();
2029 
2030  // The instruction we're going to push may be coming from a
2031  // dead block, and Reassociate skips the processing of unreachable
2032  // blocks because it's a waste of time and also because it can
2033  // lead to infinite loop due to LLVM's non-standard definition
2034  // of dominance.
2035  if (ValueRankMap.find(Op) != ValueRankMap.end())
2036  RedoInsts.insert(Op);
2037  }
2038 
2039  MadeChange = true;
2040 }
2041 
2042 /// Recursively analyze an expression to build a list of instructions that have
2043 /// negative floating-point constant operands. The caller can then transform
2044 /// the list to create positive constants for better reassociation and CSE.
2045 static void getNegatibleInsts(Value *V,
2046  SmallVectorImpl<Instruction *> &Candidates) {
2047  // Handle only one-use instructions. Combining negations does not justify
2048  // replicating instructions.
2049  Instruction *I;
2050  if (!match(V, m_OneUse(m_Instruction(I))))
2051  return;
2052 
2053  // Handle expressions of multiplications and divisions.
2054  // TODO: This could look through floating-point casts.
2055  const APFloat *C;
2056  switch (I->getOpcode()) {
2057  case Instruction::FMul:
2058  // Not expecting non-canonical code here. Bail out and wait.
2059  if (match(I->getOperand(0), m_Constant()))
2060  break;
2061 
2062  if (match(I->getOperand(1), m_APFloat(C)) && C->isNegative()) {
2063  Candidates.push_back(I);
2064  LLVM_DEBUG(dbgs() << "FMul with negative constant: " << *I << '\n');
2065  }
2066  getNegatibleInsts(I->getOperand(0), Candidates);
2067  getNegatibleInsts(I->getOperand(1), Candidates);
2068  break;
2069  case Instruction::FDiv:
2070  // Not expecting non-canonical code here. Bail out and wait.
2071  if (match(I->getOperand(0), m_Constant()) &&
2072  match(I->getOperand(1), m_Constant()))
2073  break;
2074 
2075  if ((match(I->getOperand(0), m_APFloat(C)) && C->isNegative()) ||
2076  (match(I->getOperand(1), m_APFloat(C)) && C->isNegative())) {
2077  Candidates.push_back(I);
2078  LLVM_DEBUG(dbgs() << "FDiv with negative constant: " << *I << '\n');
2079  }
2080  getNegatibleInsts(I->getOperand(0), Candidates);
2081  getNegatibleInsts(I->getOperand(1), Candidates);
2082  break;
2083  default:
2084  break;
2085  }
2086 }
2087 
2088 /// Given an fadd/fsub with an operand that is a one-use instruction
2089 /// (the fadd/fsub), try to change negative floating-point constants into
2090 /// positive constants to increase potential for reassociation and CSE.
2091 Instruction *ReassociatePass::canonicalizeNegFPConstantsForOp(Instruction *I,
2092  Instruction *Op,
2093  Value *OtherOp) {
2094  assert((I->getOpcode() == Instruction::FAdd ||
2095  I->getOpcode() == Instruction::FSub) && "Expected fadd/fsub");
2096 
2097  // Collect instructions with negative FP constants from the subtree that ends
2098  // in Op.
2099  SmallVector<Instruction *, 4> Candidates;
2100  getNegatibleInsts(Op, Candidates);
2101  if (Candidates.empty())
2102  return nullptr;
2103 
2104  // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the
2105  // resulting subtract will be broken up later. This can get us into an
2106  // infinite loop during reassociation.
2107  bool IsFSub = I->getOpcode() == Instruction::FSub;
2108  bool NeedsSubtract = !IsFSub && Candidates.size() % 2 == 1;
2109  if (NeedsSubtract && ShouldBreakUpSubtract(I))
2110  return nullptr;
2111 
2112  for (Instruction *Negatible : Candidates) {
2113  const APFloat *C;
2114  if (match(Negatible->getOperand(0), m_APFloat(C))) {
2115  assert(!match(Negatible->getOperand(1), m_Constant()) &&
2116  "Expecting only 1 constant operand");
2117  assert(C->isNegative() && "Expected negative FP constant");
2118  Negatible->setOperand(0, ConstantFP::get(Negatible->getType(), abs(*C)));
2119  MadeChange = true;
2120  }
2121  if (match(Negatible->getOperand(1), m_APFloat(C))) {
2122  assert(!match(Negatible->getOperand(0), m_Constant()) &&
2123  "Expecting only 1 constant operand");
2124  assert(C->isNegative() && "Expected negative FP constant");
2125  Negatible->setOperand(1, ConstantFP::get(Negatible->getType(), abs(*C)));
2126  MadeChange = true;
2127  }
2128  }
2129  assert(MadeChange == true && "Negative constant candidate was not changed");
2130 
2131  // Negations cancelled out.
2132  if (Candidates.size() % 2 == 0)
2133  return I;
2134 
2135  // Negate the final operand in the expression by flipping the opcode of this
2136  // fadd/fsub.
2137  assert(Candidates.size() % 2 == 1 && "Expected odd number");
2138  IRBuilder<> Builder(I);
2139  Value *NewInst = IsFSub ? Builder.CreateFAddFMF(OtherOp, Op, I)
2140  : Builder.CreateFSubFMF(OtherOp, Op, I);
2141  I->replaceAllUsesWith(NewInst);
2142  RedoInsts.insert(I);
2143  return dyn_cast<Instruction>(NewInst);
2144 }
2145 
2146 /// Canonicalize expressions that contain a negative floating-point constant
2147 /// of the following form:
2148 /// OtherOp + (subtree) -> OtherOp {+/-} (canonical subtree)
2149 /// (subtree) + OtherOp -> OtherOp {+/-} (canonical subtree)
2150 /// OtherOp - (subtree) -> OtherOp {+/-} (canonical subtree)
2151 ///
2152 /// The fadd/fsub opcode may be switched to allow folding a negation into the
2153 /// input instruction.
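 /// For example (illustrative): X + (Y * -2.0) becomes X - (Y * 2.0), and
 /// X - (Y / -2.0) becomes X + (Y / 2.0); the original instruction's fast-math
 /// flags are propagated to the replacement fadd/fsub.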
2154 Instruction *ReassociatePass::canonicalizeNegFPConstants(Instruction *I) {
2155  LLVM_DEBUG(dbgs() << "Combine negations for: " << *I << '\n');
2156  Value *X;
2157  Instruction *Op;
2158  if (match(I, m_FAdd(m_Value(X), m_OneUse(m_Instruction(Op)))))
2159  if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2160  I = R;
2161  if (match(I, m_FAdd(m_OneUse(m_Instruction(Op)), m_Value(X))))
2162  if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2163  I = R;
2164  if (match(I, m_FSub(m_Value(X), m_OneUse(m_Instruction(Op)))))
2165  if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2166  I = R;
2167  return I;
2168 }
2169 
2170 /// Inspect and optimize the given instruction. Note that erasing
2171 /// instructions is not allowed.
2172 void ReassociatePass::OptimizeInst(Instruction *I) {
2173  // Only consider operations that we understand.
2174  if (!isa<UnaryOperator>(I) && !isa<BinaryOperator>(I))
2175  return;
2176 
2177  if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
2178  // If an operand of this shift is a reassociable multiply, or if the shift
2179  // is used by a reassociable multiply or add, turn into a multiply.
2180  if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
2181  (I->hasOneUse() &&
2182  (isReassociableOp(I->user_back(), Instruction::Mul) ||
2183  isReassociableOp(I->user_back(), Instruction::Add)))) {
2184  Instruction *NI = ConvertShiftToMul(I);
2185  RedoInsts.insert(I);
2186  MadeChange = true;
2187  I = NI;
2188  }
2189 
2190  // Commute binary operators, to canonicalize the order of their operands.
2191  // This can potentially expose more CSE opportunities, and makes writing other
2192  // transformations simpler.
2193  if (I->isCommutative())
2194  canonicalizeOperands(I);
2195 
2196  // Canonicalize negative constants out of expressions.
2197  if (Instruction *Res = canonicalizeNegFPConstants(I))
2198  I = Res;
2199 
2200  // Don't optimize floating-point instructions unless they are 'fast'.
2201  if (I->getType()->isFPOrFPVectorTy() && !I->isFast())
2202  return;
2203 
2204  // Do not reassociate boolean (i1) expressions. We want to preserve the
2205  // original order of evaluation for short-circuited comparisons that
2206  // SimplifyCFG has folded to AND/OR expressions. If the expression
2207  // is not further optimized, it is likely to be transformed back to a
2208  // short-circuited form for code gen, and the source order may have been
2209  // optimized for the most likely conditions.
2210  if (I->getType()->isIntegerTy(1))
2211  return;
2212 
2213  // If this is a bitwise or instruction of operands
2214  // with no common bits set, convert it to X+Y.
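  // For example (illustrative), (x << 8) | (y & 255) has no common bits set,
  // so it can be rewritten as an add and then take part in add reassociation.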
2215  if (I->getOpcode() == Instruction::Or &&
2216  shouldConvertOrWithNoCommonBitsToAdd(I) && !isLoadCombineCandidate(I) &&
2217  haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1),
2218  I->getModule()->getDataLayout(), /*AC=*/nullptr, I,
2219  /*DT=*/nullptr)) {
2220  Instruction *NI = convertOrWithNoCommonBitsToAdd(I);
2221  RedoInsts.insert(I);
2222  MadeChange = true;
2223  I = NI;
2224  }
2225 
2226  // If this is a subtract instruction which is not already in negate form,
2227  // see if we can convert it to X+-Y.
2228  if (I->getOpcode() == Instruction::Sub) {
2229  if (ShouldBreakUpSubtract(I)) {
2230  Instruction *NI = BreakUpSubtract(I, RedoInsts);
2231  RedoInsts.insert(I);
2232  MadeChange = true;
2233  I = NI;
2234  } else if (match(I, m_Neg(m_Value()))) {
2235  // Otherwise, this is a negation. See if the operand is a multiply tree
2236  // and if this is not an inner node of a multiply tree.
2237  if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
2238  (!I->hasOneUse() ||
2239  !isReassociableOp(I->user_back(), Instruction::Mul))) {
2240  Instruction *NI = LowerNegateToMultiply(I);
2241  // If the negate was simplified, revisit the users to see if we can
2242  // reassociate further.
2243  for (User *U : NI->users()) {
2244  if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2245  RedoInsts.insert(Tmp);
2246  }
2247  RedoInsts.insert(I);
2248  MadeChange = true;
2249  I = NI;
2250  }
2251  }
2252  } else if (I->getOpcode() == Instruction::FNeg ||
2253  I->getOpcode() == Instruction::FSub) {
2254  if (ShouldBreakUpSubtract(I)) {
2255  Instruction *NI = BreakUpSubtract(I, RedoInsts);
2256  RedoInsts.insert(I);
2257  MadeChange = true;
2258  I = NI;
2259  } else if (match(I, m_FNeg(m_Value()))) {
2260  // Otherwise, this is a negation. See if the operand is a multiply tree
2261  // and if this is not an inner node of a multiply tree.
2262  Value *Op = isa<BinaryOperator>(I) ? I->getOperand(1) :
2263  I->getOperand(0);
2264  if (isReassociableOp(Op, Instruction::FMul) &&
2265  (!I->hasOneUse() ||
2266  !isReassociableOp(I->user_back(), Instruction::FMul))) {
2267  // If the negate was simplified, revisit the users to see if we can
2268  // reassociate further.
2269  Instruction *NI = LowerNegateToMultiply(I);
2270  for (User *U : NI->users()) {
2271  if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2272  RedoInsts.insert(Tmp);
2273  }
2274  RedoInsts.insert(I);
2275  MadeChange = true;
2276  I = NI;
2277  }
2278  }
2279  }
2280 
2281  // If this instruction is an associative binary operator, process it.
2282  if (!I->isAssociative()) return;
2283  BinaryOperator *BO = cast<BinaryOperator>(I);
2284 
2285  // If this is an interior node of a reassociable tree, ignore it until we
2286  // get to the root of the tree, to avoid N^2 analysis.
2287  unsigned Opcode = BO->getOpcode();
2288  if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
2289  // During the initial run we will get to the root of the tree.
2290  // But if we get here while we are redoing instructions, there is no
2291  // guarantee that the root will be visited. So redo it later.
2292  if (BO->user_back() != BO &&
2293  BO->getParent() == BO->user_back()->getParent())
2294  RedoInsts.insert(BO->user_back());
2295  return;
2296  }
2297 
2298  // If this is an add tree that is used by a sub instruction, ignore it
2299  // until we process the subtract.
2300  if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
2301  cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub)
2302  return;
2303  if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd &&
2304  cast<Instruction>(BO->user_back())->getOpcode() == Instruction::FSub)
2305  return;
2306 
2307  ReassociateExpression(BO);
2308 }
2309 
2310 void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
2311  // First, walk the expression tree, linearizing the tree, collecting the
2312  // operand information.
2313  SmallVector<RepeatedValue, 8> Tree;
2314  MadeChange |= LinearizeExprTree(I, Tree);
2315  SmallVector<ValueEntry, 8> Ops;
2316  Ops.reserve(Tree.size());
2317  for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
2318  RepeatedValue E = Tree[i];
2319  Ops.append(E.second.getZExtValue(),
2320  ValueEntry(getRank(E.first), E.first));
2321  }
2322 
2323  LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
2324 
2325  // Now that we have linearized the tree to a list and have gathered all of
2326  // the operands and their ranks, sort the operands by their rank. Use a
2327  // stable_sort so that values with equal ranks will have their relative
2328  // positions maintained (and so the compiler is deterministic). Note that
2329  // this sorts so that the highest ranking values end up at the beginning of
2330  // the vector.
2331  llvm::stable_sort(Ops);
2332 
2333  // Now that we have the expression tree in a convenient
2334  // sorted form, optimize it globally if possible.
2335  if (Value *V = OptimizeExpression(I, Ops)) {
2336  if (V == I)
2337  // Self-referential expression in unreachable code.
2338  return;
2339  // This expression tree simplified to something that isn't a tree,
2340  // eliminate it.
2341  LLVM_DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
2342  I->replaceAllUsesWith(V);
2343  if (Instruction *VI = dyn_cast<Instruction>(V))
2344  if (I->getDebugLoc())
2345  VI->setDebugLoc(I->getDebugLoc());
2346  RedoInsts.insert(I);
2347  ++NumAnnihil;
2348  return;
2349  }
2350 
2351  // We want to sink immediates as deeply as possible except in the case where
2352  // this is a multiply tree used only by an add, and the immediate is a -1.
2353  // In this case we reassociate to put the negation on the outside so that we
2354  // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
2355  if (I->hasOneUse()) {
2356  if (I->getOpcode() == Instruction::Mul &&
2357  cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
2358  isa<ConstantInt>(Ops.back().Op) &&
2359  cast<ConstantInt>(Ops.back().Op)->isMinusOne()) {
2360  ValueEntry Tmp = Ops.pop_back_val();
2361  Ops.insert(Ops.begin(), Tmp);
2362  } else if (I->getOpcode() == Instruction::FMul &&
2363  cast<Instruction>(I->user_back())->getOpcode() ==
2364  Instruction::FAdd &&
2365  isa<ConstantFP>(Ops.back().Op) &&
2366  cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)) {
2367  ValueEntry Tmp = Ops.pop_back_val();
2368  Ops.insert(Ops.begin(), Tmp);
2369  }
2370  }
2371 
2372  LLVM_DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
2373 
2374  if (Ops.size() == 1) {
2375  if (Ops[0].Op == I)
2376  // Self-referential expression in unreachable code.
2377  return;
2378 
2379  // This expression tree simplified to something that isn't a tree,
2380  // eliminate it.
2381  I->replaceAllUsesWith(Ops[0].Op);
2382  if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
2383  OI->setDebugLoc(I->getDebugLoc());
2384  RedoInsts.insert(I);
2385  return;
2386  }
2387 
2388  if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) {
2389  // Find the pair with the highest count in the pairmap and move it to the
2390  // back of the list so that it can later be CSE'd.
2391  // example:
2392  // a*b*c*d*e
2393  // if c*e is the most "popular" pair, we can express this as
2394  // (((c*e)*d)*b)*a
2395  unsigned Max = 1;
2396  unsigned BestRank = 0;
2397  std::pair<unsigned, unsigned> BestPair;
2398  unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin;
2399  for (unsigned i = 0; i < Ops.size() - 1; ++i)
2400  for (unsigned j = i + 1; j < Ops.size(); ++j) {
2401  unsigned Score = 0;
2402  Value *Op0 = Ops[i].Op;
2403  Value *Op1 = Ops[j].Op;
2404  if (std::less<Value *>()(Op1, Op0))
2405  std::swap(Op0, Op1);
2406  auto it = PairMap[Idx].find({Op0, Op1});
2407  if (it != PairMap[Idx].end()) {
2408  // Functions like BreakUpSubtract() can erase the Values we're using
2409  // as keys and create new Values after we built the PairMap. There's a
2410  // small chance that the new nodes can have the same address as
2411  // something already in the table. We shouldn't accumulate the stored
2412  // score in that case as it refers to the wrong Value.
2413  if (it->second.isValid())
2414  Score += it->second.Score;
2415  }
2416 
2417  unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank);
2418  if (Score > Max || (Score == Max && MaxRank < BestRank)) {
2419  BestPair = {i, j};
2420  Max = Score;
2421  BestRank = MaxRank;
2422  }
2423  }
2424  if (Max > 1) {
2425  auto Op0 = Ops[BestPair.first];
2426  auto Op1 = Ops[BestPair.second];
2427  Ops.erase(&Ops[BestPair.second]);
2428  Ops.erase(&Ops[BestPair.first]);
2429  Ops.push_back(Op0);
2430  Ops.push_back(Op1);
2431  }
2432  }
2433  // Now that we ordered and optimized the expressions, splat them back into
2434  // the expression tree, removing any unneeded nodes.
2435  RewriteExprTree(I, Ops);
2436 }
2437 
2438 void
2439 ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
2440  // Make a "pairmap" of how often each operand pair occurs.
2441  for (BasicBlock *BI : RPOT) {
2442  for (Instruction &I : *BI) {
2443  if (!I.isAssociative())
2444  continue;
2445 
2446  // Ignore nodes that aren't at the root of trees.
2447  if (I.hasOneUse() && I.user_back()->getOpcode() == I.getOpcode())
2448  continue;
2449 
2450  // Collect all operands in a single reassociable expression.
2451  // Since Reassociate has already been run once, we can assume things
2452  // are already canonical according to Reassociation's regime.
2453  SmallVector<Value *, 8> Worklist = { I.getOperand(0), I.getOperand(1) };
2454  SmallVector<Value *, 8> Ops;
2455  while (!Worklist.empty() && Ops.size() <= GlobalReassociateLimit) {
2456  Value *Op = Worklist.pop_back_val();
2457  Instruction *OpI = dyn_cast<Instruction>(Op);
2458  if (!OpI || OpI->getOpcode() != I.getOpcode() || !OpI->hasOneUse()) {
2459  Ops.push_back(Op);
2460  continue;
2461  }
2462  // Be paranoid about self-referencing expressions in unreachable code.
2463  if (OpI->getOperand(0) != OpI)
2464  Worklist.push_back(OpI->getOperand(0));
2465  if (OpI->getOperand(1) != OpI)
2466  Worklist.push_back(OpI->getOperand(1));
2467  }
2468  // Skip extremely long expressions.
2469  if (Ops.size() > GlobalReassociateLimit)
2470  continue;
2471 
2472  // Add all pairwise combinations of operands to the pair map.
2473  unsigned BinaryIdx = I.getOpcode() - Instruction::BinaryOpsBegin;
2474  SmallSet<std::pair<Value *, Value *>, 32> Visited;
2475  for (unsigned i = 0; i < Ops.size() - 1; ++i) {
2476  for (unsigned j = i + 1; j < Ops.size(); ++j) {
2477  // Canonicalize operand orderings.
2478  Value *Op0 = Ops[i];
2479  Value *Op1 = Ops[j];
2480  if (std::less<Value *>()(Op1, Op0))
2481  std::swap(Op0, Op1);
2482  if (!Visited.insert({Op0, Op1}).second)
2483  continue;
2484  auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, {Op0, Op1, 1}});
2485  if (!res.second) {
2486  // If either key value has been erased then we've got the same
2487  // address by coincidence. That can't happen here because nothing is
2488  // erasing values but it can happen by the time we're querying the
2489  // map.
2490  assert(res.first->second.isValid() && "WeakVH invalidated");
2491  ++res.first->second.Score;
2492  }
2493  }
2494  }
2495  }
2496  }
2497 }
2498 
2499 PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
2500  // Get the function's basic blocks in Reverse Post Order. This order is used
2501  // by BuildRankMap to precalculate ranks correctly. It also excludes dead
2502  // basic blocks (the analysis in this pass has been seen to hang when
2503  // analysing dead basic blocks).
2504  ReversePostOrderTraversal<Function *> RPOT(&F);
2505 
2506  // Calculate the rank map for F.
2507  BuildRankMap(F, RPOT);
2508 
2509  // Build the pair map before running reassociate.
2510  // Technically this would be more accurate if we did it after one round
2511  // of reassociation, but in practice it doesn't seem to help much on
2512  // real-world code, so don't waste the compile time running reassociate
2513  // twice.
2514  // If a user wants, they could explicitly run reassociate twice in their
2515  // pass pipeline for further potential gains.
2516  // It might also be possible to update the pair map during runtime, but the
2517  // overhead of that may be large if there's many reassociable chains.
2518  BuildPairMap(RPOT);
2519 
2520  MadeChange = false;
2521 
2522  // Traverse the same blocks that were analysed by BuildRankMap.
2523  for (BasicBlock *BI : RPOT) {
2524  assert(RankMap.count(&*BI) && "BB should be ranked.");
2525  // Optimize every instruction in the basic block.
2526  for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
2527  if (isInstructionTriviallyDead(&*II)) {
2528  EraseInst(&*II++);
2529  } else {
2530  OptimizeInst(&*II);
2531  assert(II->getParent() == &*BI && "Moved to a different block!");
2532  ++II;
2533  }
2534 
2535  // Make a copy of all the instructions to be redone so we can remove dead
2536  // instructions.
2537  OrderedSet ToRedo(RedoInsts);
2538  // Iterate over all instructions to be reevaluated and remove trivially dead
2539  // instructions. If any operand of the trivially dead instruction becomes
2540  // dead mark it for deletion as well. Continue this process until all
2541  // trivially dead instructions have been removed.
2542  while (!ToRedo.empty()) {
2543  Instruction *I = ToRedo.pop_back_val();
2544  if (isInstructionTriviallyDead(I)) {
2545  RecursivelyEraseDeadInsts(I, ToRedo);
2546  MadeChange = true;
2547  }
2548  }
2549 
2550  // Now that we have removed dead instructions, we can reoptimize the
2551  // remaining instructions.
2552  while (!RedoInsts.empty()) {
2553  Instruction *I = RedoInsts.front();
2554  RedoInsts.erase(RedoInsts.begin());
2555  if (isInstructionTriviallyDead(I))
2556  EraseInst(I);
2557  else
2558  OptimizeInst(I);
2559  }
2560  }
2561 
2562  // We are done with the rank map and pair map.
2563  RankMap.clear();
2564  ValueRankMap.clear();
2565  for (auto &Entry : PairMap)
2566  Entry.clear();
2567 
2568  if (MadeChange) {
2569  PreservedAnalyses PA;
2570  PA.preserveSet<CFGAnalyses>();
2571  PA.preserve<AAManager>();
2572  PA.preserve<BasicAA>();
2573  PA.preserve<GlobalsAA>();
2574  return PA;
2575  }
2576 
2577  return PreservedAnalyses::all();
2578 }
2579 
2580 namespace {
2581 
2582  class ReassociateLegacyPass : public FunctionPass {
2583  ReassociatePass Impl;
2584 
2585  public:
2586  static char ID; // Pass identification, replacement for typeid
2587 
2588  ReassociateLegacyPass() : FunctionPass(ID) {
2589  initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
2590  }
2591 
2592  bool runOnFunction(Function &F) override {
2593  if (skipFunction(F))
2594  return false;
2595 
2596  FunctionAnalysisManager DummyFAM;
2597  auto PA = Impl.run(F, DummyFAM);
2598  return !PA.areAllPreserved();
2599  }
2600 
2601  void getAnalysisUsage(AnalysisUsage &AU) const override {
2602  AU.setPreservesCFG();
2603  AU.addPreserved<AAResultsWrapperPass>();
2604  AU.addPreserved<BasicAAWrapperPass>();
2605  AU.addPreserved<GlobalsAAWrapperPass>();
2606  }
2607  };
2608 
2609 } // end anonymous namespace
2610 
2611 char ReassociateLegacyPass::ID = 0;
2612 
2613 INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
2614  "Reassociate expressions", false, false)
2615 
2616 // Public interface to the Reassociate pass
2617 FunctionPass *llvm::createReassociatePass() {
2618  return new ReassociateLegacyPass();
2619 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::GlobalsAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: GlobalsModRef.h:132
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::BasicAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: BasicAliasAnalysis.h:233
ShouldBreakUpSubtract
static bool ShouldBreakUpSubtract(Instruction *Sub)
Return true if we should break up this subtract of X-Y into (X + -Y).
Definition: Reassociate.cpp:1019
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1221
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:100
LowerNegateToMultiply
static BinaryOperator * LowerNegateToMultiply(Instruction *Neg)
Replace 0-X with X*-1.
Definition: Reassociate.cpp:270
llvm
Definition: AllocatorList.h:23
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition: SmallVector.h:705
llvm::haveNoCommonBitsSet
bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if LHS and RHS have no common bits set.
Definition: ValueTracking.cpp:283
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::RecurKind::Or
@ Or
Bitwise or logical OR of integers.
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:447
llvm::reassociate::ValueEntry
Definition: Reassociate.h:46
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
Scalar.h
llvm::Function
Definition: Function.h:61
llvm::lower_bound
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1615
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::ReassociatePass
Reassociate commutative expressions.
Definition: Reassociate.h:71
it
Reference model for inliner Oz decision policy Note this model is also referenced by test Transforms Inline ML tests if replacing it
Definition: README.txt:3
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
createAndInstr
static Value * createAndInstr(Instruction *InsertBefore, Value *Opnd, const APInt &ConstOpnd)
Helper function of CombineXorOpnd().
Definition: Reassociate.cpp:1280
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
collectMultiplyFactors
static bool collectMultiplyFactors(SmallVectorImpl< ValueEntry > &Ops, SmallVectorImpl< Factor > &Factors)
Build up a vector of value/power pairs factoring a product.
Definition: Reassociate.cpp:1768
ErrorHandling.h
llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:748
llvm::IRBuilder<>
ValueTracking.h
Local.h
llvm::PatternMatch::m_APFloat
apfloat_match m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
Definition: PatternMatch.h:243
INITIALIZE_PASS
INITIALIZE_PASS(ReassociateLegacyPass, "reassociate", "Reassociate expressions", false, false) FunctionPass *llvm
Definition: Reassociate.cpp:2613
GlobalsModRef.h
llvm::Type::isFPOrFPVectorTy
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:190
APInt.h
llvm::FlagsOp
Definition: MachineIRBuilder.h:208
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
DenseMap.h
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1581
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
llvm::Instruction::setHasNoUnsignedWrap
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
Definition: Instruction.cpp:120
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
Operator.h
llvm::Value::user_begin
user_iterator user_begin()
Definition: Value.h:410
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
EmitAddTreeOfValues
static Value * EmitAddTreeOfValues(Instruction *I, SmallVectorImpl< WeakTrackingVH > &Ops)
Emit a tree of add instructions, summing Ops together and returning the result.
Definition: Reassociate.cpp:1128
llvm::reassociate::XorOpnd::setSymbolicRank
void setSymbolicRank(unsigned R)
Definition: Reassociate.cpp:109
llvm::BinaryOperator::CreateNeg
NUW NUW NUW NUW Exact static Exact BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", Instruction *InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
Definition: Instructions.cpp:2565
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:160
BasicAliasAnalysis.h
llvm::APInt::isMinValue
bool isMinValue() const
Determine if this is the smallest unsigned value.
Definition: APInt.h:442
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:129
llvm::PatternMatch::m_FAdd
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:965
llvm::PatternMatch::m_FSub
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:977
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:205
Instruction.h
llvm::PatternMatch::m_APInt
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:226
llvm::APInt::uge
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1313
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:160
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
IncorporateWeight
static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode)
Add the extra weight 'RHS' to the existing weight 'LHS', reducing the combined weight using any speci...
Definition: Reassociate.cpp:305
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::Instruction::setHasNoSignedWrap
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
Definition: Instruction.cpp:124
llvm::User
Definition: User.h:44
FindInOperandList
static unsigned FindInOperandList(const SmallVectorImpl< ValueEntry > &Ops, unsigned i, Value *X)
Scan backwards and forwards among values with the same rank as element i to see if X exists.
Definition: Reassociate.cpp:1102
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
InstrTypes.h
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
getNegatibleInsts
static void getNegatibleInsts(Value *V, SmallVectorImpl< Instruction * > &Candidates)
Recursively analyze an expression to build a list of instructions that have negative floating-point c...
Definition: Reassociate.cpp:2045
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::reassociate::XorOpnd::isInvalid
bool isInvalid() const
Definition: Reassociate.cpp:101
OptimizeAndOrXor
static Value * OptimizeAndOrXor(unsigned Opcode, SmallVectorImpl< ValueEntry > &Ops)
Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
Definition: Reassociate.cpp:1230
llvm::Instruction::isNilpotent
bool isNilpotent() const
Return true if the instruction is nilpotent:
Definition: Instruction.h:574
llvm::PatternMatch::m_Instruction
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:704
llvm::reassociate::XorOpnd
Utility class representing a non-constant Xor-operand.
Definition: Reassociate.cpp:97
llvm::ConstantExpr::getBinOpAbsorber
static Constant * getBinOpAbsorber(unsigned Opcode, Type *Ty)
Return the absorbing element for the given binary operation, i.e.
Definition: Constants.cpp:2832
llvm::reassociate::XorOpnd::isOrExpr
bool isOrExpr() const
Definition: Reassociate.cpp:102
llvm::Instruction::getOpcodeName
const char * getOpcodeName() const
Definition: Instruction.h:162
llvm::PatternMatch::m_FNeg
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
Definition: PatternMatch.h:1014
llvm::Constant::getAllOnesValue
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:405
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:395
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:154
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1631
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:255
APFloat.h
This file declares a class to represent arbitrary precision floating point values and provide a varie...
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1770
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:885
SmallPtrSet.h
PatternMatch.h
RepeatedValue
std::pair< Value *, APInt > RepeatedValue
Definition: Reassociate.cpp:378
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:96
Type.h
llvm::ConstantExpr::getFNeg
static Constant * getFNeg(Constant *C)
Definition: Constants.cpp:2652
llvm::APInt::isAllOnesValue
bool isAllOnesValue() const
Determine if all bits are set.
Definition: APInt.h:401
llvm::mayBeMemoryDependent
bool mayBeMemoryDependent(const Instruction &I)
Returns true if the result or effects of the given instruction I depend on or influence global memor...
Definition: ValueTracking.cpp:4608
llvm::initializeReassociateLegacyPassPass
void initializeReassociateLegacyPassPass(PassRegistry &)
CFG.h
llvm::InvokeInst
Invoke instruction.
Definition: Instructions.h:3686
llvm::ReassociatePass::OrderedSet
SetVector< AssertingVH< Instruction >, std::deque< AssertingVH< Instruction > >> OrderedSet
Definition: Reassociate.h:74
llvm::APInt::getOneBitSet
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:593
llvm::Instruction::andIRFlags
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
Definition: Instruction.cpp:292
BasicBlock.h
ConvertShiftToMul
static BinaryOperator * ConvertShiftToMul(Instruction *Shl)
If this is a shift of a reassociable multiply or is used by one, change this into a multiply by a con...
Definition: Reassociate.cpp:1072
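For example, x << 4 becomes x * 16, so the former shift can take part in multiply reassociation and factoring.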
llvm::APFloat
Definition: APFloat.h:701
llvm::reassociate::ValueEntry::Op
Value * Op
Definition: Reassociate.h:48
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::UnaryOperator::CreateFNegFMF
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", Instruction *InsertBefore=nullptr)
Definition: InstrTypes.h:166
llvm::ConstantExpr::getBinOpIdentity
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2786
llvm::reassociate::XorOpnd::getConstPart
const APInt & getConstPart() const
Definition: Reassociate.cpp:106
isReassociableOp
static BinaryOperator * isReassociableOp(Value *V, unsigned Opcode)
Return V as a BinaryOperator if it is an instruction of the specified opcode and has only one use; otherwise return nullptr.
Definition: Reassociate.cpp:149
llvm::Instruction::user_back
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:91
llvm::reassociate::XorOpnd::XorOpnd
XorOpnd(Value *V)
Definition: Reassociate.cpp:119
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:176
llvm::BasicAAWrapperPass
Legacy wrapper pass to provide the BasicAAResult object.
Definition: BasicAliasAnalysis.h:245
CreateNeg
static Instruction * CreateNeg(Value *S1, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
Definition: Reassociate.cpp:258
llvm::DenseMap
Definition: DenseMap.h:714
llvm::ConstantExpr::get
static Constant * get(unsigned Opcode, Constant *C1, unsigned Flags=0, Type *OnlyIfReducedTy=nullptr)
get - Return a unary operator constant expression, folding if possible.
Definition: Constants.cpp:2241
llvm::Value::clearSubclassOptionalData
void clearSubclassOptionalData()
Clear the optional flags contained in this value.
Definition: Value.h:553
llvm::ConstantExpr::getShl
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2740
NegateValue
static Value * NegateValue(Value *V, Instruction *BI, ReassociatePass::OrderedSet &ToRedo)
Insert instructions before the instruction pointed to by BI, that computes the negative version of th...
Definition: Reassociate.cpp:817
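For example, when V is a reassociable sum, the negation is pushed through it, so -(A + B) is materialized as (-A) + (-B), and the resulting add can then be reassociated further.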
llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:362
llvm::Instruction::isIdempotent
bool isIdempotent() const
Return true if the instruction is idempotent: x op x == x (in LLVM, and and or are idempotent).
Definition: Instruction.h:560
llvm::APInt::getBoolValue
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:483
IRBuilder.h
Reassociate.h
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:958
CarmichaelShift
static unsigned CarmichaelShift(unsigned Bitwidth)
Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael function.
Definition: Reassociate.cpp:292
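For reference, lambda(2) = 1, lambda(4) = 2, and lambda(2^n) = 2^(n-2) for n >= 3, so k follows directly from the bit width. A minimal sketch of that relationship (the helper name is illustrative, not part of the pass):
// k such that lambda(2^Bitwidth) == 2^k.
unsigned carmichaelShiftSketch(unsigned Bitwidth) {
  return Bitwidth < 3 ? Bitwidth - 1 : Bitwidth - 2;
}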
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::reassociate::XorOpnd::getSymbolicPart
Value * getSymbolicPart() const
Definition: Reassociate.cpp:104
llvm::reassociate::Factor::Power
unsigned Power
Definition: Reassociate.h:61
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:98
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
PrintOps
static void PrintOps(Instruction *I, const SmallVectorImpl< ValueEntry > &Ops)
Print out the expression identified in the Ops list.
Definition: Reassociate.cpp:77
llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:389
llvm::BinaryOperator
Definition: InstrTypes.h:190
llvm::reassociate::XorOpnd::Invalidate
void Invalidate()
Definition: Reassociate.cpp:108
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1512
convertOrWithNoCommonBitsToAdd
static BinaryOperator * convertOrWithNoCommonBitsToAdd(Instruction *Or)
If we have (X|Y), and iff X and Y have no common bits set, transform this into (X+Y) to allow arithme...
Definition: Reassociate.cpp:1002
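For example, if X and Y are known to share no set bits, (X | Y) computes the same value as (X + Y), and the add form can then participate in add reassociation.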
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass iff it does not add or remove basic blocks from the function or modify terminator instructions.
Definition: Pass.cpp:253
llvm::PreservedAnalyses::areAllPreserved
bool areAllPreserved() const
Test whether all analyses are preserved (and none are abandoned).
Definition: PassManager.h:330
FindSingleUseMultiplyFactors
static void FindSingleUseMultiplyFactors(Value *V, SmallVectorImpl< Value * > &Factors)
If V is a single-use multiply, recursively add its operands as factors, otherwise add V to the list o...
Definition: Reassociate.cpp:1214
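For example, given a single-use product such as (a * b) * c where each multiply has one use, the collected factors are a, b and c; any operand that is not itself a single-use multiply is added to Factors unchanged.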
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:116
llvm::reassociate::XorOpnd::getSymbolicRank
unsigned getSymbolicRank() const
Definition: Reassociate.cpp:105
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:526
llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:95
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
llvm::APInt::ult
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1205
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:298
ValueHandle.h
llvm::createReassociatePass
FunctionPass * createReassociatePass()
Argument.h
llvm::APInt::getNullValue
static APInt getNullValue(unsigned numBits)
Get the '0' value.
Definition: APInt.h:574
llvm::Instruction::setFastMathFlags
void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
Definition: Instruction.cpp:209
Constant.h
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1640
llvm::ReassociatePass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
Definition: Reassociate.cpp:2499
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:347
llvm::ConstantExpr::getNeg
static Constant * getNeg(Constant *C, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2645
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:208
llvm::ConstantFP::get
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:932
Casting.h
Function.h
PassManager.h
llvm::salvageDebugInfo
void salvageDebugInfo(Instruction &I)
Assuming the instruction I is going to be deleted, attempt to salvage debug users of I by writing the...
Definition: Local.cpp:1799
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::ReversePostOrderTraversal
Definition: PostOrderIterator.h:290
llvm::replaceDbgUsesWithUndef
bool replaceDbgUsesWithUndef(Instruction *I)
Replace all the uses of an SSA value in @llvm.dbg intrinsics with undef.
Definition: Local.cpp:564
llvm::APInt::isNullValue
bool isNullValue() const
Determine if all bits are clear.
Definition: APInt.h:411
llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:768
Instructions.h
PostOrderIterator.h
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:191
LinearizeExprTree
static bool LinearizeExprTree(Instruction *I, SmallVectorImpl< RepeatedValue > &Ops)
Given an associative binary expression, return the leaf nodes in Ops along with their weights (how ma...
Definition: Reassociate.cpp:453
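As an illustration (assuming the inner adds are single-use parts of the same tree), linearizing (A + B) + (C + A) yields the leaves A with weight 2 and B, C with weight 1 each, the weight recording how many times a leaf occurs in the flattened expression.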
SmallVector.h
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:365
User.h
CreateMul
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
Definition: Reassociate.cpp:246
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1269
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::Value::deleteValue
void deleteValue()
Delete a pointer to a generic Value.
Definition: Value.cpp:111
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:143
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:43
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
CreateAdd
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
Definition: Reassociate.cpp:234
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:376
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
raw_ostream.h
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:624
llvm::BinaryOperator::Create
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Definition: Instructions.cpp:2549
Value.h
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1272
llvm::reassociate::Factor
Utility class representing a base and exponent pair which form one factor of some product.
Definition: Reassociate.h:59
InitializePasses.h
buildMultiplyTree
static Value * buildMultiplyTree(IRBuilderBase &Builder, SmallVectorImpl< Value * > &Ops)
Build a tree of multiplies, computing the product of Ops.
Definition: Reassociate.cpp:1822
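A minimal sketch of emitting such a product with IRBuilder, assuming integer operands, a non-empty Ops list, and a simple right-to-left fold (the helper name and folding order are illustrative; the pass also handles floating-point multiplies):
// Assumes 'using namespace llvm' plus llvm/IR/IRBuilder.h and
// llvm/ADT/SmallVector.h, as in this file.
static Value *buildProductSketch(IRBuilderBase &B, SmallVectorImpl<Value *> &Ops) {
  Value *Prod = Ops.pop_back_val();               // start from the last operand
  while (!Ops.empty())
    Prod = B.CreateMul(Prod, Ops.pop_back_val()); // fold in the remaining ones
  return Prod;
}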
llvm::reassociate::XorOpnd::getValue
Value * getValue() const
Definition: Reassociate.cpp:103
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
BreakUpSubtract
static BinaryOperator * BreakUpSubtract(Instruction *Sub, ReassociatePass::OrderedSet &ToRedo)
If we have (X-Y), and if either X is an add, or if this is only used by an add, transform this into (...
Definition: Reassociate.cpp:1049
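In other words, X - Y is rewritten as X + (-Y) so that the subtraction can be combined with neighbouring add expressions.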
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:434
shouldConvertOrWithNoCommonBitsToAdd
static bool shouldConvertOrWithNoCommonBitsToAdd(Instruction *Or)
Return true if it may be profitable to convert this (X|Y) into (X+Y).
Definition: Reassociate.cpp:978
isLoadCombineCandidate
static bool isLoadCombineCandidate(Instruction *Or)
Definition: Reassociate.cpp:928
SetVector.h
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:97
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:773