LLVM  15.0.0git
Reassociate.cpp
Go to the documentation of this file.
1 //===- Reassociate.cpp - Reassociate binary expressions -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass reassociates commutative expressions in an order that is designed
10 // to promote better constant propagation, GCSE, LICM, PRE, etc.
11 //
12 // For example: 4 + (x + 5) -> x + (4 + 5)
13 //
14 // In the implementation of this algorithm, constants are assigned rank = 0,
15 // function arguments are rank = 1, and other values are assigned ranks
16 // corresponding to the reverse post order traversal of current function
17 // (starting at 2), which effectively gives values in deep loops higher rank
18 // than values not in loops.
19 //
20 //===----------------------------------------------------------------------===//
21 
23 #include "llvm/ADT/APFloat.h"
24 #include "llvm/ADT/APInt.h"
25 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/SmallSet.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/Statistic.h"
34 #include "llvm/IR/Argument.h"
35 #include "llvm/IR/BasicBlock.h"
36 #include "llvm/IR/CFG.h"
37 #include "llvm/IR/Constant.h"
38 #include "llvm/IR/Constants.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/IRBuilder.h"
41 #include "llvm/IR/InstrTypes.h"
42 #include "llvm/IR/Instruction.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/Operator.h"
45 #include "llvm/IR/PassManager.h"
46 #include "llvm/IR/PatternMatch.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/IR/User.h"
49 #include "llvm/IR/Value.h"
50 #include "llvm/IR/ValueHandle.h"
51 #include "llvm/InitializePasses.h"
52 #include "llvm/Pass.h"
53 #include "llvm/Support/Casting.h"
54 #include "llvm/Support/Debug.h"
56 #include "llvm/Transforms/Scalar.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <utility>
61 
62 using namespace llvm;
63 using namespace reassociate;
64 using namespace PatternMatch;
65 
66 #define DEBUG_TYPE "reassociate"
67 
68 STATISTIC(NumChanged, "Number of insts reassociated");
69 STATISTIC(NumAnnihil, "Number of expr tree annihilated");
70 STATISTIC(NumFactor , "Number of multiplies factored");
71 
72 #ifndef NDEBUG
73 /// Print out the expression identified in the Ops list.
75  Module *M = I->getModule();
76  dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
77  << *Ops[0].Op->getType() << '\t';
78  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
79  dbgs() << "[ ";
80  Ops[i].Op->printAsOperand(dbgs(), false, M);
81  dbgs() << ", #" << Ops[i].Rank << "] ";
82  }
83 }
84 #endif
85 
86 /// Utility class representing a non-constant Xor-operand. We classify
87 /// non-constant Xor-Operands into two categories:
88 /// C1) The operand is in the form "X & C", where C is a constant and C != ~0
89 /// C2)
90 /// C2.1) The operand is in the form of "X | C", where C is a non-zero
91 /// constant.
92 /// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this
93 /// operand as "E | 0"
95 public:
96  XorOpnd(Value *V);
97 
98  bool isInvalid() const { return SymbolicPart == nullptr; }
99  bool isOrExpr() const { return isOr; }
100  Value *getValue() const { return OrigVal; }
101  Value *getSymbolicPart() const { return SymbolicPart; }
102  unsigned getSymbolicRank() const { return SymbolicRank; }
103  const APInt &getConstPart() const { return ConstPart; }
104 
105  void Invalidate() { SymbolicPart = OrigVal = nullptr; }
106  void setSymbolicRank(unsigned R) { SymbolicRank = R; }
107 
108 private:
109  Value *OrigVal;
110  Value *SymbolicPart;
111  APInt ConstPart;
112  unsigned SymbolicRank;
113  bool isOr;
114 };
115 
117  assert(!isa<ConstantInt>(V) && "No ConstantInt");
118  OrigVal = V;
119  Instruction *I = dyn_cast<Instruction>(V);
120  SymbolicRank = 0;
121 
122  if (I && (I->getOpcode() == Instruction::Or ||
123  I->getOpcode() == Instruction::And)) {
124  Value *V0 = I->getOperand(0);
125  Value *V1 = I->getOperand(1);
126  const APInt *C;
127  if (match(V0, m_APInt(C)))
128  std::swap(V0, V1);
129 
130  if (match(V1, m_APInt(C))) {
131  ConstPart = *C;
132  SymbolicPart = V0;
133  isOr = (I->getOpcode() == Instruction::Or);
134  return;
135  }
136  }
137 
138  // view the operand as "V | 0"
139  SymbolicPart = V;
140  ConstPart = APInt::getZero(V->getType()->getScalarSizeInBits());
141  isOr = true;
142 }
143 
144 /// Return true if V is an instruction of the specified opcode and if it
145 /// only has one use.
146 static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
147  auto *I = dyn_cast<Instruction>(V);
148  if (I && I->hasOneUse() && I->getOpcode() == Opcode)
149  if (!isa<FPMathOperator>(I) || I->isFast())
150  return cast<BinaryOperator>(I);
151  return nullptr;
152 }
153 
154 static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
155  unsigned Opcode2) {
156  auto *I = dyn_cast<Instruction>(V);
157  if (I && I->hasOneUse() &&
158  (I->getOpcode() == Opcode1 || I->getOpcode() == Opcode2))
159  if (!isa<FPMathOperator>(I) || I->isFast())
160  return cast<BinaryOperator>(I);
161  return nullptr;
162 }
163 
164 void ReassociatePass::BuildRankMap(Function &F,
166  unsigned Rank = 2;
167 
168  // Assign distinct ranks to function arguments.
169  for (auto &Arg : F.args()) {
170  ValueRankMap[&Arg] = ++Rank;
171  LLVM_DEBUG(dbgs() << "Calculated Rank[" << Arg.getName() << "] = " << Rank
172  << "\n");
173  }
174 
175  // Traverse basic blocks in ReversePostOrder.
176  for (BasicBlock *BB : RPOT) {
177  unsigned BBRank = RankMap[BB] = ++Rank << 16;
178 
179  // Walk the basic block, adding precomputed ranks for any instructions that
180  // we cannot move. This ensures that the ranks for these instructions are
181  // all different in the block.
182  for (Instruction &I : *BB)
184  ValueRankMap[&I] = ++BBRank;
185  }
186 }
187 
188 unsigned ReassociatePass::getRank(Value *V) {
189  Instruction *I = dyn_cast<Instruction>(V);
190  if (!I) {
191  if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
192  return 0; // Otherwise it's a global or constant, rank 0.
193  }
194 
195  if (unsigned Rank = ValueRankMap[I])
196  return Rank; // Rank already known?
197 
198  // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
199  // we can reassociate expressions for code motion! Since we do not recurse
200  // for PHI nodes, we cannot have infinite recursion here, because there
201  // cannot be loops in the value graph that do not go through PHI nodes.
202  unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
203  for (unsigned i = 0, e = I->getNumOperands(); i != e && Rank != MaxRank; ++i)
204  Rank = std::max(Rank, getRank(I->getOperand(i)));
205 
206  // If this is a 'not' or 'neg' instruction, do not count it for rank. This
207  // assures us that X and ~X will have the same rank.
208  if (!match(I, m_Not(m_Value())) && !match(I, m_Neg(m_Value())) &&
209  !match(I, m_FNeg(m_Value())))
210  ++Rank;
211 
212  LLVM_DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank
213  << "\n");
214 
215  return ValueRankMap[I] = Rank;
216 }
217 
218 // Canonicalize constants to RHS. Otherwise, sort the operands by rank.
219 void ReassociatePass::canonicalizeOperands(Instruction *I) {
220  assert(isa<BinaryOperator>(I) && "Expected binary operator.");
221  assert(I->isCommutative() && "Expected commutative operator.");
222 
223  Value *LHS = I->getOperand(0);
224  Value *RHS = I->getOperand(1);
225  if (LHS == RHS || isa<Constant>(RHS))
226  return;
227  if (isa<Constant>(LHS) || getRank(RHS) < getRank(LHS))
228  cast<BinaryOperator>(I)->swapOperands();
229 }
230 
231 static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
232  Instruction *InsertBefore, Value *FlagsOp) {
233  if (S1->getType()->isIntOrIntVectorTy())
234  return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
235  else {
236  BinaryOperator *Res =
237  BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore);
238  Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
239  return Res;
240  }
241 }
242 
243 static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
244  Instruction *InsertBefore, Value *FlagsOp) {
245  if (S1->getType()->isIntOrIntVectorTy())
246  return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
247  else {
248  BinaryOperator *Res =
249  BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore);
250  Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
251  return Res;
252  }
253 }
254 
255 static Instruction *CreateNeg(Value *S1, const Twine &Name,
256  Instruction *InsertBefore, Value *FlagsOp) {
257  if (S1->getType()->isIntOrIntVectorTy())
258  return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
259 
260  if (auto *FMFSource = dyn_cast<Instruction>(FlagsOp))
261  return UnaryOperator::CreateFNegFMF(S1, FMFSource, Name, InsertBefore);
262 
263  return UnaryOperator::CreateFNeg(S1, Name, InsertBefore);
264 }
265 
266 /// Replace 0-X with X*-1.
268  assert((isa<UnaryOperator>(Neg) || isa<BinaryOperator>(Neg)) &&
269  "Expected a Negate!");
270  // FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0.
271  unsigned OpNo = isa<BinaryOperator>(Neg) ? 1 : 0;
272  Type *Ty = Neg->getType();
273  Constant *NegOne = Ty->isIntOrIntVectorTy() ?
275 
276  BinaryOperator *Res = CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg, Neg);
277  Neg->setOperand(OpNo, Constant::getNullValue(Ty)); // Drop use of op.
278  Res->takeName(Neg);
279  Neg->replaceAllUsesWith(Res);
280  Res->setDebugLoc(Neg->getDebugLoc());
281  return Res;
282 }
283 
/// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael
/// function. This means that x^(2^k) === 1 mod 2^Bitwidth for
/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every
/// even x in Bitwidth-bit arithmetic.
static unsigned CarmichaelShift(unsigned Bitwidth) {
  // lambda(2) = 1 = 2^0 and lambda(4) = 2 = 2^1; for 2^n with n >= 3,
  // lambda(2^n) = 2^(n-2).
  return Bitwidth < 3 ? Bitwidth - 1 : Bitwidth - 2;
}
294 
295 /// Add the extra weight 'RHS' to the existing weight 'LHS',
296 /// reducing the combined weight using any special properties of the operation.
297 /// The existing weight LHS represents the computation X op X op ... op X where
298 /// X occurs LHS times. The combined weight represents X op X op ... op X with
299 /// X occurring LHS + RHS times. If op is "Xor" for example then the combined
300 /// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even;
301 /// the routine returns 1 in LHS in the first case, and 0 in LHS in the second.
302 static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
303  // If we were working with infinite precision arithmetic then the combined
304  // weight would be LHS + RHS. But we are using finite precision arithmetic,
305  // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct
306  // for nilpotent operations and addition, but not for idempotent operations
307  // and multiplication), so it is important to correctly reduce the combined
308  // weight back into range if wrapping would be wrong.
309 
310  // If RHS is zero then the weight didn't change.
311  if (RHS.isMinValue())
312  return;
313  // If LHS is zero then the combined weight is RHS.
314  if (LHS.isMinValue()) {
315  LHS = RHS;
316  return;
317  }
318  // From this point on we know that neither LHS nor RHS is zero.
319 
320  if (Instruction::isIdempotent(Opcode)) {
321  // Idempotent means X op X === X, so any non-zero weight is equivalent to a
322  // weight of 1. Keeping weights at zero or one also means that wrapping is
323  // not a problem.
324  assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
325  return; // Return a weight of 1.
326  }
327  if (Instruction::isNilpotent(Opcode)) {
328  // Nilpotent means X op X === 0, so reduce weights modulo 2.
329  assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
330  LHS = 0; // 1 + 1 === 0 modulo 2.
331  return;
332  }
333  if (Opcode == Instruction::Add || Opcode == Instruction::FAdd) {
334  // TODO: Reduce the weight by exploiting nsw/nuw?
335  LHS += RHS;
336  return;
337  }
338 
339  assert((Opcode == Instruction::Mul || Opcode == Instruction::FMul) &&
340  "Unknown associative operation!");
341  unsigned Bitwidth = LHS.getBitWidth();
342  // If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth
343  // can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth
344  // bit number x, since either x is odd in which case x^CM = 1, or x is even in
345  // which case both x^W and x^(W - CM) are zero. By subtracting off multiples
346  // of CM like this weights can always be reduced to the range [0, CM+Bitwidth)
347  // which by a happy accident means that they can always be represented using
348  // Bitwidth bits.
349  // TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than
350  // the Carmichael number).
351  if (Bitwidth > 3) {
352  /// CM - The value of Carmichael's lambda function.
353  APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth));
354  // Any weight W >= Threshold can be replaced with W - CM.
355  APInt Threshold = CM + Bitwidth;
356  assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!");
357  // For Bitwidth 4 or more the following sum does not overflow.
358  LHS += RHS;
359  while (LHS.uge(Threshold))
360  LHS -= CM;
361  } else {
362  // To avoid problems with overflow do everything the same as above but using
363  // a larger type.
364  unsigned CM = 1U << CarmichaelShift(Bitwidth);
365  unsigned Threshold = CM + Bitwidth;
366  assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold &&
367  "Weights not reduced!");
368  unsigned Total = LHS.getZExtValue() + RHS.getZExtValue();
369  while (Total >= Threshold)
370  Total -= CM;
371  LHS = Total;
372  }
373 }
374 
375 using RepeatedValue = std::pair<Value*, APInt>;
376 
377 /// Given an associative binary expression, return the leaf
378 /// nodes in Ops along with their weights (how many times the leaf occurs). The
379 /// original expression is the same as
380 /// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
381 /// op
382 /// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
383 /// op
384 /// ...
385 /// op
386 /// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
387 ///
388 /// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
389 ///
390 /// This routine may modify the function, in which case it returns 'true'. The
391 /// changes it makes may well be destructive, changing the value computed by 'I'
392 /// to something completely different. Thus if the routine returns 'true' then
393 /// you MUST either replace I with a new expression computed from the Ops array,
394 /// or use RewriteExprTree to put the values back in.
395 ///
396 /// A leaf node is either not a binary operation of the same kind as the root
397 /// node 'I' (i.e. is not a binary operator at all, or is, but with a different
398 /// opcode), or is the same kind of binary operator but has a use which either
399 /// does not belong to the expression, or does belong to the expression but is
400 /// a leaf node. Every leaf node has at least one use that is a non-leaf node
401 /// of the expression, while for non-leaf nodes (except for the root 'I') every
402 /// use is a non-leaf node of the expression.
403 ///
404 /// For example:
405 /// expression graph node names
406 ///
407 /// + | I
408 /// / \ |
409 /// + + | A, B
410 /// / \ / \ |
411 /// * + * | C, D, E
412 /// / \ / \ / \ |
413 /// + * | F, G
414 ///
415 /// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
416 /// that order) (C, 1), (E, 1), (F, 2), (G, 2).
417 ///
418 /// The expression is maximal: if some instruction is a binary operator of the
419 /// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
420 /// then the instruction also belongs to the expression, is not a leaf node of
421 /// it, and its operands also belong to the expression (but may be leaf nodes).
422 ///
423 /// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
424 /// order to ensure that every non-root node in the expression has *exactly one*
425 /// use by a non-leaf node of the expression. This destruction means that the
426 /// caller MUST either replace 'I' with a new expression or use something like
427 /// RewriteExprTree to put the values back in if the routine indicates that it
428 /// made a change by returning 'true'.
429 ///
430 /// In the above example either the right operand of A or the left operand of B
431 /// will be replaced by undef. If it is B's operand then this gives:
432 ///
433 /// + | I
434 /// / \ |
435 /// + + | A, B - operand of B replaced with undef
436 /// / \ \ |
437 /// * + * | C, D, E
438 /// / \ / \ / \ |
439 /// + * | F, G
440 ///
441 /// Note that such undef operands can only be reached by passing through 'I'.
442 /// For example, if you visit operands recursively starting from a leaf node
443 /// then you will never see such an undef operand unless you get back to 'I',
444 /// which requires passing through a phi node.
445 ///
446 /// Note that this routine may also mutate binary operators of the wrong type
447 /// that have all uses inside the expression (i.e. only used by non-leaf nodes
448 /// of the expression) if it can turn them into binary operators of the right
449 /// type and thus make the expression bigger.
452  assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
453  "Expected a UnaryOperator or BinaryOperator!");
454  LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
455  unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
456  unsigned Opcode = I->getOpcode();
457  assert(I->isAssociative() && I->isCommutative() &&
458  "Expected an associative and commutative operation!");
459 
460  // Visit all operands of the expression, keeping track of their weight (the
461  // number of paths from the expression root to the operand, or if you like
462  // the number of times that operand occurs in the linearized expression).
463  // For example, if I = X + A, where X = A + B, then I, X and B have weight 1
464  // while A has weight two.
465 
466  // Worklist of non-leaf nodes (their operands are in the expression too) along
467  // with their weights, representing a certain number of paths to the operator.
468  // If an operator occurs in the worklist multiple times then we found multiple
469  // ways to get to it.
470  SmallVector<std::pair<Instruction*, APInt>, 8> Worklist; // (Op, Weight)
471  Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
472  bool Changed = false;
473 
474  // Leaves of the expression are values that either aren't the right kind of
475  // operation (eg: a constant, or a multiply in an add tree), or are, but have
476  // some uses that are not inside the expression. For example, in I = X + X,
477  // X = A + B, the value X has two uses (by I) that are in the expression. If
478  // X has any other uses, for example in a return instruction, then we consider
479  // X to be a leaf, and won't analyze it further. When we first visit a value,
480  // if it has more than one use then at first we conservatively consider it to
481  // be a leaf. Later, as the expression is explored, we may discover some more
482  // uses of the value from inside the expression. If all uses turn out to be
483  // from within the expression (and the value is a binary operator of the right
484  // kind) then the value is no longer considered to be a leaf, and its operands
485  // are explored.
486 
487  // Leaves - Keeps track of the set of putative leaves as well as the number of
488  // paths to each leaf seen so far.
489  using LeafMap = DenseMap<Value *, APInt>;
490  LeafMap Leaves; // Leaf -> Total weight so far.
491  SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order.
492 
493 #ifndef NDEBUG
494  SmallPtrSet<Value *, 8> Visited; // For checking the iteration scheme.
495 #endif
496  while (!Worklist.empty()) {
497  std::pair<Instruction*, APInt> P = Worklist.pop_back_val();
498  I = P.first; // We examine the operands of this binary operator.
499 
500  for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
501  Value *Op = I->getOperand(OpIdx);
502  APInt Weight = P.second; // Number of paths to this operand.
503  LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
504  assert(!Op->use_empty() && "No uses, so how did we get to it?!");
505 
506  // If this is a binary operation of the right kind with only one use then
507  // add its operands to the expression.
508  if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
509  assert(Visited.insert(Op).second && "Not first visit!");
510  LLVM_DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
511  Worklist.push_back(std::make_pair(BO, Weight));
512  continue;
513  }
514 
515  // Appears to be a leaf. Is the operand already in the set of leaves?
516  LeafMap::iterator It = Leaves.find(Op);
517  if (It == Leaves.end()) {
518  // Not in the leaf map. Must be the first time we saw this operand.
519  assert(Visited.insert(Op).second && "Not first visit!");
520  if (!Op->hasOneUse()) {
521  // This value has uses not accounted for by the expression, so it is
522  // not safe to modify. Mark it as being a leaf.
523  LLVM_DEBUG(dbgs()
524  << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
525  LeafOrder.push_back(Op);
526  Leaves[Op] = Weight;
527  continue;
528  }
529  // No uses outside the expression, try morphing it.
530  } else {
531  // Already in the leaf map.
532  assert(It != Leaves.end() && Visited.count(Op) &&
533  "In leaf map but not visited!");
534 
535  // Update the number of paths to the leaf.
536  IncorporateWeight(It->second, Weight, Opcode);
537 
538 #if 0 // TODO: Re-enable once PR13021 is fixed.
539  // The leaf already has one use from inside the expression. As we want
540  // exactly one such use, drop this new use of the leaf.
541  assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
542  I->setOperand(OpIdx, UndefValue::get(I->getType()));
543  Changed = true;
544 
545  // If the leaf is a binary operation of the right kind and we now see
546  // that its multiple original uses were in fact all by nodes belonging
547  // to the expression, then no longer consider it to be a leaf and add
548  // its operands to the expression.
549  if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
550  LLVM_DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n");
551  Worklist.push_back(std::make_pair(BO, It->second));
552  Leaves.erase(It);
553  continue;
554  }
555 #endif
556 
557  // If we still have uses that are not accounted for by the expression
558  // then it is not safe to modify the value.
559  if (!Op->hasOneUse())
560  continue;
561 
562  // No uses outside the expression, try morphing it.
563  Weight = It->second;
564  Leaves.erase(It); // Since the value may be morphed below.
565  }
566 
567  // At this point we have a value which, first of all, is not a binary
568  // expression of the right kind, and secondly, is only used inside the
569  // expression. This means that it can safely be modified. See if we
570  // can usefully morph it into an expression of the right kind.
571  assert((!isa<Instruction>(Op) ||
572  cast<Instruction>(Op)->getOpcode() != Opcode
573  || (isa<FPMathOperator>(Op) &&
574  !cast<Instruction>(Op)->isFast())) &&
575  "Should have been handled above!");
576  assert(Op->hasOneUse() && "Has uses outside the expression tree!");
577 
578  // If this is a multiply expression, turn any internal negations into
579  // multiplies by -1 so they can be reassociated.
580  if (Instruction *Tmp = dyn_cast<Instruction>(Op))
581  if ((Opcode == Instruction::Mul && match(Tmp, m_Neg(m_Value()))) ||
582  (Opcode == Instruction::FMul && match(Tmp, m_FNeg(m_Value())))) {
583  LLVM_DEBUG(dbgs()
584  << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
585  Tmp = LowerNegateToMultiply(Tmp);
586  LLVM_DEBUG(dbgs() << *Tmp << '\n');
587  Worklist.push_back(std::make_pair(Tmp, Weight));
588  Changed = true;
589  continue;
590  }
591 
592  // Failed to morph into an expression of the right type. This really is
593  // a leaf.
594  LLVM_DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
595  assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
596  LeafOrder.push_back(Op);
597  Leaves[Op] = Weight;
598  }
599  }
600 
601  // The leaves, repeated according to their weights, represent the linearized
602  // form of the expression.
603  for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) {
604  Value *V = LeafOrder[i];
605  LeafMap::iterator It = Leaves.find(V);
606  if (It == Leaves.end())
607  // Node initially thought to be a leaf wasn't.
608  continue;
609  assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
610  APInt Weight = It->second;
611  if (Weight.isMinValue())
612  // Leaf already output or weight reduction eliminated it.
613  continue;
614  // Ensure the leaf is only output once.
615  It->second = 0;
616  Ops.push_back(std::make_pair(V, Weight));
617  }
618 
619  // For nilpotent operations or addition there may be no operands, for example
620  // because the expression was "X xor X" or consisted of 2^Bitwidth additions:
621  // in both cases the weight reduces to 0 causing the value to be skipped.
622  if (Ops.empty()) {
623  Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
624  assert(Identity && "Associative operation without identity!");
625  Ops.emplace_back(Identity, APInt(Bitwidth, 1));
626  }
627 
628  return Changed;
629 }
630 
631 /// Now that the operands for this expression tree are
632 /// linearized and optimized, emit them in-order.
633 void ReassociatePass::RewriteExprTree(BinaryOperator *I,
635  assert(Ops.size() > 1 && "Single values should be used directly!");
636 
637  // Since our optimizations should never increase the number of operations, the
638  // new expression can usually be written reusing the existing binary operators
639  // from the original expression tree, without creating any new instructions,
640  // though the rewritten expression may have a completely different topology.
641  // We take care to not change anything if the new expression will be the same
642  // as the original. If more than trivial changes (like commuting operands)
643  // were made then we are obliged to clear out any optional subclass data like
644  // nsw flags.
645 
646  /// NodesToRewrite - Nodes from the original expression available for writing
647  /// the new expression into.
648  SmallVector<BinaryOperator*, 8> NodesToRewrite;
649  unsigned Opcode = I->getOpcode();
650  BinaryOperator *Op = I;
651 
652  /// NotRewritable - The operands being written will be the leaves of the new
653  /// expression and must not be used as inner nodes (via NodesToRewrite) by
654  /// mistake. Inner nodes are always reassociable, and usually leaves are not
655  /// (if they were they would have been incorporated into the expression and so
656  /// would not be leaves), so most of the time there is no danger of this. But
657  /// in rare cases a leaf may become reassociable if an optimization kills uses
658  /// of it, or it may momentarily become reassociable during rewriting (below)
659  /// due it being removed as an operand of one of its uses. Ensure that misuse
660  /// of leaf nodes as inner nodes cannot occur by remembering all of the future
661  /// leaves and refusing to reuse any of them as inner nodes.
662  SmallPtrSet<Value*, 8> NotRewritable;
663  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
664  NotRewritable.insert(Ops[i].Op);
665 
666  // ExpressionChanged - Non-null if the rewritten expression differs from the
667  // original in some non-trivial way, requiring the clearing of optional flags.
668  // Flags are cleared from the operator in ExpressionChanged up to I inclusive.
669  BinaryOperator *ExpressionChanged = nullptr;
670  for (unsigned i = 0; ; ++i) {
671  // The last operation (which comes earliest in the IR) is special as both
672  // operands will come from Ops, rather than just one with the other being
673  // a subexpression.
674  if (i+2 == Ops.size()) {
675  Value *NewLHS = Ops[i].Op;
676  Value *NewRHS = Ops[i+1].Op;
677  Value *OldLHS = Op->getOperand(0);
678  Value *OldRHS = Op->getOperand(1);
679 
680  if (NewLHS == OldLHS && NewRHS == OldRHS)
681  // Nothing changed, leave it alone.
682  break;
683 
684  if (NewLHS == OldRHS && NewRHS == OldLHS) {
685  // The order of the operands was reversed. Swap them.
686  LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
687  Op->swapOperands();
688  LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
689  MadeChange = true;
690  ++NumChanged;
691  break;
692  }
693 
694  // The new operation differs non-trivially from the original. Overwrite
695  // the old operands with the new ones.
696  LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
697  if (NewLHS != OldLHS) {
698  BinaryOperator *BO = isReassociableOp(OldLHS, Opcode);
699  if (BO && !NotRewritable.count(BO))
700  NodesToRewrite.push_back(BO);
701  Op->setOperand(0, NewLHS);
702  }
703  if (NewRHS != OldRHS) {
704  BinaryOperator *BO = isReassociableOp(OldRHS, Opcode);
705  if (BO && !NotRewritable.count(BO))
706  NodesToRewrite.push_back(BO);
707  Op->setOperand(1, NewRHS);
708  }
709  LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
710 
711  ExpressionChanged = Op;
712  MadeChange = true;
713  ++NumChanged;
714 
715  break;
716  }
717 
718  // Not the last operation. The left-hand side will be a sub-expression
719  // while the right-hand side will be the current element of Ops.
720  Value *NewRHS = Ops[i].Op;
721  if (NewRHS != Op->getOperand(1)) {
722  LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
723  if (NewRHS == Op->getOperand(0)) {
724  // The new right-hand side was already present as the left operand. If
725  // we are lucky then swapping the operands will sort out both of them.
726  Op->swapOperands();
727  } else {
728  // Overwrite with the new right-hand side.
729  BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
730  if (BO && !NotRewritable.count(BO))
731  NodesToRewrite.push_back(BO);
732  Op->setOperand(1, NewRHS);
733  ExpressionChanged = Op;
734  }
735  LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
736  MadeChange = true;
737  ++NumChanged;
738  }
739 
740  // Now deal with the left-hand side. If this is already an operation node
741  // from the original expression then just rewrite the rest of the expression
742  // into it.
743  BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
744  if (BO && !NotRewritable.count(BO)) {
745  Op = BO;
746  continue;
747  }
748 
749  // Otherwise, grab a spare node from the original expression and use that as
750  // the left-hand side. If there are no nodes left then the optimizers made
751  // an expression with more nodes than the original! This usually means that
752  // they did something stupid but it might mean that the problem was just too
753  // hard (finding the mimimal number of multiplications needed to realize a
754  // multiplication expression is NP-complete). Whatever the reason, smart or
755  // stupid, create a new node if there are none left.
756  BinaryOperator *NewOp;
757  if (NodesToRewrite.empty()) {
758  Constant *Undef = UndefValue::get(I->getType());
760  Undef, Undef, "", I);
761  if (NewOp->getType()->isFPOrFPVectorTy())
762  NewOp->setFastMathFlags(I->getFastMathFlags());
763  } else {
764  NewOp = NodesToRewrite.pop_back_val();
765  }
766 
767  LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
768  Op->setOperand(0, NewOp);
769  LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
770  ExpressionChanged = Op;
771  MadeChange = true;
772  ++NumChanged;
773  Op = NewOp;
774  }
775 
776  // If the expression changed non-trivially then clear out all subclass data
777  // starting from the operator specified in ExpressionChanged, and compactify
778  // the operators to just before the expression root to guarantee that the
779  // expression tree is dominated by all of Ops.
780  if (ExpressionChanged)
781  do {
782  // Preserve FastMathFlags.
783  if (isa<FPMathOperator>(I)) {
784  FastMathFlags Flags = I->getFastMathFlags();
785  ExpressionChanged->clearSubclassOptionalData();
786  ExpressionChanged->setFastMathFlags(Flags);
787  } else
788  ExpressionChanged->clearSubclassOptionalData();
789 
790  if (ExpressionChanged == I)
791  break;
792 
793  // Discard any debug info related to the expressions that has changed (we
794  // can leave debug infor related to the root, since the result of the
795  // expression tree should be the same even after reassociation).
796  replaceDbgUsesWithUndef(ExpressionChanged);
797 
798  ExpressionChanged->moveBefore(I);
799  ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->user_begin());
800  } while (true);
801 
802  // Throw away any left over nodes from the original expression.
803  for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
804  RedoInsts.insert(NodesToRewrite[i]);
805 }
806 
/// Insert instructions before the instruction pointed to by BI,
/// that computes the negative version of the value specified. The negative
/// version of the value is returned, and BI is left pointing at the instruction
/// that should be processed next by the reassociation pass.
/// Also add intermediate instructions to the redo list that are modified while
/// pushing the negates through adds. These will be revisited to see if
/// additional opportunities have been exposed.
static Value *NegateValue(Value *V, Instruction *BI,
                          ReassociatePass::OrderedSet &ToRedo) {
  // Constants can be negated by constant folding; no instruction is needed.
  if (auto *C = dyn_cast<Constant>(V))
    return C->getType()->isFPOrFPVectorTy() ? ConstantExpr::getFNeg(C)
                                            : ConstantExpr::getNeg(C);

  // We are trying to expose opportunity for reassociation.  One of the things
  // that we want to do to achieve this is to push a negation as deep into an
  // expression chain as possible, to expose the add instructions.  In practice,
  // this means that we turn this:
  //   X = -(A+12+C+D)   into    X = -A + -12 + -C + -D = -12 + -A + -C + -D
  // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
  // the constants.  We assume that instcombine will clean up the mess later if
  // we introduce tons of unnecessary negation instructions.
  //
  if (BinaryOperator *I =
          isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
    // Push the negates through the add.
    I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
    I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
    if (I->getOpcode() == Instruction::Add) {
      // The negated operands may now wrap, so drop the poison-generating
      // flags on the integer add.
      I->setHasNoUnsignedWrap(false);
      I->setHasNoSignedWrap(false);
    }

    // We must move the add instruction here, because the neg instructions do
    // not dominate the old add instruction in general.  By moving it, we are
    // assured that the neg instructions we just inserted dominate the
    // instruction we are about to insert after them.
    //
    I->moveBefore(BI);
    I->setName(I->getName()+".neg");

    // Add the intermediate negates to the redo list as processing them later
    // could expose more reassociating opportunities.
    ToRedo.insert(I);
    return I;
  }

  // Okay, we need to materialize a negated version of V with an instruction.
  // Scan the use lists of V to see if we have one already.
  for (User *U : V->users()) {
    if (!match(U, m_Neg(m_Value())) && !match(U, m_FNeg(m_Value())))
      continue;

    // We found one!  Now we have to make sure that the definition dominates
    // this use.  We do this by moving it to the entry block (if it is a
    // non-instruction value) or right after the definition.  These negates will
    // be zapped by reassociate later, so we don't need much finesse here.
    Instruction *TheNeg = cast<Instruction>(U);

    // Verify that the negate is in this function, V might be a constant expr.
    if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
      continue;

    bool FoundCatchSwitch = false;

    BasicBlock::iterator InsertPt;
    if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
      if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
        // An invoke's value is only available in its normal destination.
        InsertPt = II->getNormalDest()->begin();
      } else {
        InsertPt = ++InstInput->getIterator();
      }

      const BasicBlock *BB = InsertPt->getParent();

      // Make sure we don't move anything before PHIs or exception
      // handling pads.
      while (InsertPt != BB->end() && (isa<PHINode>(InsertPt) ||
                                       InsertPt->isEHPad())) {
        if (isa<CatchSwitchInst>(InsertPt))
          // A catchswitch cannot have anything in the block except
          // itself and PHIs.  We'll bail out below.
          FoundCatchSwitch = true;
        ++InsertPt;
      }
    } else {
      // Non-instruction values (e.g. arguments) are available from the entry.
      InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
    }

    // We found a catchswitch in the block where we want to move the
    // neg.  We cannot move anything into that block.  Bail and just
    // create the neg before BI, as if we hadn't found an existing
    // neg.
    if (FoundCatchSwitch)
      break;

    TheNeg->moveBefore(&*InsertPt);
    if (TheNeg->getOpcode() == Instruction::Sub) {
      // The moved "0 - x" may now wrap; clear the wrap flags.
      TheNeg->setHasNoUnsignedWrap(false);
      TheNeg->setHasNoSignedWrap(false);
    } else {
      // For fneg, keep only IR flags common with the instruction we negate.
      TheNeg->andIRFlags(BI);
    }
    ToRedo.insert(TheNeg);
    return TheNeg;
  }

  // Insert a 'neg' instruction that subtracts the value from zero to get the
  // negation.
  Instruction *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
  ToRedo.insert(NewNeg);
  return NewNeg;
}
919 
// See if this `or` looks like a load widening reduction, i.e. that it
// consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't
// ensure that the pattern is *really* a load widening reduction,
// we do not ensure that it can really be replaced with a widened load,
// only that it mostly looks like one.
static bool isLoadCombineCandidate(Instruction *Or) {
  // Iterative DFS over the expression graph rooted at Or; Visited guards
  // against revisiting shared sub-expressions.
  SmallVector<Instruction *, 8> Worklist;
  SmallPtrSet<Instruction *, 8> Visited;

  auto Enqueue = [&](Value *V) {
    auto *I = dyn_cast<Instruction>(V);
    // Each node of an `or` reduction must be an instruction,
    if (!I)
      return false; // Node is certainly not part of an `or` load reduction.
    // Only process instructions we have never processed before.
    if (Visited.insert(I).second)
      Worklist.emplace_back(I);
    return true; // Will need to look at parent nodes.
  };

  if (!Enqueue(Or))
    return false; // Not an `or` reduction pattern.

  while (!Worklist.empty()) {
    auto *I = Worklist.pop_back_val();

    // Okay, which instruction is this node?
    switch (I->getOpcode()) {
    case Instruction::Or:
      // Got an `or` node. That's fine, just recurse into it's operands.
      for (Value *Op : I->operands())
        if (!Enqueue(Op))
          return false; // Not an `or` reduction pattern.
      continue;

    case Instruction::Shl:
    case Instruction::ZExt:
      // `shl`/`zext` nodes are fine, just recurse into their base operand.
      if (!Enqueue(I->getOperand(0)))
        return false; // Not an `or` reduction pattern.
      continue;

    case Instruction::Load:
      // Perfect, `load` node means we've reached an edge of the graph.
      continue;

    default:        // Unknown node.
      return false; // Not an `or` reduction pattern.
    }
  }

  // Every reachable node was or/shl/zext/load: looks like load widening.
  return true;
}
973 
/// Return true if it may be profitable to convert this (X|Y) into (X+Y).
static bool ShouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
  // Don't bother to convert this up unless either the LHS is an associable add
  // or subtract or mul or if this is only used by one of the above.
  // This is only a compile-time improvement, it is not needed for correctness!
  auto isInteresting = [](Value *V) {
    for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul,
                    Instruction::Shl})
      if (isReassociableOp(V, Op))
        return true;
    return false;
  };

  // Profitable if either operand already feeds a reassociable expression...
  if (any_of(Or->operands(), isInteresting))
    return true;

  // ...or if the `or` has a single user that is itself reassociable.
  Value *VB = Or->user_back();
  if (Or->hasOneUse() && isInteresting(VB))
    return true;

  return false;
}
996 
/// If we have (X|Y), and iff X and Y have no common bits set,
/// transform this into (X+Y) to allow arithmetics reassociation.
static BinaryOperator *convertOrWithNoCommonBitsToAdd(Instruction *Or) {
  // Convert an or into an add.
  BinaryOperator *New =
      CreateAdd(Or->getOperand(0), Or->getOperand(1), "", Or, Or);
  // With no common bits set, the add produces no carries, so it can wrap in
  // neither the signed nor the unsigned sense.
  New->setHasNoSignedWrap();
  New->setHasNoUnsignedWrap();
  New->takeName(Or);

  // Everyone now refers to the add instruction.
  Or->replaceAllUsesWith(New);
  New->setDebugLoc(Or->getDebugLoc());

  LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << '\n');
  return New;
}
1014 
/// Return true if we should break up this subtract of X-Y into (X + -Y).
static bool ShouldBreakUpSubtract(Instruction *Sub) {
  // If this is a negation, we can't split it up!
  if (match(Sub, m_Neg(m_Value())) || match(Sub, m_FNeg(m_Value())))
    return false;

  // Don't breakup X - undef.
  if (isa<UndefValue>(Sub->getOperand(1)))
    return false;

  // Don't bother to break this up unless either the LHS is an associable add or
  // subtract or if this is only used by one.
  Value *V0 = Sub->getOperand(0);
  if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) ||
      isReassociableOp(V0, Instruction::Sub, Instruction::FSub))
    return true;
  Value *V1 = Sub->getOperand(1);
  if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) ||
      isReassociableOp(V1, Instruction::Sub, Instruction::FSub))
    return true;
  // Also profitable when the single user of the subtract is reassociable.
  Value *VB = Sub->user_back();
  if (Sub->hasOneUse() &&
      (isReassociableOp(VB, Instruction::Add, Instruction::FAdd) ||
       isReassociableOp(VB, Instruction::Sub, Instruction::FSub)))
    return true;

  return false;
}
1043 
/// If we have (X-Y), and if either X is an add, or if this is only used by an
/// add, transform this into (X+(0-Y)) to promote better reassociation.
static BinaryOperator *BreakUpSubtract(Instruction *Sub,
                                       ReassociatePass::OrderedSet &ToRedo) {
  // Convert a subtract into an add and a neg instruction. This allows sub
  // instructions to be commuted with other add instructions.
  //
  // Calculate the negative value of Operand 1 of the sub instruction,
  // and set it as the RHS of the add instruction we just made.
  Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
  BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
  Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
  Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
  New->takeName(Sub);

  // Everyone now refers to the add instruction.
  Sub->replaceAllUsesWith(New);
  New->setDebugLoc(Sub->getDebugLoc());

  LLVM_DEBUG(dbgs() << "Negated: " << *New << '\n');
  return New;
}
1066 
/// If this is a shift of a reassociable multiply or is used by one, change
/// this into a multiply by a constant to assist with further reassociation.
static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
  // Build the multiplier (1 << ShAmt), the multiply-equivalent of the shift.
  Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
  auto *SA = cast<ConstantInt>(Shl->getOperand(1));
  MulCst = ConstantExpr::getShl(MulCst, SA);

  BinaryOperator *Mul =
      BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
  Shl->setOperand(0, UndefValue::get(Shl->getType())); // Drop use of op.
  Mul->takeName(Shl);

  // Everyone now refers to the mul instruction.
  Shl->replaceAllUsesWith(Mul);
  Mul->setDebugLoc(Shl->getDebugLoc());

  // We can safely preserve the nuw flag in all cases. It's also safe to turn a
  // nuw nsw shl into a nuw nsw mul. However, nsw in isolation requires special
  // handling. It can be preserved as long as we're not left shifting by
  // bitwidth - 1.
  bool NSW = cast<BinaryOperator>(Shl)->hasNoSignedWrap();
  bool NUW = cast<BinaryOperator>(Shl)->hasNoUnsignedWrap();
  unsigned BitWidth = Shl->getType()->getIntegerBitWidth();
  if (NSW && (NUW || SA->getValue().ult(BitWidth - 1)))
    Mul->setHasNoSignedWrap(true);
  Mul->setHasNoUnsignedWrap(NUW);
  return Mul;
}
1095 
/// Scan backwards and forwards among values with the same rank as element i
/// to see if X exists. If X does not exist, return i. This is useful when
/// scanning for 'x' when we see '-x' because they both get the same rank.
static unsigned FindInOperandList(const SmallVectorImpl<ValueEntry> &Ops,
                                  unsigned i, Value *X) {
  unsigned XRank = Ops[i].Rank;
  unsigned e = Ops.size();
  // Scan forwards while the entries still share X's rank.
  for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) {
    if (Ops[j].Op == X)
      return j;
    // Structurally identical instructions count as a match too.
    if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
      if (Instruction *I2 = dyn_cast<Instruction>(X))
        if (I1->isIdenticalTo(I2))
          return j;
  }
  // Scan backwards.
  for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) {
    if (Ops[j].Op == X)
      return j;
    if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
      if (Instruction *I2 = dyn_cast<Instruction>(X))
        if (I1->isIdenticalTo(I2))
          return j;
  }
  // Not found; return the original index as the sentinel.
  return i;
}
1122 
/// Emit a tree of add instructions, summing Ops together
/// and returning the result. Insert the tree before I.
static Value *EmitAddTreeOfValues(Instruction *I,
                                  SmallVectorImpl<WeakTrackingVH> &Ops) {
  if (Ops.size() == 1) return Ops.back();

  // Peel off the last operand, recursively sum the rest, then add the two.
  // Note: Ops is consumed (popped) by the recursion.
  Value *V1 = Ops.pop_back_val();
  Value *V2 = EmitAddTreeOfValues(I, Ops);
  return CreateAdd(V2, V1, "reass.add", I, I);
}
1133 
/// If V is an expression tree that is a multiplication sequence,
/// and if this sequence contains a multiply by Factor,
/// remove Factor from the tree and return the new tree.
Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
  if (!BO)
    return nullptr;

  // Flatten the multiply tree into (value, multiplicity) pairs, then expand
  // each pair into that many individual ranked factors.
  SmallVector<RepeatedValue, 8> Tree;
  MadeChange |= LinearizeExprTree(BO, Tree);
  SmallVector<ValueEntry, 8> Factors;
  Factors.reserve(Tree.size());
  for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
    RepeatedValue E = Tree[i];
    Factors.append(E.second.getZExtValue(),
                   ValueEntry(getRank(E.first), E.first));
  }

  bool FoundFactor = false;
  bool NeedsNegate = false;
  for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
    if (Factors[i].Op == Factor) {
      FoundFactor = true;
      Factors.erase(Factors.begin()+i);
      break;
    }

    // If this is a negative version of this factor, remove it.
    if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) {
      if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
        if (FC1->getValue() == -FC2->getValue()) {
          FoundFactor = NeedsNegate = true;
          Factors.erase(Factors.begin()+i);
          break;
        }
    } else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) {
      if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Factors[i].Op)) {
        // Compare against the sign-flipped FP constant.
        const APFloat &F1 = FC1->getValueAPF();
        APFloat F2(FC2->getValueAPF());
        F2.changeSign();
        if (F1 == F2) {
          FoundFactor = NeedsNegate = true;
          Factors.erase(Factors.begin() + i);
          break;
        }
      }
    }
  }

  if (!FoundFactor) {
    // Make sure to restore the operands to the expression tree.
    RewriteExprTree(BO, Factors);
    return nullptr;
  }

  BasicBlock::iterator InsertPt = ++BO->getIterator();

  // If this was just a single multiply, remove the multiply and return the only
  // remaining operand.
  if (Factors.size() == 1) {
    RedoInsts.insert(BO);
    V = Factors[0].Op;
  } else {
    RewriteExprTree(BO, Factors);
    V = BO;
  }

  // If we removed the negated form of the factor, compensate with a negate.
  if (NeedsNegate)
    V = CreateNeg(V, "neg", &*InsertPt, BO);

  return V;
}
1206 
/// If V is a single-use multiply, recursively add its operands as factors,
/// otherwise add V to the list of factors.
///
/// Ops is the top-level list of add operands we're trying to factor.
static void FindSingleUseMultiplyFactors(Value *V,
                                         SmallVectorImpl<Value *> &Factors) {
  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
  if (!BO) {
    // Leaf of the multiply tree: V itself is a factor.
    Factors.push_back(V);
    return;
  }

  // Otherwise, add the LHS and RHS to the list of factors.
  FindSingleUseMultiplyFactors(BO->getOperand(1), Factors);
  FindSingleUseMultiplyFactors(BO->getOperand(0), Factors);
}
1223 
/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
/// This optimizes based on identities. If it can be reduced to a single Value,
/// it is returned, otherwise the Ops list is mutated as necessary.
static Value *OptimizeAndOrXor(unsigned Opcode,
                               SmallVectorImpl<ValueEntry> &Ops) {
  // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
  // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    // First, check for X and ~X in the operand list.
    assert(i < Ops.size());
    Value *X;
    if (match(Ops[i].Op, m_Not(m_Value(X)))) { // Cannot occur for ^.
      // X and ~X share a rank, so FindInOperandList can locate the partner.
      unsigned FoundX = FindInOperandList(Ops, i, X);
      if (FoundX != i) {
        if (Opcode == Instruction::And) // ...&X&~X = 0
          return Constant::getNullValue(X->getType());

        if (Opcode == Instruction::Or) // ...|X|~X = -1
          return Constant::getAllOnesValue(X->getType());
      }
    }

    // Next, check for duplicate pairs of values, which we assume are next to
    // each other, due to our sorting criteria.
    assert(i < Ops.size());
    if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
      if (Opcode == Instruction::And || Opcode == Instruction::Or) {
        // Drop duplicate values for And and Or.
        Ops.erase(Ops.begin()+i);
        --i; --e;
        ++NumAnnihil;
        continue;
      }

      // Drop pairs of values for Xor.
      assert(Opcode == Instruction::Xor);
      if (e == 2)
        return Constant::getNullValue(Ops[0].Op->getType());

      // Y ^ X^X -> Y
      Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
      i -= 1; e -= 2;
      ++NumAnnihil;
    }
  }
  // No single-value reduction; Ops may have been shortened in place.
  return nullptr;
}
1271 
1272 /// Helper function of CombineXorOpnd(). It creates a bitwise-and
1273 /// instruction with the given two operands, and return the resulting
1274 /// instruction. There are two special cases: 1) if the constant operand is 0,
1275 /// it will return NULL. 2) if the constant is ~0, the symbolic operand will
1276 /// be returned.
1277 static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
1278  const APInt &ConstOpnd) {
1279  if (ConstOpnd.isZero())
1280  return nullptr;
1281 
1282  if (ConstOpnd.isAllOnes())
1283  return Opnd;
1284 
1285  Instruction *I = BinaryOperator::CreateAnd(
1286  Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra",
1287  InsertBefore);
1288  I->setDebugLoc(InsertBefore->getDebugLoc());
1289  return I;
1290 }
1291 
1292 // Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
1293 // into "R ^ C", where C would be 0, and R is a symbolic value.
1294 //
1295 // If it was successful, true is returned, and the "R" and "C" is returned
1296 // via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
1297 // and both "Res" and "ConstOpnd" remain unchanged.
1298 bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
1299  APInt &ConstOpnd, Value *&Res) {
1300  // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
1301  // = ((x | c1) ^ c1) ^ (c1 ^ c2)
1302  // = (x & ~c1) ^ (c1 ^ c2)
1303  // It is useful only when c1 == c2.
1304  if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isZero())
1305  return false;
1306 
1307  if (!Opnd1->getValue()->hasOneUse())
1308  return false;
1309 
1310  const APInt &C1 = Opnd1->getConstPart();
1311  if (C1 != ConstOpnd)
1312  return false;
1313 
1314  Value *X = Opnd1->getSymbolicPart();
1315  Res = createAndInstr(I, X, ~C1);
1316  // ConstOpnd was C2, now C1 ^ C2.
1317  ConstOpnd ^= C1;
1318 
1319  if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1320  RedoInsts.insert(T);
1321  return true;
1322 }
1323 
// Helper function of OptimizeXor(). It tries to simplify
// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
// symbolic value.
//
// If it was successful, true is returned, and the "R" and "C" is returned
// via "Res" and "ConstOpnd", respectively (If the entire expression is
// evaluated to a constant, the Res is set to NULL); otherwise, false is
// returned, and both "Res" and "ConstOpnd" remain unchanged.
bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
                                     XorOpnd *Opnd2, APInt &ConstOpnd,
                                     Value *&Res) {
  // All of the rules below require both operands to share one symbolic part.
  Value *X = Opnd1->getSymbolicPart();
  if (X != Opnd2->getSymbolicPart())
    return false;

  // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
  int DeadInstNum = 1;
  if (Opnd1->getValue()->hasOneUse())
    DeadInstNum++;
  if (Opnd2->getValue()->hasOneUse())
    DeadInstNum++;

  // Xor-Rule 2:
  //  (x | c1) ^ (x & c2)
  //   = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
  //   = (x & ~c1) ^ (x & c2) ^ c1         // Xor-Rule 1
  //   = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
  //
  if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
    // Canonicalize so Opnd1 is the or-expression and Opnd2 the and-expression.
    if (Opnd2->isOrExpr())
      std::swap(Opnd1, Opnd2);

    const APInt &C1 = Opnd1->getConstPart();
    const APInt &C2 = Opnd2->getConstPart();
    APInt C3((~C1) ^ C2);

    // Do not increase code size!
    // (createAndInstr emits no instruction for a zero or all-ones mask.)
    if (!C3.isZero() && !C3.isAllOnes()) {
      int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
      if (NewInstNum > DeadInstNum)
        return false;
    }

    Res = createAndInstr(I, X, C3);
    ConstOpnd ^= C1;
  } else if (Opnd1->isOrExpr()) {
    // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
    //
    const APInt &C1 = Opnd1->getConstPart();
    const APInt &C2 = Opnd2->getConstPart();
    APInt C3 = C1 ^ C2;

    // Do not increase code size
    if (!C3.isZero() && !C3.isAllOnes()) {
      int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
      if (NewInstNum > DeadInstNum)
        return false;
    }

    Res = createAndInstr(I, X, C3);
    ConstOpnd ^= C3;
  } else {
    // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
    //
    const APInt &C1 = Opnd1->getConstPart();
    const APInt &C2 = Opnd2->getConstPart();
    APInt C3 = C1 ^ C2;
    Res = createAndInstr(I, X, C3);
  }

  // Put the original operands in the Redo list; hope they will be deleted
  // as dead code.
  if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
    RedoInsts.insert(T);
  if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
    RedoInsts.insert(T);

  return true;
}
1403 
/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
/// to a single Value, it is returned, otherwise the Ops list is mutated as
/// necessary.
Value *ReassociatePass::OptimizeXor(Instruction *I,
                                    SmallVectorImpl<ValueEntry> &Ops) {
  // First try the generic and/or/xor identities (duplicate and X^X removal).
  if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
    return V;

  if (Ops.size() == 1)
    return nullptr;

  SmallVector<XorOpnd, 8> Opnds;
  SmallVector<XorOpnd*, 8> OpndPtrs;
  Type *Ty = Ops[0].Op->getType();
  // Running xor of all constant operands.
  APInt ConstOpnd(Ty->getScalarSizeInBits(), 0);

  // Step 1: Convert ValueEntry to XorOpnd
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    Value *V = Ops[i].Op;
    const APInt *C;
    // TODO: Support non-splat vectors.
    if (match(V, m_APInt(C))) {
      ConstOpnd ^= *C;
    } else {
      XorOpnd O(V);
      O.setSymbolicRank(getRank(O.getSymbolicPart()));
      Opnds.push_back(O);
    }
  }

  // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
  // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
  // the "OpndPtrs" as well. For the similar reason, do not fuse this loop
  // with the previous loop --- the iterator of the "Opnds" may be invalidated
  // when new elements are added to the vector.
  for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
    OpndPtrs.push_back(&Opnds[i]);

  // Step 2: Sort the Xor-Operands in a way such that the operands containing
  // the same symbolic value cluster together. For instance, the input operand
  // sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
  // ("x | 123", "x & 789", "y & 456").
  //
  // The purpose is twofold:
  // 1) Cluster together the operands sharing the same symbolic-value.
  // 2) Operand having smaller symbolic-value-rank is permuted earlier, which
  //   could potentially shorten crital path, and expose more loop-invariants.
  //   Note that values' rank are basically defined in RPO order (FIXME).
  //   So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
  //   than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
  //   "z" in the order of X-Y-Z is better than any other orders.
  llvm::stable_sort(OpndPtrs, [](XorOpnd *LHS, XorOpnd *RHS) {
    return LHS->getSymbolicRank() < RHS->getSymbolicRank();
  });

  // Step 3: Combine adjacent operands
  XorOpnd *PrevOpnd = nullptr;
  bool Changed = false;
  for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
    XorOpnd *CurrOpnd = OpndPtrs[i];
    // The combined value
    Value *CV;

    // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
    if (!ConstOpnd.isZero() && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
      Changed = true;
      if (CV)
        *CurrOpnd = XorOpnd(CV);
      else {
        // Entire operand folded into the constant; drop it.
        CurrOpnd->Invalidate();
        continue;
      }
    }

    if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
      PrevOpnd = CurrOpnd;
      continue;
    }

    // step 3.2: When previous and current operands share the same symbolic
    //  value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
    if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
      // Remove previous operand
      PrevOpnd->Invalidate();
      if (CV) {
        *CurrOpnd = XorOpnd(CV);
        PrevOpnd = CurrOpnd;
      } else {
        CurrOpnd->Invalidate();
        PrevOpnd = nullptr;
      }
      Changed = true;
    }
  }

  // Step 4: Reassemble the Ops
  if (Changed) {
    Ops.clear();
    for (unsigned int i = 0, e = Opnds.size(); i < e; i++) {
      XorOpnd &O = Opnds[i];
      if (O.isInvalid())
        continue;
      ValueEntry VE(getRank(O.getValue()), O.getValue());
      Ops.push_back(VE);
    }
    // Re-append any surviving constant part as an ordinary operand.
    if (!ConstOpnd.isZero()) {
      Value *C = ConstantInt::get(Ty, ConstOpnd);
      ValueEntry VE(getRank(C), C);
      Ops.push_back(VE);
    }
    unsigned Sz = Ops.size();
    if (Sz == 1)
      return Ops.back().Op;
    if (Sz == 0) {
      assert(ConstOpnd.isZero());
      return ConstantInt::get(Ty, ConstOpnd);
    }
  }

  return nullptr;
}
1525 
1526 /// Optimize a series of operands to an 'add' instruction. This
1527 /// optimizes based on identities. If it can be reduced to a single Value, it
1528 /// is returned, otherwise the Ops list is mutated as necessary.
1529 Value *ReassociatePass::OptimizeAdd(Instruction *I,
1531  // Scan the operand lists looking for X and -X pairs. If we find any, we
1532  // can simplify expressions like X+-X == 0 and X+~X ==-1. While we're at it,
1533  // scan for any
1534  // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
1535 
1536  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1537  Value *TheOp = Ops[i].Op;
1538  // Check to see if we've seen this operand before. If so, we factor all
1539  // instances of the operand together. Due to our sorting criteria, we know
1540  // that these need to be next to each other in the vector.
1541  if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
1542  // Rescan the list, remove all instances of this operand from the expr.
1543  unsigned NumFound = 0;
1544  do {
1545  Ops.erase(Ops.begin()+i);
1546  ++NumFound;
1547  } while (i != Ops.size() && Ops[i].Op == TheOp);
1548 
1549  LLVM_DEBUG(dbgs() << "\nFACTORING [" << NumFound << "]: " << *TheOp
1550  << '\n');
1551  ++NumFactor;
1552 
1553  // Insert a new multiply.
1554  Type *Ty = TheOp->getType();
1555  Constant *C = Ty->isIntOrIntVectorTy() ?
1556  ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
1557  Instruction *Mul = CreateMul(TheOp, C, "factor", I, I);
1558 
1559  // Now that we have inserted a multiply, optimize it. This allows us to
1560  // handle cases that require multiple factoring steps, such as this:
1561  // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
1562  RedoInsts.insert(Mul);
1563 
1564  // If every add operand was a duplicate, return the multiply.
1565  if (Ops.empty())
1566  return Mul;
1567 
1568  // Otherwise, we had some input that didn't have the dupe, such as
1569  // "A + A + B" -> "A*2 + B". Add the new multiply to the list of
1570  // things being added by this operation.
1571  Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
1572 
1573  --i;
1574  e = Ops.size();
1575  continue;
1576  }
1577 
1578  // Check for X and -X or X and ~X in the operand list.
1579  Value *X;
1580  if (!match(TheOp, m_Neg(m_Value(X))) && !match(TheOp, m_Not(m_Value(X))) &&
1581  !match(TheOp, m_FNeg(m_Value(X))))
1582  continue;
1583 
1584  unsigned FoundX = FindInOperandList(Ops, i, X);
1585  if (FoundX == i)
1586  continue;
1587 
1588  // Remove X and -X from the operand list.
1589  if (Ops.size() == 2 &&
1590  (match(TheOp, m_Neg(m_Value())) || match(TheOp, m_FNeg(m_Value()))))
1591  return Constant::getNullValue(X->getType());
1592 
1593  // Remove X and ~X from the operand list.
1594  if (Ops.size() == 2 && match(TheOp, m_Not(m_Value())))
1595  return Constant::getAllOnesValue(X->getType());
1596 
1597  Ops.erase(Ops.begin()+i);
1598  if (i < FoundX)
1599  --FoundX;
1600  else
1601  --i; // Need to back up an extra one.
1602  Ops.erase(Ops.begin()+FoundX);
1603  ++NumAnnihil;
1604  --i; // Revisit element.
1605  e -= 2; // Removed two elements.
1606 
1607  // if X and ~X we append -1 to the operand list.
1608  if (match(TheOp, m_Not(m_Value()))) {
1609  Value *V = Constant::getAllOnesValue(X->getType());
1610  Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
1611  e += 1;
1612  }
1613  }
1614 
1615  // Scan the operand list, checking to see if there are any common factors
1616  // between operands. Consider something like A*A+A*B*C+D. We would like to
1617  // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
1618  // To efficiently find this, we count the number of times a factor occurs
1619  // for any ADD operands that are MULs.
1620  DenseMap<Value*, unsigned> FactorOccurrences;
1621 
1622  // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
1623  // where they are actually the same multiply.
1624  unsigned MaxOcc = 0;
1625  Value *MaxOccVal = nullptr;
1626  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1627  BinaryOperator *BOp =
1628  isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1629  if (!BOp)
1630  continue;
1631 
1632  // Compute all of the factors of this added value.
1633  SmallVector<Value*, 8> Factors;
1634  FindSingleUseMultiplyFactors(BOp, Factors);
1635  assert(Factors.size() > 1 && "Bad linearize!");
1636 
1637  // Add one to FactorOccurrences for each unique factor in this op.
1638  SmallPtrSet<Value*, 8> Duplicates;
1639  for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
1640  Value *Factor = Factors[i];
1641  if (!Duplicates.insert(Factor).second)
1642  continue;
1643 
1644  unsigned Occ = ++FactorOccurrences[Factor];
1645  if (Occ > MaxOcc) {
1646  MaxOcc = Occ;
1647  MaxOccVal = Factor;
1648  }
1649 
1650  // If Factor is a negative constant, add the negated value as a factor
1651  // because we can percolate the negate out. Watch for minint, which
1652  // cannot be positivified.
1653  if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
1654  if (CI->isNegative() && !CI->isMinValue(true)) {
1655  Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
1656  if (!Duplicates.insert(Factor).second)
1657  continue;
1658  unsigned Occ = ++FactorOccurrences[Factor];
1659  if (Occ > MaxOcc) {
1660  MaxOcc = Occ;
1661  MaxOccVal = Factor;
1662  }
1663  }
1664  } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Factor)) {
1665  if (CF->isNegative()) {
1666  APFloat F(CF->getValueAPF());
1667  F.changeSign();
1668  Factor = ConstantFP::get(CF->getContext(), F);
1669  if (!Duplicates.insert(Factor).second)
1670  continue;
1671  unsigned Occ = ++FactorOccurrences[Factor];
1672  if (Occ > MaxOcc) {
1673  MaxOcc = Occ;
1674  MaxOccVal = Factor;
1675  }
1676  }
1677  }
1678  }
1679  }
1680 
1681  // If any factor occurred more than one time, we can pull it out.
1682  if (MaxOcc > 1) {
1683  LLVM_DEBUG(dbgs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal
1684  << '\n');
1685  ++NumFactor;
1686 
1687  // Create a new instruction that uses the MaxOccVal twice. If we don't do
1688  // this, we could otherwise run into situations where removing a factor
1689  // from an expression will drop a use of maxocc, and this can cause
1690  // RemoveFactorFromExpression on successive values to behave differently.
1691  Instruction *DummyInst =
1692  I->getType()->isIntOrIntVectorTy()
1693  ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
1694  : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
1695 
1697  for (unsigned i = 0; i != Ops.size(); ++i) {
1698  // Only try to remove factors from expressions we're allowed to.
1699  BinaryOperator *BOp =
1700  isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1701  if (!BOp)
1702  continue;
1703 
1704  if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
1705  // The factorized operand may occur several times. Convert them all in
1706  // one fell swoop.
1707  for (unsigned j = Ops.size(); j != i;) {
1708  --j;
1709  if (Ops[j].Op == Ops[i].Op) {
1710  NewMulOps.push_back(V);
1711  Ops.erase(Ops.begin()+j);
1712  }
1713  }
1714  --i;
1715  }
1716  }
1717 
1718  // No need for extra uses anymore.
1719  DummyInst->deleteValue();
1720 
1721  unsigned NumAddedValues = NewMulOps.size();
1722  Value *V = EmitAddTreeOfValues(I, NewMulOps);
1723 
1724  // Now that we have inserted the add tree, optimize it. This allows us to
1725  // handle cases that require multiple factoring steps, such as this:
1726  // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
1727  assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
1728  (void)NumAddedValues;
1729  if (Instruction *VI = dyn_cast<Instruction>(V))
1730  RedoInsts.insert(VI);
1731 
1732  // Create the multiply.
1733  Instruction *V2 = CreateMul(V, MaxOccVal, "reass.mul", I, I);
1734 
1735  // Rerun associate on the multiply in case the inner expression turned into
1736  // a multiply. We want to make sure that we keep things in canonical form.
1737  RedoInsts.insert(V2);
1738 
1739  // If every add operand included the factor (e.g. "A*B + A*C"), then the
1740  // entire result expression is just the multiply "A*(B+C)".
1741  if (Ops.empty())
1742  return V2;
1743 
1744  // Otherwise, we had some input that didn't have the factor, such as
1745  // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
1746  // things being added by this operation.
1747  Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
1748  }
1749 
1750  return nullptr;
1751 }
1752 
/// Build up a vector of value/power pairs factoring a product.
///
/// Given a series of multiplication operands, build a vector of factors and
/// the powers each is raised to when forming the final product. Sort them in
/// the order of descending power.
///
/// (x*x) -> [(x, 2)]
/// ((x*x)*x) -> [(x, 3)]
/// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
///
/// \returns Whether any factors have a power greater than one.
/// NOTE(review): the first line of the signature (the Ops parameter) appears
/// elided from this copy of the file — confirm against upstream.
                                   SmallVectorImpl<Factor> &Factors) {
  // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
  // Compute the sum of powers of simplifiable factors.
  //
  // Pass 1: count runs of identical operands. Only *adjacent* duplicates are
  // detected, so equal operands must occupy consecutive slots in Ops —
  // presumably the caller keeps Ops sorted; confirm against callers.
  unsigned FactorPowerSum = 0;
  for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
    Value *Op = Ops[Idx-1].Op;

    // Count the number of occurrences of this value.
    unsigned Count = 1;
    for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
      ++Count;
    // Track for simplification all factors which occur 2 or more times.
    if (Count > 1)
      FactorPowerSum += Count;
  }

  // We can only simplify factors if the sum of the powers of our simplifiable
  // factors is 4 or higher. When that is the case, we will *always* have
  // a simplification. This is an important invariant to prevent cyclicly
  // trying to simplify already minimal formations.
  if (FactorPowerSum < 4)
    return false;

  // Now gather the simplifiable factors, removing them from Ops.
  // Pass 2: same adjacent-run scan, but this time move each repeated
  // operand's even share of occurrences out of Ops and into Factors.
  FactorPowerSum = 0;
  for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
    Value *Op = Ops[Idx-1].Op;

    // Count the number of occurrences of this value.
    unsigned Count = 1;
    for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
      ++Count;
    if (Count == 1)
      continue;
    // Move an even number of occurrences to Factors. Clearing the low bit
    // leaves any odd leftover occurrence behind in Ops.
    Count &= ~1U;
    Idx -= Count;
    FactorPowerSum += Count;
    Factors.push_back(Factor(Op, Count));
    Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
  }

  // None of the adjustments above should have reduced the sum of factor powers
  // below our minimum of '4'.
  assert(FactorPowerSum >= 4);

  // Highest powers first, as buildMinimalMultiplyDAG expects.
  llvm::stable_sort(Factors, [](const Factor &LHS, const Factor &RHS) {
    return LHS.Power > RHS.Power;
  });
  return true;
}
1816 
/// Build a tree of multiplies, computing the product of Ops.
///
/// Consumes Ops: on return it holds at most one element. Requires Ops to be
/// non-empty. Integer/vector-of-integer operands produce `mul`s, anything
/// else produces `fmul`s.
/// NOTE(review): the first line of the signature (the Builder parameter)
/// appears elided from this copy of the file — confirm against upstream.
                              SmallVectorImpl<Value*> &Ops) {
  // Single operand: nothing to multiply.
  if (Ops.size() == 1)
    return Ops.back();

  // Fold operands from the back into one running product.
  Value *LHS = Ops.pop_back_val();
  do {
    if (LHS->getType()->isIntOrIntVectorTy())
      LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
    else
      LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
  } while (!Ops.empty());

  return LHS;
}
1833 
1834 /// Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
1835 ///
1836 /// Given a vector of values raised to various powers, where no two values are
1837 /// equal and the powers are sorted in decreasing order, compute the minimal
1838 /// DAG of multiplies to compute the final product, and return that product
1839 /// value.
1840 Value *
1841 ReassociatePass::buildMinimalMultiplyDAG(IRBuilderBase &Builder,
1842  SmallVectorImpl<Factor> &Factors) {
1843  assert(Factors[0].Power);
1844  SmallVector<Value *, 4> OuterProduct;
1845  for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
1846  Idx < Size && Factors[Idx].Power > 0; ++Idx) {
1847  if (Factors[Idx].Power != Factors[LastIdx].Power) {
1848  LastIdx = Idx;
1849  continue;
1850  }
1851 
1852  // We want to multiply across all the factors with the same power so that
1853  // we can raise them to that power as a single entity. Build a mini tree
1854  // for that.
1855  SmallVector<Value *, 4> InnerProduct;
1856  InnerProduct.push_back(Factors[LastIdx].Base);
1857  do {
1858  InnerProduct.push_back(Factors[Idx].Base);
1859  ++Idx;
1860  } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);
1861 
1862  // Reset the base value of the first factor to the new expression tree.
1863  // We'll remove all the factors with the same power in a second pass.
1864  Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
1865  if (Instruction *MI = dyn_cast<Instruction>(M))
1866  RedoInsts.insert(MI);
1867 
1868  LastIdx = Idx;
1869  }
1870  // Unique factors with equal powers -- we've folded them into the first one's
1871  // base.
1872  Factors.erase(std::unique(Factors.begin(), Factors.end(),
1873  [](const Factor &LHS, const Factor &RHS) {
1874  return LHS.Power == RHS.Power;
1875  }),
1876  Factors.end());
1877 
1878  // Iteratively collect the base of each factor with an add power into the
1879  // outer product, and halve each power in preparation for squaring the
1880  // expression.
1881  for (unsigned Idx = 0, Size = Factors.size(); Idx != Size; ++Idx) {
1882  if (Factors[Idx].Power & 1)
1883  OuterProduct.push_back(Factors[Idx].Base);
1884  Factors[Idx].Power >>= 1;
1885  }
1886  if (Factors[0].Power) {
1887  Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
1888  OuterProduct.push_back(SquareRoot);
1889  OuterProduct.push_back(SquareRoot);
1890  }
1891  if (OuterProduct.size() == 1)
1892  return OuterProduct.front();
1893 
1894  Value *V = buildMultiplyTree(Builder, OuterProduct);
1895  return V;
1896 }
1897 
// Try to replace a long linear chain of multiplies with a minimal DAG that
// maximizes re-use of repeated sub-products. Returns the replacement value if
// the whole expression was consumed, or nullptr (with Ops possibly updated in
// place) if further processing should continue.
// NOTE(review): the second signature line (the Ops parameter) and the
// IRBuilder declaration appear elided from this copy of the file.
Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
  // We can only optimize the multiplies when there is a chain of more than
  // three, such that a balanced tree might require fewer total multiplies.
  if (Ops.size() < 4)
    return nullptr;

  // Try to turn linear trees of multiplies without other uses of the
  // intermediate stages into minimal multiply DAGs with perfect sub-expression
  // re-use.
  SmallVector<Factor, 4> Factors;
  if (!collectMultiplyFactors(Ops, Factors))
    return nullptr; // All distinct factors, so nothing left for us to do.

  // The reassociate transformation for FP operations is performed only
  // if unsafe algebra is permitted by FastMathFlags. Propagate those flags
  // to the newly generated operations.
  if (auto FPI = dyn_cast<FPMathOperator>(I))
    Builder.setFastMathFlags(FPI->getFastMathFlags());

  // Build the DAG; this consumes Factors.
  Value *V = buildMinimalMultiplyDAG(Builder, Factors);
  if (Ops.empty())
    return V;

  // Some operands did not participate in any repeated factor: fold the new
  // DAG value back into the operand list at its rank-sorted position.
  ValueEntry NewEntry = ValueEntry(getRank(V), V);
  Ops.insert(llvm::lower_bound(Ops, NewEntry), NewEntry);
  return nullptr;
}
1927 
// Given the linearized, rank-sorted operand list of the expression rooted at
// I, try to optimize the whole expression at once. Returns a replacement
// value if the expression collapsed to a single value, or nullptr if Ops was
// merely (possibly) rewritten in place.
// NOTE(review): the second line of the signature (the Ops parameter) appears
// elided from this copy of the file.
Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
  // Now that we have the linearized expression tree, try to optimize it.
  // Start by folding any constants that we found.
  Constant *Cst = nullptr;
  unsigned Opcode = I->getOpcode();
  // Constants sort to the end of the rank-ordered list, so fold them together
  // from the back until a non-constant is seen.
  while (!Ops.empty() && isa<Constant>(Ops.back().Op)) {
    Constant *C = cast<Constant>(Ops.pop_back_val().Op);
    Cst = Cst ? ConstantExpr::get(Opcode, C, Cst) : C;
  }
  // If there was nothing but constants then we are done.
  if (Ops.empty())
    return Cst;

  // Put the combined constant back at the end of the operand list, except if
  // there is no point. For example, an add of 0 gets dropped here, while a
  // multiplication by zero turns the whole expression into zero.
  if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) {
    if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
      return Cst;
    Ops.push_back(ValueEntry(0, Cst));
  }

  if (Ops.size() == 1) return Ops[0].Op;

  // Handle destructive annihilation due to identities between elements in the
  // argument list here.
  unsigned NumOps = Ops.size();
  switch (Opcode) {
  default: break;
  case Instruction::And:
  case Instruction::Or:
    if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
      return Result;
    break;

  case Instruction::Xor:
    if (Value *Result = OptimizeXor(I, Ops))
      return Result;
    break;

  case Instruction::Add:
  case Instruction::FAdd:
    if (Value *Result = OptimizeAdd(I, Ops))
      return Result;
    break;

  case Instruction::Mul:
  case Instruction::FMul:
    if (Value *Result = OptimizeMul(I, Ops))
      return Result;
    break;
  }

  // If an optimization above shrank the operand list, iterate to a fixed
  // point — new simplification opportunities may have been exposed.
  if (Ops.size() != NumOps)
    return OptimizeExpression(I, Ops);
  return nullptr;
}
1986 
// Remove dead instructions and if any operands are trivially dead add them to
// Insts so they will be removed as well.
// NOTE(review): one statement between RedoInsts.remove(I) and
// eraseFromParent() appears elided from this copy of the file.
void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I,
                                                OrderedSet &Insts) {
  assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
  // Capture the operands before erasing I so newly-dead operands can be
  // queued afterwards.
  SmallVector<Value *, 4> Ops(I->operands());
  // Drop all pass-local bookkeeping for I before deleting it.
  ValueRankMap.erase(I);
  Insts.remove(I);
  RedoInsts.remove(I);
  I->eraseFromParent();
  // Erasing I may have left some operands without uses; queue those for
  // removal too.
  for (auto Op : Ops)
    if (Instruction *OpInst = dyn_cast<Instruction>(Op))
      if (OpInst->use_empty())
        Insts.insert(OpInst);
}
2003 
/// Zap the given instruction, adding interesting operands to the work list.
/// NOTE(review): one statement between RedoInsts.remove(I) and
/// eraseFromParent() appears elided from this copy of the file.
void ReassociatePass::EraseInst(Instruction *I) {
  assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
  LLVM_DEBUG(dbgs() << "Erasing dead inst: "; I->dump());

  // Remember the operands so they can be revisited after I is gone.
  SmallVector<Value *, 8> Ops(I->operands());
  // Erase the dead instruction.
  ValueRankMap.erase(I);
  RedoInsts.remove(I);
  I->eraseFromParent();
  // Optimize its operands.
  SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    if (Instruction *Op = dyn_cast<Instruction>(Ops[i])) {
      // If this is a node in an expression tree, climb to the expression root
      // and add that since that's where optimization actually happens.
      unsigned Opcode = Op->getOpcode();
      while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
             Visited.insert(Op).second)
        Op = Op->user_back();

      // The instruction we're going to push may be coming from a
      // dead block, and Reassociate skips the processing of unreachable
      // blocks because it's a waste of time and also because it can
      // lead to infinite loop due to LLVM's non-standard definition
      // of dominance.
      if (ValueRankMap.find(Op) != ValueRankMap.end())
        RedoInsts.insert(Op);
    }

  MadeChange = true;
}
2037 
2038 /// Recursively analyze an expression to build a list of instructions that have
2039 /// negative floating-point constant operands. The caller can then transform
2040 /// the list to create positive constants for better reassociation and CSE.
2041 static void getNegatibleInsts(Value *V,
2042  SmallVectorImpl<Instruction *> &Candidates) {
2043  // Handle only one-use instructions. Combining negations does not justify
2044  // replicating instructions.
2045  Instruction *I;
2046  if (!match(V, m_OneUse(m_Instruction(I))))
2047  return;
2048 
2049  // Handle expressions of multiplications and divisions.
2050  // TODO: This could look through floating-point casts.
2051  const APFloat *C;
2052  switch (I->getOpcode()) {
2053  case Instruction::FMul:
2054  // Not expecting non-canonical code here. Bail out and wait.
2055  if (match(I->getOperand(0), m_Constant()))
2056  break;
2057 
2058  if (match(I->getOperand(1), m_APFloat(C)) && C->isNegative()) {
2059  Candidates.push_back(I);
2060  LLVM_DEBUG(dbgs() << "FMul with negative constant: " << *I << '\n');
2061  }
2062  getNegatibleInsts(I->getOperand(0), Candidates);
2063  getNegatibleInsts(I->getOperand(1), Candidates);
2064  break;
2065  case Instruction::FDiv:
2066  // Not expecting non-canonical code here. Bail out and wait.
2067  if (match(I->getOperand(0), m_Constant()) &&
2068  match(I->getOperand(1), m_Constant()))
2069  break;
2070 
2071  if ((match(I->getOperand(0), m_APFloat(C)) && C->isNegative()) ||
2072  (match(I->getOperand(1), m_APFloat(C)) && C->isNegative())) {
2073  Candidates.push_back(I);
2074  LLVM_DEBUG(dbgs() << "FDiv with negative constant: " << *I << '\n');
2075  }
2076  getNegatibleInsts(I->getOperand(0), Candidates);
2077  getNegatibleInsts(I->getOperand(1), Candidates);
2078  break;
2079  default:
2080  break;
2081  }
2082 }
2083 
/// Given an fadd/fsub with an operand that is a one-use instruction
/// (the fadd/fsub), try to change negative floating-point constants into
/// positive constants to increase potential for reassociation and CSE.
/// \p I is the fadd/fsub, \p Op the one-use subtree operand to canonicalize,
/// \p OtherOp the remaining operand of \p I. Returns a replacement for \p I
/// if it was rewritten, \p I itself if only subtree constants were flipped,
/// or nullptr if nothing changed.
Instruction *ReassociatePass::canonicalizeNegFPConstantsForOp(Instruction *I,
                                                              Instruction *Op,
                                                              Value *OtherOp) {
  assert((I->getOpcode() == Instruction::FAdd ||
          I->getOpcode() == Instruction::FSub) && "Expected fadd/fsub");

  // Collect instructions with negative FP constants from the subtree that ends
  // in Op.
  SmallVector<Instruction *, 4> Candidates;
  getNegatibleInsts(Op, Candidates);
  if (Candidates.empty())
    return nullptr;

  // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the
  // resulting subtract will be broken up later. This can get us into an
  // infinite loop during reassociation.
  bool IsFSub = I->getOpcode() == Instruction::FSub;
  bool NeedsSubtract = !IsFSub && Candidates.size() % 2 == 1;
  if (NeedsSubtract && ShouldBreakUpSubtract(I))
    return nullptr;

  // Flip each candidate's single negative constant operand to its absolute
  // value.
  for (Instruction *Negatible : Candidates) {
    const APFloat *C;
    if (match(Negatible->getOperand(0), m_APFloat(C))) {
      assert(!match(Negatible->getOperand(1), m_Constant()) &&
             "Expecting only 1 constant operand");
      assert(C->isNegative() && "Expected negative FP constant");
      Negatible->setOperand(0, ConstantFP::get(Negatible->getType(), abs(*C)));
      MadeChange = true;
    }
    if (match(Negatible->getOperand(1), m_APFloat(C))) {
      assert(!match(Negatible->getOperand(0), m_Constant()) &&
             "Expecting only 1 constant operand");
      assert(C->isNegative() && "Expected negative FP constant");
      Negatible->setOperand(1, ConstantFP::get(Negatible->getType(), abs(*C)));
      MadeChange = true;
    }
  }
  assert(MadeChange == true && "Negative constant candidate was not changed");

  // Negations cancelled out.
  if (Candidates.size() % 2 == 0)
    return I;

  // Negate the final operand in the expression by flipping the opcode of this
  // fadd/fsub.
  assert(Candidates.size() % 2 == 1 && "Expected odd number");
  // NOTE(review): an IRBuilder declaration appears elided from this copy of
  // the file; Builder is presumably an IRBuilder positioned at I — confirm
  // against upstream.
  Value *NewInst = IsFSub ? Builder.CreateFAddFMF(OtherOp, Op, I)
                          : Builder.CreateFSubFMF(OtherOp, Op, I);
  I->replaceAllUsesWith(NewInst);
  RedoInsts.insert(I);
  return dyn_cast<Instruction>(NewInst);
}
2141 
/// Canonicalize expressions that contain a negative floating-point constant
/// of the following form:
/// OtherOp + (subtree) -> OtherOp {+/-} (canonical subtree)
/// (subtree) + OtherOp -> OtherOp {+/-} (canonical subtree)
/// OtherOp - (subtree) -> OtherOp {+/-} (canonical subtree)
///
/// The fadd/fsub opcode may be switched to allow folding a negation into the
/// input instruction.
Instruction *ReassociatePass::canonicalizeNegFPConstants(Instruction *I) {
  LLVM_DEBUG(dbgs() << "Combine negations for: " << *I << '\n');
  Value *X;
  Instruction *Op;
  // Try each of the three operand shapes listed above in turn; each attempt
  // may replace I, and the replacement feeds the next attempt.
  // NOTE(review): the three pattern-match conditions guarding these calls
  // (fadd/fsub matches that bind X and Op) appear elided from this copy of
  // the file — confirm against upstream.
  if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
    I = R;
  if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
    I = R;
  if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
    I = R;
  return I;
}
2165 
/// Inspect and optimize the given instruction. Note that erasing
/// instructions is not allowed.
/// NOTE(review): several single statements defining a local `NI` (the result
/// of a canonicalizing conversion) appear elided from this copy of the file;
/// each `I = NI;` below refers to one of them — confirm against upstream.
void ReassociatePass::OptimizeInst(Instruction *I) {
  // Only consider operations that we understand.
  if (!isa<UnaryOperator>(I) && !isa<BinaryOperator>(I))
    return;

  if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
    // If an operand of this shift is a reassociable multiply, or if the shift
    // is used by a reassociable multiply or add, turn into a multiply.
    if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
        (I->hasOneUse() &&
         (isReassociableOp(I->user_back(), Instruction::Mul) ||
          isReassociableOp(I->user_back(), Instruction::Add)))) {
      // (elided: conversion of the shift into a multiply, producing NI)
      RedoInsts.insert(I);
      MadeChange = true;
      I = NI;
    }

  // Commute binary operators, to canonicalize the order of their operands.
  // This can potentially expose more CSE opportunities, and makes writing other
  // transformations simpler.
  if (I->isCommutative())
    canonicalizeOperands(I);

  // Canonicalize negative constants out of expressions.
  if (Instruction *Res = canonicalizeNegFPConstants(I))
    I = Res;

  // Don't optimize floating-point instructions unless they are 'fast'.
  if (I->getType()->isFPOrFPVectorTy() && !I->isFast())
    return;

  // Do not reassociate boolean (i1) expressions. We want to preserve the
  // original order of evaluation for short-circuited comparisons that
  // SimplifyCFG has folded to AND/OR expressions. If the expression
  // is not further optimized, it is likely to be transformed back to a
  // short-circuited form for code gen, and the source order may have been
  // optimized for the most likely conditions.
  if (I->getType()->isIntegerTy(1))
    return;

  // If this is a bitwise or instruction of operands
  // with no common bits set, convert it to X+Y.
  // (NOTE(review): one line of this condition appears elided here.)
  if (I->getOpcode() == Instruction::Or &&
      haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1),
                          I->getModule()->getDataLayout(), /*AC=*/nullptr, I,
                          /*DT=*/nullptr)) {
    // (elided: conversion of the or into an add, producing NI)
    RedoInsts.insert(I);
    MadeChange = true;
    I = NI;
  }

  // If this is a subtract instruction which is not already in negate form,
  // see if we can convert it to X+-Y.
  if (I->getOpcode() == Instruction::Sub) {
    if (ShouldBreakUpSubtract(I)) {
      Instruction *NI = BreakUpSubtract(I, RedoInsts);
      RedoInsts.insert(I);
      MadeChange = true;
      I = NI;
    } else if (match(I, m_Neg(m_Value()))) {
      // Otherwise, this is a negation. See if the operand is a multiply tree
      // and if this is not an inner node of a multiply tree.
      if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
          (!I->hasOneUse() ||
           !isReassociableOp(I->user_back(), Instruction::Mul))) {
        // (elided: lowering of the negate into a multiply, producing NI)
        // If the negate was simplified, revisit the users to see if we can
        // reassociate further.
        for (User *U : NI->users()) {
          if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
            RedoInsts.insert(Tmp);
        }
        RedoInsts.insert(I);
        MadeChange = true;
        I = NI;
      }
    }
  } else if (I->getOpcode() == Instruction::FNeg ||
             I->getOpcode() == Instruction::FSub) {
    if (ShouldBreakUpSubtract(I)) {
      Instruction *NI = BreakUpSubtract(I, RedoInsts);
      RedoInsts.insert(I);
      MadeChange = true;
      I = NI;
    } else if (match(I, m_FNeg(m_Value()))) {
      // Otherwise, this is a negation. See if the operand is a multiply tree
      // and if this is not an inner node of a multiply tree.
      // An fsub-based negation keeps its operand at index 1; a unary fneg
      // keeps it at index 0.
      Value *Op = isa<BinaryOperator>(I) ? I->getOperand(1) :
                                           I->getOperand(0);
      if (isReassociableOp(Op, Instruction::FMul) &&
          (!I->hasOneUse() ||
           !isReassociableOp(I->user_back(), Instruction::FMul))) {
        // If the negate was simplified, revisit the users to see if we can
        // reassociate further.
        // (elided: lowering of the fneg into a multiply, producing NI)
        for (User *U : NI->users()) {
          if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
            RedoInsts.insert(Tmp);
        }
        RedoInsts.insert(I);
        MadeChange = true;
        I = NI;
      }
    }
  }

  // If this instruction is an associative binary operator, process it.
  if (!I->isAssociative()) return;
  BinaryOperator *BO = cast<BinaryOperator>(I);

  // If this is an interior node of a reassociable tree, ignore it until we
  // get to the root of the tree, to avoid N^2 analysis.
  unsigned Opcode = BO->getOpcode();
  if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
    // During the initial run we will get to the root of the tree.
    // But if we get here while we are redoing instructions, there is no
    // guarantee that the root will be visited. So Redo later
    if (BO->user_back() != BO &&
        BO->getParent() == BO->user_back()->getParent())
      RedoInsts.insert(BO->user_back());
    return;
  }

  // If this is an add tree that is used by a sub instruction, ignore it
  // until we process the subtract.
  if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
      cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub)
    return;
  if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd &&
      cast<Instruction>(BO->user_back())->getOpcode() == Instruction::FSub)
    return;

  ReassociateExpression(BO);
}
2305 
// Drive the full reassociation pipeline for one expression tree rooted at I:
// linearize, rank-sort, globally optimize, apply profitability heuristics,
// and finally rewrite the tree in place.
// NOTE(review): the declarations of the locals `Tree` and `Ops` appear elided
// from this copy of the file — confirm against upstream.
void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
  // First, walk the expression tree, linearizing the tree, collecting the
  // operand information.
  MadeChange |= LinearizeExprTree(I, Tree);
  // Expand each (value, repeat-count) pair into that many ranked entries.
  Ops.reserve(Tree.size());
  for (const RepeatedValue &E : Tree)
    Ops.append(E.second.getZExtValue(), ValueEntry(getRank(E.first), E.first));

  LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');

  // Now that we have linearized the tree to a list and have gathered all of
  // the operands and their ranks, sort the operands by their rank. Use a
  // stable_sort so that values with equal ranks will have their relative
  // positions maintained (and so the compiler is deterministic). Note that
  // this sorts so that the highest ranking values end up at the beginning of
  // the vector.
  llvm::stable_sort(Ops);

  // Now that we have the expression tree in a convenient
  // sorted form, optimize it globally if possible.
  if (Value *V = OptimizeExpression(I, Ops)) {
    if (V == I)
      // Self-referential expression in unreachable code.
      return;
    // This expression tree simplified to something that isn't a tree,
    // eliminate it.
    LLVM_DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
    I->replaceAllUsesWith(V);
    if (Instruction *VI = dyn_cast<Instruction>(V))
      if (I->getDebugLoc())
        VI->setDebugLoc(I->getDebugLoc());
    RedoInsts.insert(I);
    ++NumAnnihil;
    return;
  }

  // We want to sink immediates as deeply as possible except in the case where
  // this is a multiply tree used only by an add, and the immediate is a -1.
  // In this case we reassociate to put the negation on the outside so that we
  // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
  if (I->hasOneUse()) {
    if (I->getOpcode() == Instruction::Mul &&
        cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
        isa<ConstantInt>(Ops.back().Op) &&
        cast<ConstantInt>(Ops.back().Op)->isMinusOne()) {
      ValueEntry Tmp = Ops.pop_back_val();
      Ops.insert(Ops.begin(), Tmp);
    } else if (I->getOpcode() == Instruction::FMul &&
               cast<Instruction>(I->user_back())->getOpcode() ==
                   Instruction::FAdd &&
               isa<ConstantFP>(Ops.back().Op) &&
               cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)) {
      ValueEntry Tmp = Ops.pop_back_val();
      Ops.insert(Ops.begin(), Tmp);
    }
  }

  LLVM_DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');

  if (Ops.size() == 1) {
    if (Ops[0].Op == I)
      // Self-referential expression in unreachable code.
      return;

    // This expression tree simplified to something that isn't a tree,
    // eliminate it.
    I->replaceAllUsesWith(Ops[0].Op);
    if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
      OI->setDebugLoc(I->getDebugLoc());
    RedoInsts.insert(I);
    return;
  }

  if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) {
    // Find the pair with the highest count in the pairmap and move it to the
    // back of the list so that it can later be CSE'd.
    // example:
    //   a*b*c*d*e
    // if c*e is the most "popular" pair, we can express this as
    //   (((c*e)*d)*b)*a
    unsigned Max = 1;
    unsigned BestRank = 0;
    std::pair<unsigned, unsigned> BestPair;
    unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin;
    // Score every unordered pair of operands against the precomputed PairMap.
    for (unsigned i = 0; i < Ops.size() - 1; ++i)
      for (unsigned j = i + 1; j < Ops.size(); ++j) {
        unsigned Score = 0;
        Value *Op0 = Ops[i].Op;
        Value *Op1 = Ops[j].Op;
        if (std::less<Value *>()(Op1, Op0))
          std::swap(Op0, Op1);
        auto it = PairMap[Idx].find({Op0, Op1});
        if (it != PairMap[Idx].end()) {
          // Functions like BreakUpSubtract() can erase the Values we're using
          // as keys and create new Values after we built the PairMap. There's a
          // small chance that the new nodes can have the same address as
          // something already in the table. We shouldn't accumulate the stored
          // score in that case as it refers to the wrong Value.
          if (it->second.isValid())
            Score += it->second.Score;
        }

        // Ties are broken towards the pair with the lower maximum rank.
        unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank);
        if (Score > Max || (Score == Max && MaxRank < BestRank)) {
          BestPair = {i, j};
          Max = Score;
          BestRank = MaxRank;
        }
      }
    if (Max > 1) {
      // Move the winning pair to the end (deepest position in the rewritten
      // tree) so the shared computation can be CSE'd.
      auto Op0 = Ops[BestPair.first];
      auto Op1 = Ops[BestPair.second];
      Ops.erase(&Ops[BestPair.second]);
      Ops.erase(&Ops[BestPair.first]);
      Ops.push_back(Op0);
      Ops.push_back(Op1);
    }
  }
  // Now that we ordered and optimized the expressions, splat them back into
  // the expression tree, removing any unneeded nodes.
  RewriteExprTree(I, Ops);
}
2430 
// Precompute, for every associative binary opcode, how often each unordered
// pair of operands occurs across all reassociable expressions in the
// function. ReassociateExpression later consults this map to pick a pair
// worth CSE'ing.
// NOTE(review): the declarations of the locals `Ops` and `Visited` appear
// elided from this copy of the file — confirm against upstream.
void
ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
  // Make a "pairmap" of how often each operand pair occurs.
  for (BasicBlock *BI : RPOT) {
    for (Instruction &I : *BI) {
      if (!I.isAssociative())
        continue;

      // Ignore nodes that aren't at the root of trees.
      if (I.hasOneUse() && I.user_back()->getOpcode() == I.getOpcode())
        continue;

      // Collect all operands in a single reassociable expression.
      // Since Reassociate has already been run once, we can assume things
      // are already canonical according to Reassociation's regime.
      SmallVector<Value *, 8> Worklist = { I.getOperand(0), I.getOperand(1) };
      while (!Worklist.empty() && Ops.size() <= GlobalReassociateLimit) {
        Value *Op = Worklist.pop_back_val();
        Instruction *OpI = dyn_cast<Instruction>(Op);
        // Leaves: anything that is not a one-use instruction of the same
        // opcode terminates the tree walk.
        if (!OpI || OpI->getOpcode() != I.getOpcode() || !OpI->hasOneUse()) {
          Ops.push_back(Op);
          continue;
        }
        // Be paranoid about self-referencing expressions in unreachable code.
        if (OpI->getOperand(0) != OpI)
          Worklist.push_back(OpI->getOperand(0));
        if (OpI->getOperand(1) != OpI)
          Worklist.push_back(OpI->getOperand(1));
      }
      // Skip extremely long expressions.
      if (Ops.size() > GlobalReassociateLimit)
        continue;

      // Add all pairwise combinations of operands to the pair map.
      unsigned BinaryIdx = I.getOpcode() - Instruction::BinaryOpsBegin;
      for (unsigned i = 0; i < Ops.size() - 1; ++i) {
        for (unsigned j = i + 1; j < Ops.size(); ++j) {
          // Canonicalize operand orderings.
          Value *Op0 = Ops[i];
          Value *Op1 = Ops[j];
          if (std::less<Value *>()(Op1, Op0))
            std::swap(Op0, Op1);
          // Count each pair only once per expression.
          if (!Visited.insert({Op0, Op1}).second)
            continue;
          auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, {Op0, Op1, 1}});
          if (!res.second) {
            // If either key value has been erased then we've got the same
            // address by coincidence. That can't happen here because nothing is
            // erasing values but it can happen by the time we're querying the
            // map.
            assert(res.first->second.isValid() && "WeakVH invalidated");
            ++res.first->second.Score;
          }
        }
      }
    }
  }
}
2491 
2493  // Get the functions basic blocks in Reverse Post Order. This order is used by
2494  // BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
2495  // blocks (it has been seen that the analysis in this pass could hang when
2496  // analysing dead basic blocks).
2498 
2499  // Calculate the rank map for F.
2500  BuildRankMap(F, RPOT);
2501 
2502  // Build the pair map before running reassociate.
2503  // Technically this would be more accurate if we did it after one round
2504  // of reassociation, but in practice it doesn't seem to help much on
2505  // real-world code, so don't waste the compile time running reassociate
2506  // twice.
2507  // If a user wants, they could expicitly run reassociate twice in their
2508  // pass pipeline for further potential gains.
2509  // It might also be possible to update the pair map during runtime, but the
2510  // overhead of that may be large if there's many reassociable chains.
2511  BuildPairMap(RPOT);
2512 
2513  MadeChange = false;
2514 
2515  // Traverse the same blocks that were analysed by BuildRankMap.
2516  for (BasicBlock *BI : RPOT) {
2517  assert(RankMap.count(&*BI) && "BB should be ranked.");
2518  // Optimize every instruction in the basic block.
2519  for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
2520  if (isInstructionTriviallyDead(&*II)) {
2521  EraseInst(&*II++);
2522  } else {
2523  OptimizeInst(&*II);
2524  assert(II->getParent() == &*BI && "Moved to a different block!");
2525  ++II;
2526  }
2527 
2528  // Make a copy of all the instructions to be redone so we can remove dead
2529  // instructions.
2530  OrderedSet ToRedo(RedoInsts);
2531  // Iterate over all instructions to be reevaluated and remove trivially dead
2532  // instructions. If any operand of the trivially dead instruction becomes
2533  // dead mark it for deletion as well. Continue this process until all
2534  // trivially dead instructions have been removed.
2535  while (!ToRedo.empty()) {
2536  Instruction *I = ToRedo.pop_back_val();
2538  RecursivelyEraseDeadInsts(I, ToRedo);
2539  MadeChange = true;
2540  }
2541  }
2542 
2543  // Now that we have removed dead instructions, we can reoptimize the
2544  // remaining instructions.
2545  while (!RedoInsts.empty()) {
2546  Instruction *I = RedoInsts.front();
2547  RedoInsts.erase(RedoInsts.begin());
2549  EraseInst(I);
2550  else
2551  OptimizeInst(I);
2552  }
2553  }
2554 
2555  // We are done with the rank map and pair map.
2556  RankMap.clear();
2557  ValueRankMap.clear();
2558  for (auto &Entry : PairMap)
2559  Entry.clear();
2560 
2561  if (MadeChange) {
2562  PreservedAnalyses PA;
2563  PA.preserveSet<CFGAnalyses>();
2564  return PA;
2565  }
2566 
2567  return PreservedAnalyses::all();
2568 }
2569 
2570 namespace {
2571 
2572  class ReassociateLegacyPass : public FunctionPass {
2573  ReassociatePass Impl;
2574 
2575  public:
2576  static char ID; // Pass identification, replacement for typeid
2577 
2578  ReassociateLegacyPass() : FunctionPass(ID) {
2580  }
2581 
2582  bool runOnFunction(Function &F) override {
2583  if (skipFunction(F))
2584  return false;
2585 
2586  FunctionAnalysisManager DummyFAM;
2587  auto PA = Impl.run(F, DummyFAM);
2588  return !PA.areAllPreserved();
2589  }
2590 
2591  void getAnalysisUsage(AnalysisUsage &AU) const override {
2592  AU.setPreservesCFG();
2596  }
2597  };
2598 
2599 } // end anonymous namespace
2600 
2601 char ReassociateLegacyPass::ID = 0;
2602 
2603 INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
2604  "Reassociate expressions", false, false)
2605 
2606 // Public interface to the Reassociate pass
2608  return new ReassociateLegacyPass();
2609 }
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
ShouldBreakUpSubtract
static bool ShouldBreakUpSubtract(Instruction *Sub)
Return true if we should break up this subtract of X-Y into (X + -Y).
Definition: Reassociate.cpp:1016
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
LowerNegateToMultiply
static BinaryOperator * LowerNegateToMultiply(Instruction *Neg)
Replace 0-X with X*-1.
Definition: Reassociate.cpp:267
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition: SmallVector.h:724
llvm::haveNoCommonBitsSet
bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if LHS and RHS have no common bits set.
Definition: ValueTracking.cpp:267
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
it
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in it
Definition: README-SSE.txt:81
llvm::RecurKind::Or
@ Or
Bitwise or logical OR of integers.
llvm::mayHaveNonDefUseDependency
bool mayHaveNonDefUseDependency(const Instruction &I)
Returns true if the result or effects of the given instructions I depend values not reachable through...
Definition: ValueTracking.cpp:4804
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
llvm::reassociate::ValueEntry
Definition: Reassociate.h:46
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
Scalar.h
llvm::Function
Definition: Function.h:60
llvm::lower_bound
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1736
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::ReassociatePass
Reassociate commutative expressions.
Definition: Reassociate.h:71
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
createAndInstr
static Value * createAndInstr(Instruction *InsertBefore, Value *Opnd, const APInt &ConstOpnd)
Helper function of CombineXorOpnd().
Definition: Reassociate.cpp:1277
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
collectMultiplyFactors
static bool collectMultiplyFactors(SmallVectorImpl< ValueEntry > &Ops, SmallVectorImpl< Factor > &Factors)
Build up a vector of value/power pairs factoring a product.
Definition: Reassociate.cpp:1764
llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:710
llvm::IRBuilder<>
ValueTracking.h
Local.h
llvm::PatternMatch::m_APFloat
apfloat_match m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
Definition: PatternMatch.h:289
INITIALIZE_PASS
INITIALIZE_PASS(ReassociateLegacyPass, "reassociate", "Reassociate expressions", false, false) FunctionPass *llvm
Definition: Reassociate.cpp:2603
GlobalsModRef.h
llvm::Type::isFPOrFPVectorTy
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:179
APInt.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
DenseMap.h
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
llvm::Instruction::setHasNoUnsignedWrap
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
Definition: Instruction.cpp:123
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
Operator.h
llvm::Value::user_begin
user_iterator user_begin()
Definition: Value.h:397
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:654
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
EmitAddTreeOfValues
static Value * EmitAddTreeOfValues(Instruction *I, SmallVectorImpl< WeakTrackingVH > &Ops)
Emit a tree of add instructions, summing Ops together and returning the result.
Definition: Reassociate.cpp:1125
llvm::reassociate::XorOpnd::setSymbolicRank
void setSymbolicRank(unsigned R)
Definition: Reassociate.cpp:106
llvm::BinaryOperator::CreateNeg
NUW NUW NUW NUW Exact static Exact BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", Instruction *InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
Definition: Instructions.cpp:2794
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:21
BasicAliasAnalysis.h
llvm::APInt::isMinValue
bool isMinValue() const
Determine if this is the smallest unsigned value.
Definition: APInt.h:402
llvm::APInt::getZero
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:177
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
I1
@ I1
Definition: DXILOpLowering.cpp:37
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:240
llvm::PatternMatch::m_FAdd
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:979
llvm::PatternMatch::m_FSub
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:991
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:186
Instruction.h
llvm::PatternMatch::m_APInt
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:272
llvm::ConstantExpr::getBinOpIdentity
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2845
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:157
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::APInt::isZero
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:359
IncorporateWeight
static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode)
Add the extra weight 'RHS' to the existing weight 'LHS', reducing the combined weight using any speci...
Definition: Reassociate.cpp:302
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:667
llvm::Instruction::setHasNoSignedWrap
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
Definition: Instruction.cpp:127
llvm::User
Definition: User.h:44
FindInOperandList
static unsigned FindInOperandList(const SmallVectorImpl< ValueEntry > &Ops, unsigned i, Value *X)
Scan backwards and forwards among values with the same rank as element i to see if X exists.
Definition: Reassociate.cpp:1099
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
InstrTypes.h
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:297
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
getNegatibleInsts
static void getNegatibleInsts(Value *V, SmallVectorImpl< Instruction * > &Candidates)
Recursively analyze an expression to build a list of instructions that have negative floating-point c...
Definition: Reassociate.cpp:2041
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:190
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::reassociate::XorOpnd::isInvalid
bool isInvalid() const
Definition: Reassociate.cpp:98
OptimizeAndOrXor
static Value * OptimizeAndOrXor(unsigned Opcode, SmallVectorImpl< ValueEntry > &Ops)
Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
Definition: Reassociate.cpp:1227
llvm::Instruction::isNilpotent
bool isNilpotent() const
Return true if the instruction is nilpotent:
Definition: Instruction.h:587
llvm::PatternMatch::m_Instruction
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:710
llvm::reassociate::XorOpnd
Utility class representing a non-constant Xor-operand.
Definition: Reassociate.cpp:94
llvm::ConstantExpr::getBinOpAbsorber
static Constant * getBinOpAbsorber(unsigned Opcode, Type *Ty)
Return the absorbing element for the given binary operation, i.e.
Definition: Constants.cpp:2890
llvm::reassociate::XorOpnd::isOrExpr
bool isOrExpr() const
Definition: Reassociate.cpp:99
llvm::Instruction::getOpcodeName
const char * getOpcodeName() const
Definition: Instruction.h:159
llvm::PatternMatch::m_FNeg
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
Definition: PatternMatch.h:1027
llvm::Constant::getAllOnesValue
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:395
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:392
llvm::Instruction
Definition: Instruction.h:42
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::APInt::isAllOnes
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:347
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
APFloat.h
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1777
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:928
SmallPtrSet.h
PatternMatch.h
RepeatedValue
std::pair< Value *, APInt > RepeatedValue
Definition: Reassociate.cpp:375
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Type.h
llvm::ConstantExpr::getFNeg
static Constant * getFNeg(Constant *C)
Definition: Constants.cpp:2711
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::initializeReassociateLegacyPassPass
void initializeReassociateLegacyPassPass(PassRegistry &)
CFG.h
llvm::InvokeInst
Invoke instruction.
Definition: Instructions.h:3763
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::ReassociatePass::OrderedSet
SetVector< AssertingVH< Instruction >, std::deque< AssertingVH< Instruction > >> OrderedSet
Definition: Reassociate.h:74
llvm::APInt::getOneBitSet
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:222
llvm::Instruction::andIRFlags
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
Definition: Instruction.cpp:322
BasicBlock.h
ConvertShiftToMul
static BinaryOperator * ConvertShiftToMul(Instruction *Shl)
If this is a shift of a reassociable multiply or is used by one, change this into a multiply by a con...
Definition: Reassociate.cpp:1069
llvm::APFloat
Definition: APFloat.h:701
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:239
llvm::reassociate::ValueEntry::Op
Value * Op
Definition: Reassociate.h:48
VI
@ VI
Definition: SIInstrInfo.cpp:7824
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::UnaryOperator::CreateFNegFMF
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", Instruction *InsertBefore=nullptr)
Definition: InstrTypes.h:164
llvm::reassociate::XorOpnd::getConstPart
const APInt & getConstPart() const
Definition: Reassociate.cpp:103
isReassociableOp
static BinaryOperator * isReassociableOp(Value *V, unsigned Opcode)
Return true if V is an instruction of the specified opcode and if it only has one use.
Definition: Reassociate.cpp:146
llvm::Instruction::user_back
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:88
llvm::reassociate::XorOpnd::XorOpnd
XorOpnd(Value *V)
Definition: Reassociate.cpp:116
isInteresting
static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, ScalarEvolution *SE, LoopInfo *LI)
isInteresting - Test whether the given expression is "interesting" when used by the given expression,...
Definition: IVUsers.cpp:56
llvm::BasicAAWrapperPass
Legacy wrapper pass to provide the BasicAAResult object.
Definition: BasicAliasAnalysis.h:173
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
CreateNeg
static Instruction * CreateNeg(Value *S1, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
Definition: Reassociate.cpp:255
llvm::DenseMap
Definition: DenseMap.h:716
llvm::ConstantExpr::get
static Constant * get(unsigned Opcode, Constant *C1, unsigned Flags=0, Type *OnlyIfReducedTy=nullptr)
get - Return a unary operator constant expression, folding if possible.
Definition: Constants.cpp:2300
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::Value::clearSubclassOptionalData
void clearSubclassOptionalData()
Clear the optional flags contained in this value.
Definition: Value.h:544
llvm::ConstantExpr::getShl
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2799
NegateValue
static Value * NegateValue(Value *V, Instruction *BI, ReassociatePass::OrderedSet &ToRedo)
Insert instructions before the instruction pointed to by BI, that computes the negative version of th...
Definition: Reassociate.cpp:814
llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:364
llvm::Instruction::isIdempotent
bool isIdempotent() const
Return true if the instruction is idempotent:
Definition: Instruction.h:573
llvm::APInt::getBoolValue
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:452
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Reassociate.h
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
CarmichaelShift
static unsigned CarmichaelShift(unsigned Bitwidth)
Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael function.
Definition: Reassociate.cpp:289
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::reassociate::XorOpnd::getSymbolicPart
Value * getSymbolicPart() const
Definition: Reassociate.cpp:101
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:144
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
PrintOps
static void PrintOps(Instruction *I, const SmallVectorImpl< ValueEntry > &Ops)
Print out the expression identified in the Ops list.
Definition: Reassociate.cpp:74
llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:395
llvm::BinaryOperator
Definition: InstrTypes.h:188
llvm::reassociate::XorOpnd::Invalidate
void Invalidate()
Definition: Reassociate.cpp:105
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1624
convertOrWithNoCommonBitsToAdd
static BinaryOperator * convertOrWithNoCommonBitsToAdd(Instruction *Or)
If we have (X|Y), and iff X and Y have no common bits set, transform this into (X+Y) to allow arithme...
Definition: Reassociate.cpp:999
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
llvm::PreservedAnalyses::areAllPreserved
bool areAllPreserved() const
Test whether all analyses are preserved (and none are abandoned).
Definition: PassManager.h:327
FindSingleUseMultiplyFactors
static void FindSingleUseMultiplyFactors(Value *V, SmallVectorImpl< Value * > &Factors)
If V is a single-use multiply, recursively add its operands as factors, otherwise add V to the list o...
Definition: Reassociate.cpp:1211
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:113
llvm::reassociate::XorOpnd::getSymbolicRank
unsigned getSymbolicRank() const
Definition: Reassociate.cpp:102
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:529
llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:93
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
ValueHandle.h
llvm::createReassociatePass
FunctionPass * createReassociatePass()
Argument.h
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
j
return j(j<< 16)
llvm::Instruction::setFastMathFlags
void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
Definition: Instruction.cpp:239
Constant.h
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1761
llvm::ReassociatePass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
Definition: Reassociate.cpp:2492
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:350
llvm::ConstantExpr::getNeg
static Constant * getNeg(Constant *C, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2704
get
Should compile to something r4 addze r3 instead we get
Definition: README.txt:24
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:345
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:197
llvm::ConstantFP::get
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:975
Casting.h
Function.h
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
PassManager.h
llvm::salvageDebugInfo
void salvageDebugInfo(Instruction &I)
Assuming the instruction I is going to be deleted, attempt to salvage debug users of I by writing the...
Definition: Local.cpp:1739
llvm::unique
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:1779
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:591
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::ReversePostOrderTraversal
Definition: PostOrderIterator.h:291
llvm::replaceDbgUsesWithUndef
bool replaceDbgUsesWithUndef(Instruction *I)
Replace all the uses of an SSA value in @llvm.dbg intrinsics with undef.
Definition: Local.cpp:594
llvm::codeview::VB
@ VB
Definition: CodeView.h:158
llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:786
Instructions.h
PostOrderIterator.h
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:188
llvm::pdb::DbgHeaderType::Max
@ Max
LinearizeExprTree
static bool LinearizeExprTree(Instruction *I, SmallVectorImpl< RepeatedValue > &Ops)
Given an associative binary expression, return the leaf nodes in Ops along with their weights (how ma...
Definition: Reassociate.cpp:450
SmallVector.h
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:367
User.h
CreateMul
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
Definition: Reassociate.cpp:243
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1351
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::Value::deleteValue
void deleteValue()
Delete a pointer to a generic Value.
Definition: Value.cpp:106
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:148
llvm::MIPatternMatch::m_Neg
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
Definition: MIPatternMatch.h:688
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::MIPatternMatch::m_Not
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
Definition: MIPatternMatch.h:696
CreateAdd
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
Definition: Reassociate.cpp:231
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:378
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
raw_ostream.h
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:644
llvm::BinaryOperator::Create
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Definition: Instructions.cpp:2778
Value.h
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1282
llvm::reassociate::Factor
Utility class representing a base and exponent pair which form one factor of some product.
Definition: Reassociate.h:59
llvm::MIPatternMatch::m_OneUse
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
Definition: MIPatternMatch.h:45
InitializePasses.h
buildMultiplyTree
static Value * buildMultiplyTree(IRBuilderBase &Builder, SmallVectorImpl< Value * > &Ops)
Build a tree of multiplies, computing the product of Ops.
Definition: Reassociate.cpp:1818
llvm::reassociate::XorOpnd::getValue
Value * getValue() const
Definition: Reassociate.cpp:100
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
BreakUpSubtract
static BinaryOperator * BreakUpSubtract(Instruction *Sub, ReassociatePass::OrderedSet &ToRedo)
If we have (X-Y), and if either X is an add, or if this is only used by an add, transform this into (X+(0-Y)) to promote better reassociation.
Definition: Reassociate.cpp:1046
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
shouldConvertOrWithNoCommonBitsToAdd
static bool shouldConvertOrWithNoCommonBitsToAdd(Instruction *Or)
Return true if it may be profitable to convert this (X|Y) into (X+Y).
Definition: Reassociate.cpp:975
reassociate
nary reassociate
Definition: NaryReassociate.cpp:162
isLoadCombineCandidate
static bool isLoadCombineCandidate(Instruction *Or)
Definition: Reassociate.cpp:925
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos lives in, right before MovePos.
Definition: Instruction.cpp:96
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:927
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:792
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52