LLVM 17.0.0git
Reassociate.cpp
Go to the documentation of this file.
1//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass reassociates commutative expressions in an order that is designed
10// to promote better constant propagation, GCSE, LICM, PRE, etc.
11//
12// For example: 4 + (x + 5) -> x + (4 + 5)
13//
14// In the implementation of this algorithm, constants are assigned rank = 0,
15// function arguments are rank = 1, and other values are assigned ranks
16// corresponding to the reverse post order traversal of current function
17// (starting at 2), which effectively gives values in deep loops higher rank
18// than values not in loops.
19//
20//===----------------------------------------------------------------------===//
21
23#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
35#include "llvm/IR/Argument.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
39#include "llvm/IR/Constants.h"
40#include "llvm/IR/Function.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
45#include "llvm/IR/Operator.h"
46#include "llvm/IR/PassManager.h"
48#include "llvm/IR/Type.h"
49#include "llvm/IR/User.h"
50#include "llvm/IR/Value.h"
51#include "llvm/IR/ValueHandle.h"
53#include "llvm/Pass.h"
55#include "llvm/Support/Debug.h"
59#include <algorithm>
60#include <cassert>
61#include <utility>
62
63using namespace llvm;
64using namespace reassociate;
65using namespace PatternMatch;
66
67#define DEBUG_TYPE "reassociate"
68
69STATISTIC(NumChanged, "Number of insts reassociated");
70STATISTIC(NumAnnihil, "Number of expr tree annihilated");
71STATISTIC(NumFactor , "Number of multiplies factored");
72
73#ifndef NDEBUG
/// Print out the expression identified in the Ops list.
// NOTE(review): extraction dropped the signature line here — from the body it
// takes an Instruction *I and a list Ops of {Op, Rank} entries; restore from
// upstream before compiling.
  Module *M = I->getModule();
  // Opcode and type of the whole expression first, then each operand with its
  // computed rank, e.g.:  add i32   [ %x, #3] [ %y, #2]
  dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
         << *Ops[0].Op->getType() << '\t';
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    dbgs() << "[ ";
    Ops[i].Op->printAsOperand(dbgs(), false, M);
    dbgs() << ", #" << Ops[i].Rank << "] ";
  }
}
85#endif
86
/// Utility class representing a non-constant Xor-operand. We classify
/// non-constant Xor-Operands into two categories:
///  C1) The operand is in the form "X & C", where C is a constant and C != ~0
///  C2)
///    C2.1) The operand is in the form of "X | C", where C is a non-zero
///          constant.
///    C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this
///          operand as "E | 0"
// NOTE(review): extraction dropped the class header line here (a class named
// XorOpnd, per the constructor declared below); restore before compiling.
public:
  XorOpnd(Value *V);

  // Invalidated entries (see Invalidate()) have a null SymbolicPart.
  bool isInvalid() const { return SymbolicPart == nullptr; }
  bool isOrExpr() const { return isOr; }
  Value *getValue() const { return OrigVal; }
  Value *getSymbolicPart() const { return SymbolicPart; }
  unsigned getSymbolicRank() const { return SymbolicRank; }
  const APInt &getConstPart() const { return ConstPart; }

  void Invalidate() { SymbolicPart = OrigVal = nullptr; }
  void setSymbolicRank(unsigned R) { SymbolicRank = R; }

private:
  Value *OrigVal;        // The operand as originally seen.
  Value *SymbolicPart;   // The non-constant part "X" (or the value itself).
  APInt ConstPart;       // The constant part "C"; zero in the C2.2 case.
  unsigned SymbolicRank; // Rank assigned later via setSymbolicRank().
  bool isOr;             // True for C2 ("X | C" / "E | 0"), false for C1.
};
116
// NOTE(review): extraction dropped this constructor's signature line (the
// XorOpnd constructor taking the Value *V used below); restore before
// compiling.
  assert(!isa<ConstantInt>(V) && "No ConstantInt");
  OrigVal = V;
  Instruction *I = dyn_cast<Instruction>(V);
  SymbolicRank = 0;

  // Try to split "X op C" (op = and/or) into symbolic part X and constant C.
  if (I && (I->getOpcode() == Instruction::Or ||
            I->getOpcode() == Instruction::And)) {
    Value *V0 = I->getOperand(0);
    Value *V1 = I->getOperand(1);
    const APInt *C;
    // Canonicalize so that a constant operand, if any, ends up in V1.
    if (match(V0, m_APInt(C)))
      std::swap(V0, V1);

    if (match(V1, m_APInt(C))) {
      ConstPart = *C;
      SymbolicPart = V0;
      isOr = (I->getOpcode() == Instruction::Or);
      return;
    }
  }

  // view the operand as "V | 0"
  SymbolicPart = V;
  ConstPart = APInt::getZero(V->getType()->getScalarSizeInBits());
  isOr = true;
}
144
/// Return true if I is an instruction with the FastMathFlags that are needed
/// for general reassociation set.  This is not the same as testing
/// Instruction::isAssociative() because it includes operations like fsub.
/// (This routine is only intended to be called for floating-point operations.)
// NOTE(review): extraction dropped the signature line here (a static bool
// predicate taking the Instruction *I used below); restore before compiling.
  assert(I && isa<FPMathOperator>(I) && "Should only check FP ops");
  // Both the 'reassoc' and 'nsz' fast-math flags must be present.
  return I->hasAllowReassoc() && I->hasNoSignedZeros();
}
153
154/// Return true if V is an instruction of the specified opcode and if it
155/// only has one use.
156static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
157 auto *BO = dyn_cast<BinaryOperator>(V);
158 if (BO && BO->hasOneUse() && BO->getOpcode() == Opcode)
159 if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
160 return BO;
161 return nullptr;
162}
163
164static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
165 unsigned Opcode2) {
166 auto *BO = dyn_cast<BinaryOperator>(V);
167 if (BO && BO->hasOneUse() &&
168 (BO->getOpcode() == Opcode1 || BO->getOpcode() == Opcode2))
169 if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
170 return BO;
171 return nullptr;
172}
173
void ReassociatePass::BuildRankMap(Function &F,
// NOTE(review): extraction dropped the second parameter line of this
// signature (the RPOT reverse-post-order traversal iterated below); restore
// from upstream before compiling.
  // Ranks 0 (constants) and 1 are reserved; numbering starts at 2.
  unsigned Rank = 2;

  // Assign distinct ranks to function arguments.
  for (auto &Arg : F.args()) {
    ValueRankMap[&Arg] = ++Rank;
    LLVM_DEBUG(dbgs() << "Calculated Rank[" << Arg.getName() << "] = " << Rank
                      << "\n");
  }

  // Traverse basic blocks in ReversePostOrder.
  for (BasicBlock *BB : RPOT) {
    // Each block gets a rank band shifted into the high bits, so instruction
    // ranks within the block never collide with block ranks.
    unsigned BBRank = RankMap[BB] = ++Rank << 16;

    // Walk the basic block, adding precomputed ranks for any instructions that
    // we cannot move.  This ensures that the ranks for these instructions are
    // all different in the block.
    for (Instruction &I : *BB)
      // NOTE(review): a guard line appears to have been dropped by extraction
      // here — the comment above says only immovable instructions are ranked,
      // but as written this ranks every instruction; confirm against upstream.
      ValueRankMap[&I] = ++BBRank;
  }
}
197
198unsigned ReassociatePass::getRank(Value *V) {
199 Instruction *I = dyn_cast<Instruction>(V);
200 if (!I) {
201 if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
202 return 0; // Otherwise it's a global or constant, rank 0.
203 }
204
205 if (unsigned Rank = ValueRankMap[I])
206 return Rank; // Rank already known?
207
208 // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
209 // we can reassociate expressions for code motion! Since we do not recurse
210 // for PHI nodes, we cannot have infinite recursion here, because there
211 // cannot be loops in the value graph that do not go through PHI nodes.
212 unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
213 for (unsigned i = 0, e = I->getNumOperands(); i != e && Rank != MaxRank; ++i)
214 Rank = std::max(Rank, getRank(I->getOperand(i)));
215
216 // If this is a 'not' or 'neg' instruction, do not count it for rank. This
217 // assures us that X and ~X will have the same rank.
218 if (!match(I, m_Not(m_Value())) && !match(I, m_Neg(m_Value())) &&
219 !match(I, m_FNeg(m_Value())))
220 ++Rank;
221
222 LLVM_DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank
223 << "\n");
224
225 return ValueRankMap[I] = Rank;
226}
227
228// Canonicalize constants to RHS. Otherwise, sort the operands by rank.
229void ReassociatePass::canonicalizeOperands(Instruction *I) {
230 assert(isa<BinaryOperator>(I) && "Expected binary operator.");
231 assert(I->isCommutative() && "Expected commutative operator.");
232
233 Value *LHS = I->getOperand(0);
234 Value *RHS = I->getOperand(1);
235 if (LHS == RHS || isa<Constant>(RHS))
236 return;
237 if (isa<Constant>(LHS) || getRank(RHS) < getRank(LHS))
238 cast<BinaryOperator>(I)->swapOperands();
239}
240
241static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
242 Instruction *InsertBefore, Value *FlagsOp) {
243 if (S1->getType()->isIntOrIntVectorTy())
244 return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
245 else {
246 BinaryOperator *Res =
247 BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore);
248 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
249 return Res;
250 }
251}
252
253static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
254 Instruction *InsertBefore, Value *FlagsOp) {
255 if (S1->getType()->isIntOrIntVectorTy())
256 return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
257 else {
258 BinaryOperator *Res =
259 BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore);
260 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
261 return Res;
262 }
263}
264
// NOTE(review): the opening line of this signature was dropped by extraction
// (a static negation helper taking the Value *S1 and const Twine &Name used
// below); restore it before compiling.
                                 Instruction *InsertBefore, Value *FlagsOp) {
  // Integer negation carries no flags.
  if (S1->getType()->isIntOrIntVectorTy())
    return BinaryOperator::CreateNeg(S1, Name, InsertBefore);

  // FP negation: copy fast-math flags from FlagsOp when it is an instruction.
  if (auto *FMFSource = dyn_cast<Instruction>(FlagsOp))
    return UnaryOperator::CreateFNegFMF(S1, FMFSource, Name, InsertBefore);

  return UnaryOperator::CreateFNeg(S1, Name, InsertBefore);
}
275
/// Replace 0-X with X*-1.
// NOTE(review): extraction dropped this function's signature line (a helper
// taking the negate instruction 'Neg' used below, returning the replacement
// multiply) and the two arms of the NegOne ternary (an all-ones integer
// constant vs. a -1.0 FP constant); restore both before compiling.
  assert((isa<UnaryOperator>(Neg) || isa<BinaryOperator>(Neg)) &&
         "Expected a Negate!");
  // FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0.
  // A binary negate is "0 - X" (value in operand 1); a unary fneg holds the
  // value in operand 0.
  unsigned OpNo = isa<BinaryOperator>(Neg) ? 1 : 0;
  Type *Ty = Neg->getType();
  Constant *NegOne = Ty->isIntOrIntVectorTy() ?

  BinaryOperator *Res = CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg, Neg);
  Neg->setOperand(OpNo, Constant::getNullValue(Ty)); // Drop use of op.
  Res->takeName(Neg);
  Neg->replaceAllUsesWith(Res);
  Res->setDebugLoc(Neg->getDebugLoc());
  return Res;
}
293
/// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the
/// Carmichael function.  This means that x^(2^k) === 1 mod 2^Bitwidth for
/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for
/// every even x in Bitwidth-bit arithmetic.
static unsigned CarmichaelShift(unsigned Bitwidth) {
  // lambda(2) = 1 and lambda(4) = 2, i.e. k = Bitwidth - 1 for Bitwidth < 3;
  // for Bitwidth >= 3, lambda(2^Bitwidth) = 2^(Bitwidth - 2).
  return Bitwidth < 3 ? Bitwidth - 1 : Bitwidth - 2;
}
304
/// Add the extra weight 'RHS' to the existing weight 'LHS',
/// reducing the combined weight using any special properties of the operation.
/// The existing weight LHS represents the computation X op X op ... op X where
/// X occurs LHS times.  The combined weight represents X op X op ... op X with
/// X occurring LHS + RHS times.  If op is "Xor" for example then the combined
/// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even;
/// the routine returns 1 in LHS in the first case, and 0 in LHS in the second.
static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
  // If we were working with infinite precision arithmetic then the combined
  // weight would be LHS + RHS.  But we are using finite precision arithmetic,
  // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct
  // for nilpotent operations and addition, but not for idempotent operations
  // and multiplication), so it is important to correctly reduce the combined
  // weight back into range if wrapping would be wrong.

  // If RHS is zero then the weight didn't change.
  if (RHS.isMinValue())
    return;
  // If LHS is zero then the combined weight is RHS.
  if (LHS.isMinValue()) {
    LHS = RHS;
    return;
  }
  // From this point on we know that neither LHS nor RHS is zero.

  if (Instruction::isIdempotent(Opcode)) {
    // Idempotent means X op X === X, so any non-zero weight is equivalent to a
    // weight of 1.  Keeping weights at zero or one also means that wrapping is
    // not a problem.
    assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
    return; // Return a weight of 1.
  }
  if (Instruction::isNilpotent(Opcode)) {
    // Nilpotent means X op X === 0, so reduce weights modulo 2.
    assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
    LHS = 0; // 1 + 1 === 0 modulo 2.
    return;
  }
  if (Opcode == Instruction::Add || Opcode == Instruction::FAdd) {
    // Addition: weights add with ordinary wrapping (sum modulo 2^Bitwidth is
    // exactly right, since 2^Bitwidth copies of X sum to zero).
    // TODO: Reduce the weight by exploiting nsw/nuw?
    LHS += RHS;
    return;
  }

  assert((Opcode == Instruction::Mul || Opcode == Instruction::FMul) &&
         "Unknown associative operation!");
  // Multiplication: the weight is an exponent, so reduce it using the
  // Carmichael number of 2^Bitwidth rather than letting it wrap.
  unsigned Bitwidth = LHS.getBitWidth();
  // If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth
  // can be replaced with W-CM.  That's because x^W=x^(W-CM) for every Bitwidth
  // bit number x, since either x is odd in which case x^CM = 1, or x is even in
  // which case both x^W and x^(W - CM) are zero.  By subtracting off multiples
  // of CM like this weights can always be reduced to the range [0, CM+Bitwidth)
  // which by a happy accident means that they can always be represented using
  // Bitwidth bits.
  // TODO: Reduce the weight by exploiting nsw/nuw?  (Could do much better than
  // the Carmichael number).
  if (Bitwidth > 3) {
    /// CM - The value of Carmichael's lambda function.
    APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth));
    // Any weight W >= Threshold can be replaced with W - CM.
    APInt Threshold = CM + Bitwidth;
    assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!");
    // For Bitwidth 4 or more the following sum does not overflow.
    LHS += RHS;
    while (LHS.uge(Threshold))
      LHS -= CM;
  } else {
    // To avoid problems with overflow do everything the same as above but using
    // a larger type.
    unsigned CM = 1U << CarmichaelShift(Bitwidth);
    unsigned Threshold = CM + Bitwidth;
    assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold &&
           "Weights not reduced!");
    unsigned Total = LHS.getZExtValue() + RHS.getZExtValue();
    while (Total >= Threshold)
      Total -= CM;
    LHS = Total;
  }
}
384
385using RepeatedValue = std::pair<Value*, APInt>;
386
387/// Given an associative binary expression, return the leaf
388/// nodes in Ops along with their weights (how many times the leaf occurs). The
389/// original expression is the same as
390/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
391/// op
392/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
393/// op
394/// ...
395/// op
396/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
397///
398/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
399///
400/// This routine may modify the function, in which case it returns 'true'. The
401/// changes it makes may well be destructive, changing the value computed by 'I'
402/// to something completely different. Thus if the routine returns 'true' then
403/// you MUST either replace I with a new expression computed from the Ops array,
404/// or use RewriteExprTree to put the values back in.
405///
406/// A leaf node is either not a binary operation of the same kind as the root
407/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
408/// opcode), or is the same kind of binary operator but has a use which either
409/// does not belong to the expression, or does belong to the expression but is
410/// a leaf node. Every leaf node has at least one use that is a non-leaf node
411/// of the expression, while for non-leaf nodes (except for the root 'I') every
412/// use is a non-leaf node of the expression.
413///
414/// For example:
415/// expression graph node names
416///
417/// + | I
418/// / \ |
419/// + + | A, B
420/// / \ / \ |
421/// * + * | C, D, E
422/// / \ / \ / \ |
423/// + * | F, G
424///
425/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
426/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
427///
428/// The expression is maximal: if some instruction is a binary operator of the
429/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
430/// then the instruction also belongs to the expression, is not a leaf node of
431/// it, and its operands also belong to the expression (but may be leaf nodes).
432///
433/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
434/// order to ensure that every non-root node in the expression has *exactly one*
435/// use by a non-leaf node of the expression. This destruction means that the
436/// caller MUST either replace 'I' with a new expression or use something like
437/// RewriteExprTree to put the values back in if the routine indicates that it
438/// made a change by returning 'true'.
439///
440/// In the above example either the right operand of A or the left operand of B
441/// will be replaced by undef. If it is B's operand then this gives:
442///
443/// + | I
444/// / \ |
445/// + + | A, B - operand of B replaced with undef
446/// / \ \ |
447/// * + * | C, D, E
448/// / \ / \ / \ |
449/// + * | F, G
450///
451/// Note that such undef operands can only be reached by passing through 'I'.
452/// For example, if you visit operands recursively starting from a leaf node
453/// then you will never see such an undef operand unless you get back to 'I',
454/// which requires passing through a phi node.
455///
456/// Note that this routine may also mutate binary operators of the wrong type
457/// that have all uses inside the expression (i.e. only used by non-leaf nodes
458/// of the expression) if it can turn them into binary operators of the right
459/// type and thus make the expression bigger.
// NOTE(review): extraction dropped this function's signature lines — from the
// body it is a static bool LinearizeExprTree taking the root Instruction *I,
// an output list Ops of (leaf, weight) pairs, and a ToRedo set; restore from
// upstream before compiling.
  assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
         "Expected a UnaryOperator or BinaryOperator!");
  LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
  // Weights are APInts with the expression's scalar bit width; see
  // IncorporateWeight for how combined weights are reduced.
  unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
  unsigned Opcode = I->getOpcode();
  assert(I->isAssociative() && I->isCommutative() &&
         "Expected an associative and commutative operation!");

  // Visit all operands of the expression, keeping track of their weight (the
  // number of paths from the expression root to the operand, or if you like
  // the number of times that operand occurs in the linearized expression).
  // For example, if I = X + A, where X = A + B, then I, X and B have weight 1
  // while A has weight two.

  // Worklist of non-leaf nodes (their operands are in the expression too) along
  // with their weights, representing a certain number of paths to the operator.
  // If an operator occurs in the worklist multiple times then we found multiple
  // ways to get to it.
  SmallVector<std::pair<Instruction*, APInt>, 8> Worklist; // (Op, Weight)
  Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
  bool Changed = false;

  // Leaves of the expression are values that either aren't the right kind of
  // operation (eg: a constant, or a multiply in an add tree), or are, but have
  // some uses that are not inside the expression.  For example, in I = X + X,
  // X = A + B, the value X has two uses (by I) that are in the expression.  If
  // X has any other uses, for example in a return instruction, then we consider
  // X to be a leaf, and won't analyze it further.  When we first visit a value,
  // if it has more than one use then at first we conservatively consider it to
  // be a leaf.  Later, as the expression is explored, we may discover some more
  // uses of the value from inside the expression.  If all uses turn out to be
  // from within the expression (and the value is a binary operator of the right
  // kind) then the value is no longer considered to be a leaf, and its operands
  // are explored.

  // Leaves - Keeps track of the set of putative leaves as well as the number of
  // paths to each leaf seen so far.
  using LeafMap = DenseMap<Value *, APInt>;
  LeafMap Leaves; // Leaf -> Total weight so far.
  SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order.

#ifndef NDEBUG
  SmallPtrSet<Value *, 8> Visited; // For checking the iteration scheme.
#endif
  while (!Worklist.empty()) {
    std::pair<Instruction*, APInt> P = Worklist.pop_back_val();
    I = P.first; // We examine the operands of this binary operator.

    for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
      Value *Op = I->getOperand(OpIdx);
      APInt Weight = P.second; // Number of paths to this operand.
      LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
      assert(!Op->use_empty() && "No uses, so how did we get to it?!");

      // If this is a binary operation of the right kind with only one use then
      // add its operands to the expression.
      if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
        assert(Visited.insert(Op).second && "Not first visit!");
        LLVM_DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
        Worklist.push_back(std::make_pair(BO, Weight));
        continue;
      }

      // Appears to be a leaf.  Is the operand already in the set of leaves?
      LeafMap::iterator It = Leaves.find(Op);
      if (It == Leaves.end()) {
        // Not in the leaf map.  Must be the first time we saw this operand.
        assert(Visited.insert(Op).second && "Not first visit!");
        if (!Op->hasOneUse()) {
          // This value has uses not accounted for by the expression, so it is
          // not safe to modify.  Mark it as being a leaf.
          // NOTE(review): extraction dropped the LLVM_DEBUG(dbgs() line that
          // opens the debug statement whose continuation follows.
              << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
          LeafOrder.push_back(Op);
          Leaves[Op] = Weight;
          continue;
        }
        // No uses outside the expression, try morphing it.
      } else {
        // Already in the leaf map.
        assert(It != Leaves.end() && Visited.count(Op) &&
               "In leaf map but not visited!");

        // Update the number of paths to the leaf.
        IncorporateWeight(It->second, Weight, Opcode);

#if 0 // TODO: Re-enable once PR13021 is fixed.
        // The leaf already has one use from inside the expression.  As we want
        // exactly one such use, drop this new use of the leaf.
        assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
        I->setOperand(OpIdx, UndefValue::get(I->getType()));
        Changed = true;

        // If the leaf is a binary operation of the right kind and we now see
        // that its multiple original uses were in fact all by nodes belonging
        // to the expression, then no longer consider it to be a leaf and add
        // its operands to the expression.
        if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
          LLVM_DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n");
          Worklist.push_back(std::make_pair(BO, It->second));
          Leaves.erase(It);
          continue;
        }
#endif

        // If we still have uses that are not accounted for by the expression
        // then it is not safe to modify the value.
        if (!Op->hasOneUse())
          continue;

        // No uses outside the expression, try morphing it.
        Weight = It->second;
        Leaves.erase(It); // Since the value may be morphed below.
      }

      // At this point we have a value which, first of all, is not a binary
      // expression of the right kind, and secondly, is only used inside the
      // expression.  This means that it can safely be modified.  See if we
      // can usefully morph it into an expression of the right kind.
      assert((!isa<Instruction>(Op) ||
              cast<Instruction>(Op)->getOpcode() != Opcode
              || (isa<FPMathOperator>(Op) &&
                  !hasFPAssociativeFlags(cast<Instruction>(Op)))) &&
             "Should have been handled above!");
      assert(Op->hasOneUse() && "Has uses outside the expression tree!");

      // If this is a multiply expression, turn any internal negations into
      // multiplies by -1 so they can be reassociated.  Add any users of the
      // newly created multiplication by -1 to the redo list, so any
      // reassociation opportunities that are exposed will be reassociated
      // further.
      Instruction *Neg;
      if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) ||
           (Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) &&
          match(Op, m_Instruction(Neg))) {
        // NOTE(review): extraction dropped two lines here — the LLVM_DEBUG(
        // dbgs() opener for the print whose continuation follows, and the
        // definition of 'Mul' (the multiply created by lowering 'Neg').
            << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
        LLVM_DEBUG(dbgs() << *Mul << '\n');
        Worklist.push_back(std::make_pair(Mul, Weight));
        for (User *U : Mul->users()) {
          if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(U))
            ToRedo.insert(UserBO);
        }
        ToRedo.insert(Neg);
        Changed = true;
        continue;
      }

      // Failed to morph into an expression of the right type.  This really is
      // a leaf.
      LLVM_DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
      assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
      LeafOrder.push_back(Op);
      Leaves[Op] = Weight;
    }
  }

  // The leaves, repeated according to their weights, represent the linearized
  // form of the expression.
  for (Value *V : LeafOrder) {
    LeafMap::iterator It = Leaves.find(V);
    if (It == Leaves.end())
      // Node initially thought to be a leaf wasn't.
      continue;
    assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
    APInt Weight = It->second;
    if (Weight.isMinValue())
      // Leaf already output or weight reduction eliminated it.
      continue;
    // Ensure the leaf is only output once.
    It->second = 0;
    Ops.push_back(std::make_pair(V, Weight));
  }

  // For nilpotent operations or addition there may be no operands, for example
  // because the expression was "X xor X" or consisted of 2^Bitwidth additions:
  // in both cases the weight reduces to 0 causing the value to be skipped.
  if (Ops.empty()) {
    Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
    assert(Identity && "Associative operation without identity!");
    Ops.emplace_back(Identity, APInt(Bitwidth, 1));
  }

  return Changed;
}
649
650/// Now that the operands for this expression tree are
651/// linearized and optimized, emit them in-order.
652void ReassociatePass::RewriteExprTree(BinaryOperator *I,
654 assert(Ops.size() > 1 && "Single values should be used directly!");
655
656 // Since our optimizations should never increase the number of operations, the
657 // new expression can usually be written reusing the existing binary operators
658 // from the original expression tree, without creating any new instructions,
659 // though the rewritten expression may have a completely different topology.
660 // We take care to not change anything if the new expression will be the same
661 // as the original. If more than trivial changes (like commuting operands)
662 // were made then we are obliged to clear out any optional subclass data like
663 // nsw flags.
664
665 /// NodesToRewrite - Nodes from the original expression available for writing
666 /// the new expression into.
667 SmallVector<BinaryOperator*, 8> NodesToRewrite;
668 unsigned Opcode = I->getOpcode();
670
671 /// NotRewritable - The operands being written will be the leaves of the new
672 /// expression and must not be used as inner nodes (via NodesToRewrite) by
673 /// mistake. Inner nodes are always reassociable, and usually leaves are not
674 /// (if they were they would have been incorporated into the expression and so
675 /// would not be leaves), so most of the time there is no danger of this. But
676 /// in rare cases a leaf may become reassociable if an optimization kills uses
677 /// of it, or it may momentarily become reassociable during rewriting (below)
678 /// due it being removed as an operand of one of its uses. Ensure that misuse
679 /// of leaf nodes as inner nodes cannot occur by remembering all of the future
680 /// leaves and refusing to reuse any of them as inner nodes.
681 SmallPtrSet<Value*, 8> NotRewritable;
682 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
683 NotRewritable.insert(Ops[i].Op);
684
685 // ExpressionChanged - Non-null if the rewritten expression differs from the
686 // original in some non-trivial way, requiring the clearing of optional flags.
687 // Flags are cleared from the operator in ExpressionChanged up to I inclusive.
688 BinaryOperator *ExpressionChanged = nullptr;
689 for (unsigned i = 0; ; ++i) {
690 // The last operation (which comes earliest in the IR) is special as both
691 // operands will come from Ops, rather than just one with the other being
692 // a subexpression.
693 if (i+2 == Ops.size()) {
694 Value *NewLHS = Ops[i].Op;
695 Value *NewRHS = Ops[i+1].Op;
696 Value *OldLHS = Op->getOperand(0);
697 Value *OldRHS = Op->getOperand(1);
698
699 if (NewLHS == OldLHS && NewRHS == OldRHS)
700 // Nothing changed, leave it alone.
701 break;
702
703 if (NewLHS == OldRHS && NewRHS == OldLHS) {
704 // The order of the operands was reversed. Swap them.
705 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
706 Op->swapOperands();
707 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
708 MadeChange = true;
709 ++NumChanged;
710 break;
711 }
712
713 // The new operation differs non-trivially from the original. Overwrite
714 // the old operands with the new ones.
715 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
716 if (NewLHS != OldLHS) {
717 BinaryOperator *BO = isReassociableOp(OldLHS, Opcode);
718 if (BO && !NotRewritable.count(BO))
719 NodesToRewrite.push_back(BO);
720 Op->setOperand(0, NewLHS);
721 }
722 if (NewRHS != OldRHS) {
723 BinaryOperator *BO = isReassociableOp(OldRHS, Opcode);
724 if (BO && !NotRewritable.count(BO))
725 NodesToRewrite.push_back(BO);
726 Op->setOperand(1, NewRHS);
727 }
728 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
729
730 ExpressionChanged = Op;
731 MadeChange = true;
732 ++NumChanged;
733
734 break;
735 }
736
737 // Not the last operation. The left-hand side will be a sub-expression
738 // while the right-hand side will be the current element of Ops.
739 Value *NewRHS = Ops[i].Op;
740 if (NewRHS != Op->getOperand(1)) {
741 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
742 if (NewRHS == Op->getOperand(0)) {
743 // The new right-hand side was already present as the left operand. If
744 // we are lucky then swapping the operands will sort out both of them.
745 Op->swapOperands();
746 } else {
747 // Overwrite with the new right-hand side.
748 BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
749 if (BO && !NotRewritable.count(BO))
750 NodesToRewrite.push_back(BO);
751 Op->setOperand(1, NewRHS);
752 ExpressionChanged = Op;
753 }
754 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
755 MadeChange = true;
756 ++NumChanged;
757 }
758
759 // Now deal with the left-hand side. If this is already an operation node
760 // from the original expression then just rewrite the rest of the expression
761 // into it.
762 BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
763 if (BO && !NotRewritable.count(BO)) {
764 Op = BO;
765 continue;
766 }
767
768 // Otherwise, grab a spare node from the original expression and use that as
769 // the left-hand side. If there are no nodes left then the optimizers made
770 // an expression with more nodes than the original! This usually means that
771 // they did something stupid but it might mean that the problem was just too
772 // hard (finding the mimimal number of multiplications needed to realize a
773 // multiplication expression is NP-complete). Whatever the reason, smart or
774 // stupid, create a new node if there are none left.
775 BinaryOperator *NewOp;
776 if (NodesToRewrite.empty()) {
777 Constant *Undef = UndefValue::get(I->getType());
779 Undef, Undef, "", I);
780 if (isa<FPMathOperator>(NewOp))
781 NewOp->setFastMathFlags(I->getFastMathFlags());
782 } else {
783 NewOp = NodesToRewrite.pop_back_val();
784 }
785
786 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
787 Op->setOperand(0, NewOp);
788 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
789 ExpressionChanged = Op;
790 MadeChange = true;
791 ++NumChanged;
792 Op = NewOp;
793 }
794
795 // If the expression changed non-trivially then clear out all subclass data
796 // starting from the operator specified in ExpressionChanged, and compactify
797 // the operators to just before the expression root to guarantee that the
798 // expression tree is dominated by all of Ops.
799 if (ExpressionChanged)
800 do {
801 // Preserve FastMathFlags.
802 if (isa<FPMathOperator>(I)) {
803 FastMathFlags Flags = I->getFastMathFlags();
804 ExpressionChanged->clearSubclassOptionalData();
805 ExpressionChanged->setFastMathFlags(Flags);
806 } else
807 ExpressionChanged->clearSubclassOptionalData();
808
809 if (ExpressionChanged == I)
810 break;
811
812 // Discard any debug info related to the expressions that have changed (we
813 // can leave debug info related to the root, since the result of the
814 // expression tree should be the same even after reassociation).
815 replaceDbgUsesWithUndef(ExpressionChanged);
816
817 ExpressionChanged->moveBefore(I);
818 ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->user_begin());
819 } while (true);
820
821 // Throw away any left over nodes from the original expression.
822 for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
823 RedoInsts.insert(NodesToRewrite[i]);
824}
825
826/// Insert instructions before the instruction pointed to by BI,
827/// that computes the negative version of the value specified. The negative
828/// version of the value is returned, and BI is left pointing at the instruction
829/// that should be processed next by the reassociation pass.
830/// Also add intermediate instructions to the redo list that are modified while
831/// pushing the negates through adds. These will be revisited to see if
832/// additional opportunities have been exposed.
835 if (auto *C = dyn_cast<Constant>(V)) {
836 const DataLayout &DL = BI->getModule()->getDataLayout();
837 Constant *Res = C->getType()->isFPOrFPVectorTy()
838 ? ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL)
840 if (Res)
841 return Res;
842 }
843
844 // We are trying to expose opportunity for reassociation. One of the things
845 // that we want to do to achieve this is to push a negation as deep into an
846 // expression chain as possible, to expose the add instructions. In practice,
847 // this means that we turn this:
848 // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
849 // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
850 // the constants. We assume that instcombine will clean up the mess later if
851 // we introduce tons of unnecessary negation instructions.
852 //
853 if (BinaryOperator *I =
854 isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
855 // Push the negates through the add.
856 I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
857 I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
858 if (I->getOpcode() == Instruction::Add) {
859 I->setHasNoUnsignedWrap(false);
860 I->setHasNoSignedWrap(false);
861 }
862
863 // We must move the add instruction here, because the neg instructions do
864 // not dominate the old add instruction in general. By moving it, we are
865 // assured that the neg instructions we just inserted dominate the
866 // instruction we are about to insert after them.
867 //
868 I->moveBefore(BI);
869 I->setName(I->getName()+".neg");
870
871 // Add the intermediate negates to the redo list as processing them later
872 // could expose more reassociating opportunities.
873 ToRedo.insert(I);
874 return I;
875 }
876
877 // Okay, we need to materialize a negated version of V with an instruction.
878 // Scan the use lists of V to see if we have one already.
879 for (User *U : V->users()) {
880 if (!match(U, m_Neg(m_Value())) && !match(U, m_FNeg(m_Value())))
881 continue;
882
883 // We found one! Now we have to make sure that the definition dominates
884 // this use. We do this by moving it to the entry block (if it is a
885 // non-instruction value) or right after the definition. These negates will
886 // be zapped by reassociate later, so we don't need much finesse here.
887 Instruction *TheNeg = dyn_cast<Instruction>(U);
888
889 // We can't safely propagate a vector zero constant with poison/undef lanes.
890 Constant *C;
891 if (match(TheNeg, m_BinOp(m_Constant(C), m_Value())) &&
892 C->containsUndefOrPoisonElement())
893 continue;
894
895 // Verify that the negate is in this function, V might be a constant expr.
896 if (!TheNeg ||
897 TheNeg->getParent()->getParent() != BI->getParent()->getParent())
898 continue;
899
900 Instruction *InsertPt;
901 if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
902 InsertPt = InstInput->getInsertionPointAfterDef();
903 if (!InsertPt)
904 continue;
905 } else {
906 InsertPt = &*TheNeg->getFunction()->getEntryBlock().begin();
907 }
908
909 TheNeg->moveBefore(InsertPt);
910 if (TheNeg->getOpcode() == Instruction::Sub) {
911 TheNeg->setHasNoUnsignedWrap(false);
912 TheNeg->setHasNoSignedWrap(false);
913 } else {
914 TheNeg->andIRFlags(BI);
915 }
916 ToRedo.insert(TheNeg);
917 return TheNeg;
918 }
919
920 // Insert a 'neg' instruction that subtracts the value from zero to get the
921 // negation.
922 Instruction *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
923 ToRedo.insert(NewNeg);
924 return NewNeg;
925}
926
927// See if this `or` looks like an load widening reduction, i.e. that it
928// consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't
929// ensure that the pattern is *really* a load widening reduction,
930// we do not ensure that it can really be replaced with a widened load,
931// only that it mostly looks like one.
935
936 auto Enqueue = [&](Value *V) {
937 auto *I = dyn_cast<Instruction>(V);
938 // Each node of an `or` reduction must be an instruction,
939 if (!I)
940 return false; // Node is certainly not part of an `or` load reduction.
941 // Only process instructions we have never processed before.
942 if (Visited.insert(I).second)
943 Worklist.emplace_back(I);
944 return true; // Will need to look at parent nodes.
945 };
946
947 if (!Enqueue(Or))
948 return false; // Not an `or` reduction pattern.
949
950 while (!Worklist.empty()) {
951 auto *I = Worklist.pop_back_val();
952
953 // Okay, which instruction is this node?
954 switch (I->getOpcode()) {
955 case Instruction::Or:
956 // Got an `or` node. That's fine, just recurse into it's operands.
957 for (Value *Op : I->operands())
958 if (!Enqueue(Op))
959 return false; // Not an `or` reduction pattern.
960 continue;
961
962 case Instruction::Shl:
963 case Instruction::ZExt:
964 // `shl`/`zext` nodes are fine, just recurse into their base operand.
965 if (!Enqueue(I->getOperand(0)))
966 return false; // Not an `or` reduction pattern.
967 continue;
968
969 case Instruction::Load:
970 // Perfect, `load` node means we've reached an edge of the graph.
971 continue;
972
973 default: // Unknown node.
974 return false; // Not an `or` reduction pattern.
975 }
976 }
977
978 return true;
979}
980
981/// Return true if it may be profitable to convert this (X|Y) into (X+Y).
983 // Don't bother to convert this up unless either the LHS is an associable add
984 // or subtract or mul or if this is only used by one of the above.
985 // This is only a compile-time improvement, it is not needed for correctness!
986 auto isInteresting = [](Value *V) {
987 for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul,
988 Instruction::Shl})
989 if (isReassociableOp(V, Op))
990 return true;
991 return false;
992 };
993
994 if (any_of(Or->operands(), isInteresting))
995 return true;
996
997 Value *VB = Or->user_back();
998 if (Or->hasOneUse() && isInteresting(VB))
999 return true;
1000
1001 return false;
1002}
1003
1004/// If we have (X|Y), and iff X and Y have no common bits set,
1005/// transform this into (X+Y) to allow arithmetics reassociation.
1007 // Convert an or into an add.
1008 BinaryOperator *New =
1009 CreateAdd(Or->getOperand(0), Or->getOperand(1), "", Or, Or);
1010 New->setHasNoSignedWrap();
1011 New->setHasNoUnsignedWrap();
1012 New->takeName(Or);
1013
1014 // Everyone now refers to the add instruction.
1015 Or->replaceAllUsesWith(New);
1016 New->setDebugLoc(Or->getDebugLoc());
1017
1018 LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << '\n');
1019 return New;
1020}
1021
1022/// Return true if we should break up this subtract of X-Y into (X + -Y).
1024 // If this is a negation, we can't split it up!
1025 if (match(Sub, m_Neg(m_Value())) || match(Sub, m_FNeg(m_Value())))
1026 return false;
1027
1028 // Don't breakup X - undef.
1029 if (isa<UndefValue>(Sub->getOperand(1)))
1030 return false;
1031
1032 // Don't bother to break this up unless either the LHS is an associable add or
1033 // subtract or if this is only used by one.
1034 Value *V0 = Sub->getOperand(0);
1035 if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) ||
1036 isReassociableOp(V0, Instruction::Sub, Instruction::FSub))
1037 return true;
1038 Value *V1 = Sub->getOperand(1);
1039 if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) ||
1040 isReassociableOp(V1, Instruction::Sub, Instruction::FSub))
1041 return true;
1042 Value *VB = Sub->user_back();
1043 if (Sub->hasOneUse() &&
1044 (isReassociableOp(VB, Instruction::Add, Instruction::FAdd) ||
1045 isReassociableOp(VB, Instruction::Sub, Instruction::FSub)))
1046 return true;
1047
1048 return false;
1049}
1050
1051/// If we have (X-Y), and if either X is an add, or if this is only used by an
1052/// add, transform this into (X+(0-Y)) to promote better reassociation.
1055 // Convert a subtract into an add and a neg instruction. This allows sub
1056 // instructions to be commuted with other add instructions.
1057 //
1058 // Calculate the negative value of Operand 1 of the sub instruction,
1059 // and set it as the RHS of the add instruction we just made.
1060 Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
1061 BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
1062 Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
1063 Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
1064 New->takeName(Sub);
1065
1066 // Everyone now refers to the add instruction.
1067 Sub->replaceAllUsesWith(New);
1068 New->setDebugLoc(Sub->getDebugLoc());
1069
1070 LLVM_DEBUG(dbgs() << "Negated: " << *New << '\n');
1071 return New;
1072}
1073
1074/// If this is a shift of a reassociable multiply or is used by one, change
1075/// this into a multiply by a constant to assist with further reassociation.
1077 Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
1078 auto *SA = cast<ConstantInt>(Shl->getOperand(1));
1079 MulCst = ConstantExpr::getShl(MulCst, SA);
1080
1082 BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
1083 Shl->setOperand(0, PoisonValue::get(Shl->getType())); // Drop use of op.
1084 Mul->takeName(Shl);
1085
1086 // Everyone now refers to the mul instruction.
1087 Shl->replaceAllUsesWith(Mul);
1088 Mul->setDebugLoc(Shl->getDebugLoc());
1089
1090 // We can safely preserve the nuw flag in all cases. It's also safe to turn a
1091 // nuw nsw shl into a nuw nsw mul. However, nsw in isolation requires special
1092 // handling. It can be preserved as long as we're not left shifting by
1093 // bitwidth - 1.
1094 bool NSW = cast<BinaryOperator>(Shl)->hasNoSignedWrap();
1095 bool NUW = cast<BinaryOperator>(Shl)->hasNoUnsignedWrap();
1096 unsigned BitWidth = Shl->getType()->getIntegerBitWidth();
1097 if (NSW && (NUW || SA->getValue().ult(BitWidth - 1)))
1098 Mul->setHasNoSignedWrap(true);
1099 Mul->setHasNoUnsignedWrap(NUW);
1100 return Mul;
1101}
1102
1103/// Scan backwards and forwards among values with the same rank as element i
1104/// to see if X exists. If X does not exist, return i. This is useful when
1105/// scanning for 'x' when we see '-x' because they both get the same rank.
1107 unsigned i, Value *X) {
1108 unsigned XRank = Ops[i].Rank;
1109 unsigned e = Ops.size();
1110 for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) {
1111 if (Ops[j].Op == X)
1112 return j;
1113 if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
1114 if (Instruction *I2 = dyn_cast<Instruction>(X))
1115 if (I1->isIdenticalTo(I2))
1116 return j;
1117 }
1118 // Scan backwards.
1119 for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) {
1120 if (Ops[j].Op == X)
1121 return j;
1122 if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
1123 if (Instruction *I2 = dyn_cast<Instruction>(X))
1124 if (I1->isIdenticalTo(I2))
1125 return j;
1126 }
1127 return i;
1128}
1129
1130/// Emit a tree of add instructions, summing Ops together
1131/// and returning the result. Insert the tree before I.
1134 if (Ops.size() == 1) return Ops.back();
1135
1136 Value *V1 = Ops.pop_back_val();
1137 Value *V2 = EmitAddTreeOfValues(I, Ops);
1138 return CreateAdd(V2, V1, "reass.add", I, I);
1139}
1140
1141/// If V is an expression tree that is a multiplication sequence,
1142/// and if this sequence contains a multiply by Factor,
1143/// remove Factor from the tree and return the new tree.
1144Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
1145 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1146 if (!BO)
1147 return nullptr;
1148
1150 MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts);
1152 Factors.reserve(Tree.size());
1153 for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
1154 RepeatedValue E = Tree[i];
1155 Factors.append(E.second.getZExtValue(),
1156 ValueEntry(getRank(E.first), E.first));
1157 }
1158
1159 bool FoundFactor = false;
1160 bool NeedsNegate = false;
1161 for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
1162 if (Factors[i].Op == Factor) {
1163 FoundFactor = true;
1164 Factors.erase(Factors.begin()+i);
1165 break;
1166 }
1167
1168 // If this is a negative version of this factor, remove it.
1169 if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) {
1170 if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
1171 if (FC1->getValue() == -FC2->getValue()) {
1172 FoundFactor = NeedsNegate = true;
1173 Factors.erase(Factors.begin()+i);
1174 break;
1175 }
1176 } else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) {
1177 if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Factors[i].Op)) {
1178 const APFloat &F1 = FC1->getValueAPF();
1179 APFloat F2(FC2->getValueAPF());
1180 F2.changeSign();
1181 if (F1 == F2) {
1182 FoundFactor = NeedsNegate = true;
1183 Factors.erase(Factors.begin() + i);
1184 break;
1185 }
1186 }
1187 }
1188 }
1189
1190 if (!FoundFactor) {
1191 // Make sure to restore the operands to the expression tree.
1192 RewriteExprTree(BO, Factors);
1193 return nullptr;
1194 }
1195
1196 BasicBlock::iterator InsertPt = ++BO->getIterator();
1197
1198 // If this was just a single multiply, remove the multiply and return the only
1199 // remaining operand.
1200 if (Factors.size() == 1) {
1201 RedoInsts.insert(BO);
1202 V = Factors[0].Op;
1203 } else {
1204 RewriteExprTree(BO, Factors);
1205 V = BO;
1206 }
1207
1208 if (NeedsNegate)
1209 V = CreateNeg(V, "neg", &*InsertPt, BO);
1210
1211 return V;
1212}
1213
1214/// If V is a single-use multiply, recursively add its operands as factors,
1215/// otherwise add V to the list of factors.
1216///
1217/// Ops is the top-level list of add operands we're trying to factor.
1219 SmallVectorImpl<Value*> &Factors) {
1220 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1221 if (!BO) {
1222 Factors.push_back(V);
1223 return;
1224 }
1225
1226 // Otherwise, add the LHS and RHS to the list of factors.
1229}
1230
1231/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
1232/// This optimizes based on identities. If it can be reduced to a single Value,
1233/// it is returned, otherwise the Ops list is mutated as necessary.
1234static Value *OptimizeAndOrXor(unsigned Opcode,
1236 // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
1237 // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
1238 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1239 // First, check for X and ~X in the operand list.
1240 assert(i < Ops.size());
1241 Value *X;
1242 if (match(Ops[i].Op, m_Not(m_Value(X)))) { // Cannot occur for ^.
1243 unsigned FoundX = FindInOperandList(Ops, i, X);
1244 if (FoundX != i) {
1245 if (Opcode == Instruction::And) // ...&X&~X = 0
1246 return Constant::getNullValue(X->getType());
1247
1248 if (Opcode == Instruction::Or) // ...|X|~X = -1
1249 return Constant::getAllOnesValue(X->getType());
1250 }
1251 }
1252
1253 // Next, check for duplicate pairs of values, which we assume are next to
1254 // each other, due to our sorting criteria.
1255 assert(i < Ops.size());
1256 if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
1257 if (Opcode == Instruction::And || Opcode == Instruction::Or) {
1258 // Drop duplicate values for And and Or.
1259 Ops.erase(Ops.begin()+i);
1260 --i; --e;
1261 ++NumAnnihil;
1262 continue;
1263 }
1264
1265 // Drop pairs of values for Xor.
1266 assert(Opcode == Instruction::Xor);
1267 if (e == 2)
1268 return Constant::getNullValue(Ops[0].Op->getType());
1269
1270 // Y ^ X^X -> Y
1271 Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
1272 i -= 1; e -= 2;
1273 ++NumAnnihil;
1274 }
1275 }
1276 return nullptr;
1277}
1278
1279/// Helper function of CombineXorOpnd(). It creates a bitwise-and
1280/// instruction with the given two operands, and return the resulting
1281/// instruction. There are two special cases: 1) if the constant operand is 0,
1282/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
1283/// be returned.
1284static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
1285 const APInt &ConstOpnd) {
1286 if (ConstOpnd.isZero())
1287 return nullptr;
1288
1289 if (ConstOpnd.isAllOnes())
1290 return Opnd;
1291
1292 Instruction *I = BinaryOperator::CreateAnd(
1293 Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra",
1294 InsertBefore);
1295 I->setDebugLoc(InsertBefore->getDebugLoc());
1296 return I;
1297}
1298
1299// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
1300// into "R ^ C", where C would be 0, and R is a symbolic value.
1301//
1302// If it was successful, true is returned, and the "R" and "C" is returned
1303// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
1304// and both "Res" and "ConstOpnd" remain unchanged.
1305bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
1306 APInt &ConstOpnd, Value *&Res) {
1307 // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
1308 // = ((x | c1) ^ c1) ^ (c1 ^ c2)
1309 // = (x & ~c1) ^ (c1 ^ c2)
1310 // It is useful only when c1 == c2.
1311 if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isZero())
1312 return false;
1313
1314 if (!Opnd1->getValue()->hasOneUse())
1315 return false;
1316
1317 const APInt &C1 = Opnd1->getConstPart();
1318 if (C1 != ConstOpnd)
1319 return false;
1320
1321 Value *X = Opnd1->getSymbolicPart();
1322 Res = createAndInstr(I, X, ~C1);
1323 // ConstOpnd was C2, now C1 ^ C2.
1324 ConstOpnd ^= C1;
1325
1326 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1327 RedoInsts.insert(T);
1328 return true;
1329}
1330
1331// Helper function of OptimizeXor(). It tries to simplify
1332// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
1333// symbolic value.
1334//
1335// If it was successful, true is returned, and the "R" and "C" is returned
1336// via "Res" and "ConstOpnd", respectively (If the entire expression is
1337// evaluated to a constant, the Res is set to NULL); otherwise, false is
1338// returned, and both "Res" and "ConstOpnd" remain unchanged.
1339bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
1340 XorOpnd *Opnd2, APInt &ConstOpnd,
1341 Value *&Res) {
1342 Value *X = Opnd1->getSymbolicPart();
1343 if (X != Opnd2->getSymbolicPart())
1344 return false;
1345
1346 // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
1347 int DeadInstNum = 1;
1348 if (Opnd1->getValue()->hasOneUse())
1349 DeadInstNum++;
1350 if (Opnd2->getValue()->hasOneUse())
1351 DeadInstNum++;
1352
1353 // Xor-Rule 2:
1354 // (x | c1) ^ (x & c2)
1355 // = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
1356 // = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1
1357 // = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
1358 //
1359 if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
1360 if (Opnd2->isOrExpr())
1361 std::swap(Opnd1, Opnd2);
1362
1363 const APInt &C1 = Opnd1->getConstPart();
1364 const APInt &C2 = Opnd2->getConstPart();
1365 APInt C3((~C1) ^ C2);
1366
1367 // Do not increase code size!
1368 if (!C3.isZero() && !C3.isAllOnes()) {
1369 int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1370 if (NewInstNum > DeadInstNum)
1371 return false;
1372 }
1373
1374 Res = createAndInstr(I, X, C3);
1375 ConstOpnd ^= C1;
1376 } else if (Opnd1->isOrExpr()) {
1377 // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
1378 //
1379 const APInt &C1 = Opnd1->getConstPart();
1380 const APInt &C2 = Opnd2->getConstPart();
1381 APInt C3 = C1 ^ C2;
1382
1383 // Do not increase code size
1384 if (!C3.isZero() && !C3.isAllOnes()) {
1385 int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1386 if (NewInstNum > DeadInstNum)
1387 return false;
1388 }
1389
1390 Res = createAndInstr(I, X, C3);
1391 ConstOpnd ^= C3;
1392 } else {
1393 // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
1394 //
1395 const APInt &C1 = Opnd1->getConstPart();
1396 const APInt &C2 = Opnd2->getConstPart();
1397 APInt C3 = C1 ^ C2;
1398 Res = createAndInstr(I, X, C3);
1399 }
1400
1401 // Put the original operands in the Redo list; hope they will be deleted
1402 // as dead code.
1403 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1404 RedoInsts.insert(T);
1405 if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
1406 RedoInsts.insert(T);
1407
1408 return true;
1409}
1410
1411/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
1412/// to a single Value, it is returned, otherwise the Ops list is mutated as
1413/// necessary.
1414Value *ReassociatePass::OptimizeXor(Instruction *I,
1416 if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
1417 return V;
1418
1419 if (Ops.size() == 1)
1420 return nullptr;
1421
1423 SmallVector<XorOpnd*, 8> OpndPtrs;
1424 Type *Ty = Ops[0].Op->getType();
1425 APInt ConstOpnd(Ty->getScalarSizeInBits(), 0);
1426
1427 // Step 1: Convert ValueEntry to XorOpnd
1428 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1429 Value *V = Ops[i].Op;
1430 const APInt *C;
1431 // TODO: Support non-splat vectors.
1432 if (match(V, m_APInt(C))) {
1433 ConstOpnd ^= *C;
1434 } else {
1435 XorOpnd O(V);
1436 O.setSymbolicRank(getRank(O.getSymbolicPart()));
1437 Opnds.push_back(O);
1438 }
1439 }
1440
1441 // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
1442 // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
1443 // the "OpndPtrs" as well. For the similar reason, do not fuse this loop
1444 // with the previous loop --- the iterator of the "Opnds" may be invalidated
1445 // when new elements are added to the vector.
1446 for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
1447 OpndPtrs.push_back(&Opnds[i]);
1448
1449 // Step 2: Sort the Xor-Operands in a way such that the operands containing
1450 // the same symbolic value cluster together. For instance, the input operand
1451 // sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
1452 // ("x | 123", "x & 789", "y & 456").
1453 //
1454 // The purpose is twofold:
1455 // 1) Cluster together the operands sharing the same symbolic-value.
1456 // 2) Operand having smaller symbolic-value-rank is permuted earlier, which
1457 // could potentially shorten crital path, and expose more loop-invariants.
1458 // Note that values' rank are basically defined in RPO order (FIXME).
1459 // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
1460 // than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
1461 // "z" in the order of X-Y-Z is better than any other orders.
1462 llvm::stable_sort(OpndPtrs, [](XorOpnd *LHS, XorOpnd *RHS) {
1463 return LHS->getSymbolicRank() < RHS->getSymbolicRank();
1464 });
1465
1466 // Step 3: Combine adjacent operands
1467 XorOpnd *PrevOpnd = nullptr;
1468 bool Changed = false;
1469 for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
1470 XorOpnd *CurrOpnd = OpndPtrs[i];
1471 // The combined value
1472 Value *CV;
1473
1474 // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
1475 if (!ConstOpnd.isZero() && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
1476 Changed = true;
1477 if (CV)
1478 *CurrOpnd = XorOpnd(CV);
1479 else {
1480 CurrOpnd->Invalidate();
1481 continue;
1482 }
1483 }
1484
1485 if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
1486 PrevOpnd = CurrOpnd;
1487 continue;
1488 }
1489
1490 // step 3.2: When previous and current operands share the same symbolic
1491 // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
1492 if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
1493 // Remove previous operand
1494 PrevOpnd->Invalidate();
1495 if (CV) {
1496 *CurrOpnd = XorOpnd(CV);
1497 PrevOpnd = CurrOpnd;
1498 } else {
1499 CurrOpnd->Invalidate();
1500 PrevOpnd = nullptr;
1501 }
1502 Changed = true;
1503 }
1504 }
1505
1506 // Step 4: Reassemble the Ops
1507 if (Changed) {
1508 Ops.clear();
1509 for (const XorOpnd &O : Opnds) {
1510 if (O.isInvalid())
1511 continue;
1512 ValueEntry VE(getRank(O.getValue()), O.getValue());
1513 Ops.push_back(VE);
1514 }
1515 if (!ConstOpnd.isZero()) {
1516 Value *C = ConstantInt::get(Ty, ConstOpnd);
1517 ValueEntry VE(getRank(C), C);
1518 Ops.push_back(VE);
1519 }
1520 unsigned Sz = Ops.size();
1521 if (Sz == 1)
1522 return Ops.back().Op;
1523 if (Sz == 0) {
1524 assert(ConstOpnd.isZero());
1525 return ConstantInt::get(Ty, ConstOpnd);
1526 }
1527 }
1528
1529 return nullptr;
1530}
1531
1532/// Optimize a series of operands to an 'add' instruction. This
1533/// optimizes based on identities. If it can be reduced to a single Value, it
1534/// is returned, otherwise the Ops list is mutated as necessary.
1535Value *ReassociatePass::OptimizeAdd(Instruction *I,
1537 // Scan the operand lists looking for X and -X pairs. If we find any, we
1538 // can simplify expressions like X+-X == 0 and X+~X ==-1. While we're at it,
1539 // scan for any
1540 // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
1541
1542 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1543 Value *TheOp = Ops[i].Op;
1544 // Check to see if we've seen this operand before. If so, we factor all
1545 // instances of the operand together. Due to our sorting criteria, we know
1546 // that these need to be next to each other in the vector.
1547 if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
1548 // Rescan the list, remove all instances of this operand from the expr.
1549 unsigned NumFound = 0;
1550 do {
1551 Ops.erase(Ops.begin()+i);
1552 ++NumFound;
1553 } while (i != Ops.size() && Ops[i].Op == TheOp);
1554
1555 LLVM_DEBUG(dbgs() << "\nFACTORING [" << NumFound << "]: " << *TheOp
1556 << '\n');
1557 ++NumFactor;
1558
1559 // Insert a new multiply.
1560 Type *Ty = TheOp->getType();
1561 Constant *C = Ty->isIntOrIntVectorTy() ?
1562 ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
1563 Instruction *Mul = CreateMul(TheOp, C, "factor", I, I);
1564
1565 // Now that we have inserted a multiply, optimize it. This allows us to
1566 // handle cases that require multiple factoring steps, such as this:
1567 // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
1568 RedoInsts.insert(Mul);
1569
1570 // If every add operand was a duplicate, return the multiply.
1571 if (Ops.empty())
1572 return Mul;
1573
1574 // Otherwise, we had some input that didn't have the dupe, such as
1575 // "A + A + B" -> "A*2 + B". Add the new multiply to the list of
1576 // things being added by this operation.
1577 Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
1578
1579 --i;
1580 e = Ops.size();
1581 continue;
1582 }
1583
1584 // Check for X and -X or X and ~X in the operand list.
1585 Value *X;
1586 if (!match(TheOp, m_Neg(m_Value(X))) && !match(TheOp, m_Not(m_Value(X))) &&
1587 !match(TheOp, m_FNeg(m_Value(X))))
1588 continue;
1589
1590 unsigned FoundX = FindInOperandList(Ops, i, X);
1591 if (FoundX == i)
1592 continue;
1593
1594 // Remove X and -X from the operand list.
1595 if (Ops.size() == 2 &&
1596 (match(TheOp, m_Neg(m_Value())) || match(TheOp, m_FNeg(m_Value()))))
1597 return Constant::getNullValue(X->getType());
1598
1599 // Remove X and ~X from the operand list.
1600 if (Ops.size() == 2 && match(TheOp, m_Not(m_Value())))
1601 return Constant::getAllOnesValue(X->getType());
1602
1603 Ops.erase(Ops.begin()+i);
1604 if (i < FoundX)
1605 --FoundX;
1606 else
1607 --i; // Need to back up an extra one.
1608 Ops.erase(Ops.begin()+FoundX);
1609 ++NumAnnihil;
1610 --i; // Revisit element.
1611 e -= 2; // Removed two elements.
1612
1613 // if X and ~X we append -1 to the operand list.
1614 if (match(TheOp, m_Not(m_Value()))) {
1615 Value *V = Constant::getAllOnesValue(X->getType());
1616 Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
1617 e += 1;
1618 }
1619 }
1620
1621 // Scan the operand list, checking to see if there are any common factors
1622 // between operands. Consider something like A*A+A*B*C+D. We would like to
1623 // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
1624 // To efficiently find this, we count the number of times a factor occurs
1625 // for any ADD operands that are MULs.
1626 DenseMap<Value*, unsigned> FactorOccurrences;
1627
1628 // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
1629 // where they are actually the same multiply.
1630 unsigned MaxOcc = 0;
1631 Value *MaxOccVal = nullptr;
1632 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1633 BinaryOperator *BOp =
1634 isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1635 if (!BOp)
1636 continue;
1637
1638 // Compute all of the factors of this added value.
1639 SmallVector<Value*, 8> Factors;
1640 FindSingleUseMultiplyFactors(BOp, Factors);
1641 assert(Factors.size() > 1 && "Bad linearize!");
1642
1643 // Add one to FactorOccurrences for each unique factor in this op.
1644 SmallPtrSet<Value*, 8> Duplicates;
1645 for (Value *Factor : Factors) {
1646 if (!Duplicates.insert(Factor).second)
1647 continue;
1648
1649 unsigned Occ = ++FactorOccurrences[Factor];
1650 if (Occ > MaxOcc) {
1651 MaxOcc = Occ;
1652 MaxOccVal = Factor;
1653 }
1654
1655 // If Factor is a negative constant, add the negated value as a factor
1656 // because we can percolate the negate out. Watch for minint, which
1657 // cannot be positivified.
1658 if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
1659 if (CI->isNegative() && !CI->isMinValue(true)) {
1660 Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
1661 if (!Duplicates.insert(Factor).second)
1662 continue;
1663 unsigned Occ = ++FactorOccurrences[Factor];
1664 if (Occ > MaxOcc) {
1665 MaxOcc = Occ;
1666 MaxOccVal = Factor;
1667 }
1668 }
1669 } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Factor)) {
1670 if (CF->isNegative()) {
1671 APFloat F(CF->getValueAPF());
1672 F.changeSign();
1673 Factor = ConstantFP::get(CF->getContext(), F);
1674 if (!Duplicates.insert(Factor).second)
1675 continue;
1676 unsigned Occ = ++FactorOccurrences[Factor];
1677 if (Occ > MaxOcc) {
1678 MaxOcc = Occ;
1679 MaxOccVal = Factor;
1680 }
1681 }
1682 }
1683 }
1684 }
1685
1686 // If any factor occurred more than one time, we can pull it out.
1687 if (MaxOcc > 1) {
1688 LLVM_DEBUG(dbgs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal
1689 << '\n');
1690 ++NumFactor;
1691
1692 // Create a new instruction that uses the MaxOccVal twice. If we don't do
1693 // this, we could otherwise run into situations where removing a factor
1694 // from an expression will drop a use of maxocc, and this can cause
1695 // RemoveFactorFromExpression on successive values to behave differently.
1696 Instruction *DummyInst =
1697 I->getType()->isIntOrIntVectorTy()
1698 ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
1699 : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
1700
1702 for (unsigned i = 0; i != Ops.size(); ++i) {
1703 // Only try to remove factors from expressions we're allowed to.
1704 BinaryOperator *BOp =
1705 isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1706 if (!BOp)
1707 continue;
1708
1709 if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
1710 // The factorized operand may occur several times. Convert them all in
1711 // one fell swoop.
1712 for (unsigned j = Ops.size(); j != i;) {
1713 --j;
1714 if (Ops[j].Op == Ops[i].Op) {
1715 NewMulOps.push_back(V);
1716 Ops.erase(Ops.begin()+j);
1717 }
1718 }
1719 --i;
1720 }
1721 }
1722
1723 // No need for extra uses anymore.
1724 DummyInst->deleteValue();
1725
1726 unsigned NumAddedValues = NewMulOps.size();
1727 Value *V = EmitAddTreeOfValues(I, NewMulOps);
1728
1729 // Now that we have inserted the add tree, optimize it. This allows us to
1730 // handle cases that require multiple factoring steps, such as this:
1731 // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
1732 assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
1733 (void)NumAddedValues;
1734 if (Instruction *VI = dyn_cast<Instruction>(V))
1735 RedoInsts.insert(VI);
1736
1737 // Create the multiply.
1738 Instruction *V2 = CreateMul(V, MaxOccVal, "reass.mul", I, I);
1739
1740 // Rerun associate on the multiply in case the inner expression turned into
1741 // a multiply. We want to make sure that we keep things in canonical form.
1742 RedoInsts.insert(V2);
1743
1744 // If every add operand included the factor (e.g. "A*B + A*C"), then the
1745 // entire result expression is just the multiply "A*(B+C)".
1746 if (Ops.empty())
1747 return V2;
1748
1749 // Otherwise, we had some input that didn't have the factor, such as
1750 // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
1751 // things being added by this operation.
1752 Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
1753 }
1754
1755 return nullptr;
1756}
1757
1758/// Build up a vector of value/power pairs factoring a product.
1759///
1760/// Given a series of multiplication operands, build a vector of factors and
1761/// the powers each is raised to when forming the final product. Sort them in
1762/// the order of descending power.
1763///
1764/// (x*x) -> [(x, 2)]
1765/// ((x*x)*x) -> [(x, 3)]
1766/// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
1767///
1768/// \returns Whether any factors have a power greater than one.
// NOTE(review): the embedded numbering jumps 1768 -> 1770 here, so the first
// line of the signature (source line 1769) was dropped by extraction —
// presumably `static bool collectMultiplyFactors(SmallVectorImpl<ValueEntry>
// &Ops,`; confirm against upstream Reassociate.cpp.
1770 SmallVectorImpl<Factor> &Factors) {
1771 // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
1772 // Compute the sum of powers of simplifiable factors.
1773 unsigned FactorPowerSum = 0;
// Ops is expected to be sorted so equal operands are adjacent; each inner
// loop below counts one run of identical Ops entries.
1774 for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
1775 Value *Op = Ops[Idx-1].Op;
1776
1777 // Count the number of occurrences of this value.
1778 unsigned Count = 1;
1779 for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
1780 ++Count;
1781 // Track for simplification all factors which occur 2 or more times.
1782 if (Count > 1)
1783 FactorPowerSum += Count;
1784 }
1785
1786 // We can only simplify factors if the sum of the powers of our simplifiable
1787 // factors is 4 or higher. When that is the case, we will *always* have
1788 // a simplification. This is an important invariant to prevent cyclically
1789 // trying to simplify already minimal formations.
1790 if (FactorPowerSum < 4)
1791 return false;
1792
1793 // Now gather the simplifiable factors, removing them from Ops.
1794 FactorPowerSum = 0;
1795 for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
1796 Value *Op = Ops[Idx-1].Op;
1797
1798 // Count the number of occurrences of this value.
1799 unsigned Count = 1;
1800 for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
1801 ++Count;
1802 if (Count == 1)
1803 continue;
1804 // Move an even number of occurrences to Factors.
1805 Count &= ~1U;
// Rewind Idx to the start of the run so the erase below removes exactly the
// Count entries being transferred into Factors.
1806 Idx -= Count;
1807 FactorPowerSum += Count;
1808 Factors.push_back(Factor(Op, Count));
1809 Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
1810 }
1811
1812 // None of the adjustments above should have reduced the sum of factor powers
1813 // below our minimum of '4'.
1814 assert(FactorPowerSum >= 4);
1815
// Descending-power order is the precondition buildMinimalMultiplyDAG relies
// on; stable sort keeps equal-power factors in their original relative order.
1816 llvm::stable_sort(Factors, [](const Factor &LHS, const Factor &RHS) {
1817 return LHS.Power > RHS.Power;
1818 });
1819 return true;
1820}
1821
1822/// Build a tree of multiplies, computing the product of Ops.
// NOTE(review): source lines 1823-1824 (the signature) were dropped by
// extraction; the page trailer shows it as
// `static Value *buildMultiplyTree(IRBuilderBase &Builder,
//                                  SmallVectorImpl<Value *> &Ops)`.
// Despite the name, this emits a left-leaning chain of multiplies, consuming
// Ops from the back; Ops is emptied as a side effect (except the 1-element
// case, which returns the sole value untouched).
1825 if (Ops.size() == 1)
1826 return Ops.back();
1827
1828 Value *LHS = Ops.pop_back_val();
1829 do {
// Integer vs FP multiply is chosen from the running value's type.
1830 if (LHS->getType()->isIntOrIntVectorTy())
1831 LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
1832 else
1833 LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
1834 } while (!Ops.empty());
1835
1836 return LHS;
1837}
1838
1839/// Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
1840///
1841/// Given a vector of values raised to various powers, where no two values are
1842/// equal and the powers are sorted in decreasing order, compute the minimal
1843/// DAG of multiplies to compute the final product, and return that product
1844/// value.
1845Value *
1846ReassociatePass::buildMinimalMultiplyDAG(IRBuilderBase &Builder,
1847 SmallVectorImpl<Factor> &Factors) {
1848 assert(Factors[0].Power);
1849 SmallVector<Value *, 4> OuterProduct;
1850 for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
1851 Idx < Size && Factors[Idx].Power > 0; ++Idx) {
1852 if (Factors[Idx].Power != Factors[LastIdx].Power) {
1853 LastIdx = Idx;
1854 continue;
1855 }
1856
1857 // We want to multiply across all the factors with the same power so that
1858 // we can raise them to that power as a single entity. Build a mini tree
1859 // for that.
1860 SmallVector<Value *, 4> InnerProduct;
1861 InnerProduct.push_back(Factors[LastIdx].Base);
1862 do {
1863 InnerProduct.push_back(Factors[Idx].Base);
1864 ++Idx;
1865 } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);
1866
1867 // Reset the base value of the first factor to the new expression tree.
1868 // We'll remove all the factors with the same power in a second pass.
1869 Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
1870 if (Instruction *MI = dyn_cast<Instruction>(M))
1871 RedoInsts.insert(MI);
1872
1873 LastIdx = Idx;
1874 }
1875 // Unique factors with equal powers -- we've folded them into the first one's
1876 // base.
1877 Factors.erase(std::unique(Factors.begin(), Factors.end(),
1878 [](const Factor &LHS, const Factor &RHS) {
1879 return LHS.Power == RHS.Power;
1880 }),
1881 Factors.end());
1882
1883 // Iteratively collect the base of each factor with an add power into the
1884 // outer product, and halve each power in preparation for squaring the
1885 // expression.
1886 for (Factor &F : Factors) {
1887 if (F.Power & 1)
1888 OuterProduct.push_back(F.Base);
1889 F.Power >>= 1;
1890 }
1891 if (Factors[0].Power) {
1892 Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
1893 OuterProduct.push_back(SquareRoot);
1894 OuterProduct.push_back(SquareRoot);
1895 }
1896 if (OuterProduct.size() == 1)
1897 return OuterProduct.front();
1898
1899 Value *V = buildMultiplyTree(Builder, OuterProduct);
1900 return V;
1901}
1902
// Try to rewrite a long linear multiply chain as a minimal power DAG.
// Returns the replacement value when the whole expression collapsed to one
// value, or nullptr when Ops was updated in place for further rewriting.
1903Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
// NOTE(review): source line 1904 (the second parameter line) was dropped by
// extraction — presumably `SmallVectorImpl<ValueEntry> &Ops) {` given the
// uses of Ops below; confirm against upstream.
1905 // We can only optimize the multiplies when there is a chain of more than
1906 // three, such that a balanced tree might require fewer total multiplies.
1907 if (Ops.size() < 4)
1908 return nullptr;
1909
1910 // Try to turn linear trees of multiplies without other uses of the
1911 // intermediate stages into minimal multiply DAGs with perfect sub-expression
1912 // re-use.
1913 SmallVector<Factor, 4> Factors;
1914 if (!collectMultiplyFactors(Ops, Factors))
1915 return nullptr; // All distinct factors, so nothing left for us to do.
1916
// NOTE(review): source line 1917 was dropped by extraction — likely a builder
// declaration such as `IRBuilder<> Builder(I);`, since Builder is used below
// without a visible declaration; confirm against upstream.
1918 // The reassociate transformation for FP operations is performed only
1919 // if unsafe algebra is permitted by FastMathFlags. Propagate those flags
1920 // to the newly generated operations.
1921 if (auto FPI = dyn_cast<FPMathOperator>(I))
1922 Builder.setFastMathFlags(FPI->getFastMathFlags());
1923
1924 Value *V = buildMinimalMultiplyDAG(Builder, Factors);
1925 if (Ops.empty())
1926 return V;
1927
// Some operands were not folded into the DAG: insert the DAG's product back
// into Ops at its rank-sorted position and let the caller keep rewriting.
1928 ValueEntry NewEntry = ValueEntry(getRank(V), V);
1929 Ops.insert(llvm::lower_bound(Ops, NewEntry), NewEntry);
1930 return nullptr;
1931}
1932
// Optimize a linearized expression: fold constants, drop identities, bail on
// absorbers, then dispatch to the per-opcode optimizers. Returns a
// replacement value when the whole expression simplified to one value,
// otherwise nullptr (Ops may have been rewritten in place).
1933Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
// NOTE(review): source line 1934 (the second parameter line) was dropped by
// extraction — presumably `SmallVectorImpl<ValueEntry> &Ops) {`; confirm
// against upstream.
1935 // Now that we have the linearized expression tree, try to optimize it.
1936 // Start by folding any constants that we found.
1937 const DataLayout &DL = I->getModule()->getDataLayout();
1938 Constant *Cst = nullptr;
1939 unsigned Opcode = I->getOpcode();
// Constants sort to the end of Ops (rank 0), so fold them off the back until
// a non-constant or an unfoldable pair is hit.
1940 while (!Ops.empty()) {
1941 if (auto *C = dyn_cast<Constant>(Ops.back().Op)) {
1942 if (!Cst) {
1943 Ops.pop_back();
1944 Cst = C;
1945 continue;
1946 }
1947 if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, C, Cst, DL)) {
1948 Ops.pop_back();
1949 Cst = Res;
1950 continue;
1951 }
1952 }
1953 break;
1954 }
1955 // If there was nothing but constants then we are done.
1956 if (Ops.empty())
1957 return Cst;
1958
1959 // Put the combined constant back at the end of the operand list, except if
1960 // there is no point. For example, an add of 0 gets dropped here, while a
1961 // multiplication by zero turns the whole expression into zero.
1962 if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) {
1963 if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
1964 return Cst;
1965 Ops.push_back(ValueEntry(0, Cst));
1966 }
1967
1968 if (Ops.size() == 1) return Ops[0].Op;
1969
1970 // Handle destructive annihilation due to identities between elements in the
1971 // argument list here.
1972 unsigned NumOps = Ops.size();
1973 switch (Opcode) {
1974 default: break;
1975 case Instruction::And:
1976 case Instruction::Or:
1977 if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
1978 return Result;
1979 break;
1980
1981 case Instruction::Xor:
1982 if (Value *Result = OptimizeXor(I, Ops))
1983 return Result;
1984 break;
1985
1986 case Instruction::Add:
1987 case Instruction::FAdd:
1988 if (Value *Result = OptimizeAdd(I, Ops))
1989 return Result;
1990 break;
1991
1992 case Instruction::Mul:
1993 case Instruction::FMul:
1994 if (Value *Result = OptimizeMul(I, Ops))
1995 return Result;
1996 break;
1997 }
1998
// If a sub-optimizer shrank Ops, rerun the whole pipeline — the smaller list
// may expose new constant folds or identities.
1999 if (Ops.size() != NumOps)
2000 return OptimizeExpression(I, Ops);
2001 return nullptr;
2002}
2003
2004// Remove dead instructions and if any operands are trivially dead add them to
2005// Insts so they will be removed as well.
2006void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I,
2007 OrderedSet &Insts) {
2008 assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
// Snapshot the operands before erasing I, since erasure drops its uses.
2009 SmallVector<Value *, 4> Ops(I->operands());
// Purge I from all pass-local bookkeeping before deleting it so no stale
// pointer survives in the maps/worklists.
2010 ValueRankMap.erase(I);
2011 Insts.remove(I);
2012 RedoInsts.remove(I);
// NOTE(review): source line 2013 was dropped by extraction — likely
// `llvm::salvageDebugInfo(*I);` ahead of the erase; confirm against upstream.
2014 I->eraseFromParent();
// Any operand that just lost its last use becomes dead too; queue it.
2015 for (auto *Op : Ops)
2016 if (Instruction *OpInst = dyn_cast<Instruction>(Op))
2017 if (OpInst->use_empty())
2018 Insts.insert(OpInst);
2019}
2020
2021/// Zap the given instruction, adding interesting operands to the work list.
2022void ReassociatePass::EraseInst(Instruction *I) {
2023 assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
2024 LLVM_DEBUG(dbgs() << "Erasing dead inst: "; I->dump());
2025
// Snapshot operands before the erase drops their uses.
2026 SmallVector<Value *, 8> Ops(I->operands());
2027 // Erase the dead instruction.
2028 ValueRankMap.erase(I);
2029 RedoInsts.remove(I);
// NOTE(review): source line 2030 was dropped by extraction — likely
// `llvm::salvageDebugInfo(*I);` ahead of the erase; confirm against upstream.
2031 I->eraseFromParent();
2032 // Optimize its operands.
2033 SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
2034 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2035 if (Instruction *Op = dyn_cast<Instruction>(Ops[i])) {
2036 // If this is a node in an expression tree, climb to the expression root
2037 // and add that since that's where optimization actually happens.
2038 unsigned Opcode = Op->getOpcode();
2039 while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
2040 Visited.insert(Op).second)
2041 Op = Op->user_back();
2042
2043 // The instruction we're going to push may be coming from a
2044 // dead block, and Reassociate skips the processing of unreachable
2045 // blocks because it's a waste of time and also because it can
2046 // lead to infinite loop due to LLVM's non-standard definition
2047 // of dominance.
2048 if (ValueRankMap.contains(Op))
2049 RedoInsts.insert(Op);
2050 }
2051
2052 MadeChange = true;
2053}
2054
2055/// Recursively analyze an expression to build a list of instructions that have
2056/// negative floating-point constant operands. The caller can then transform
2057/// the list to create positive constants for better reassociation and CSE.
// NOTE(review): source line 2058 (the first line of the signature) was
// dropped by extraction; the page trailer shows it as
// `static void getNegatibleInsts(Value *V,`.
2059 SmallVectorImpl<Instruction *> &Candidates) {
2060 // Handle only one-use instructions. Combining negations does not justify
2061 // replicating instructions.
2062 Instruction *I;
2063 if (!match(V, m_OneUse(m_Instruction(I))))
2064 return;
2065
2066 // Handle expressions of multiplications and divisions.
2067 // TODO: This could look through floating-point casts.
2068 const APFloat *C;
2069 switch (I->getOpcode()) {
2070 case Instruction::FMul:
2071 // Not expecting non-canonical code here. Bail out and wait.
2072 if (match(I->getOperand(0), m_Constant()))
2073 break;
2074
// Only operand 1 is checked: canonical form keeps the constant there.
2075 if (match(I->getOperand(1), m_APFloat(C)) && C->isNegative()) {
2076 Candidates.push_back(I);
2077 LLVM_DEBUG(dbgs() << "FMul with negative constant: " << *I << '\n');
2078 }
2079 getNegatibleInsts(I->getOperand(0), Candidates);
2080 getNegatibleInsts(I->getOperand(1), Candidates);
2081 break;
2082 case Instruction::FDiv:
2083 // Not expecting non-canonical code here. Bail out and wait.
2084 if (match(I->getOperand(0), m_Constant()) &&
2085 match(I->getOperand(1), m_Constant()))
2086 break;
2087
// FDiv is non-commutative, so a negative constant in either the numerator or
// the denominator makes the instruction a candidate.
2088 if ((match(I->getOperand(0), m_APFloat(C)) && C->isNegative()) ||
2089 (match(I->getOperand(1), m_APFloat(C)) && C->isNegative())) {
2090 Candidates.push_back(I);
2091 LLVM_DEBUG(dbgs() << "FDiv with negative constant: " << *I << '\n');
2092 }
2093 getNegatibleInsts(I->getOperand(0), Candidates);
2094 getNegatibleInsts(I->getOperand(1), Candidates);
2095 break;
2096 default:
2097 break;
2098 }
2099}
2100
2101/// Given an fadd/fsub with an operand that is a one-use instruction
2102/// (the fadd/fsub), try to change negative floating-point constants into
2103/// positive constants to increase potential for reassociation and CSE.
// Returns the replacement fadd/fsub when the root opcode was flipped to
// absorb an odd leftover negation, I itself when negations cancelled in
// pairs, or nullptr when nothing was done.
2104Instruction *ReassociatePass::canonicalizeNegFPConstantsForOp(Instruction *I,
2105 Instruction *Op,
2106 Value *OtherOp) {
2107 assert((I->getOpcode() == Instruction::FAdd ||
2108 I->getOpcode() == Instruction::FSub) && "Expected fadd/fsub");
2109
2110 // Collect instructions with negative FP constants from the subtree that ends
2111 // in Op.
// NOTE(review): source line 2112 was dropped by extraction — likely
// `SmallVector<Instruction *, 4> Candidates;`, since Candidates is used
// below without a visible declaration; confirm against upstream.
2113 getNegatibleInsts(Op, Candidates);
2114 if (Candidates.empty())
2115 return nullptr;
2116
2117 // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the
2118 // resulting subtract will be broken up later. This can get us into an
2119 // infinite loop during reassociation.
2120 bool IsFSub = I->getOpcode() == Instruction::FSub;
2121 bool NeedsSubtract = !IsFSub && Candidates.size() % 2 == 1;
2122 if (NeedsSubtract && ShouldBreakUpSubtract(I))
2123 return nullptr;
2124
// Flip every collected negative constant to its absolute value; the
// flipped signs are compensated below (pairs cancel, odd one flips I).
2125 for (Instruction *Negatible : Candidates) {
2126 const APFloat *C;
2127 if (match(Negatible->getOperand(0), m_APFloat(C))) {
2128 assert(!match(Negatible->getOperand(1), m_Constant()) &&
2129 "Expecting only 1 constant operand");
2130 assert(C->isNegative() && "Expected negative FP constant");
2131 Negatible->setOperand(0, ConstantFP::get(Negatible->getType(), abs(*C)));
2132 MadeChange = true;
2133 }
2134 if (match(Negatible->getOperand(1), m_APFloat(C))) {
2135 assert(!match(Negatible->getOperand(0), m_Constant()) &&
2136 "Expecting only 1 constant operand");
2137 assert(C->isNegative() && "Expected negative FP constant");
2138 Negatible->setOperand(1, ConstantFP::get(Negatible->getType(), abs(*C)));
2139 MadeChange = true;
2140 }
2141 }
2142 assert(MadeChange == true && "Negative constant candidate was not changed");
2143
2144 // Negations cancelled out.
2145 if (Candidates.size() % 2 == 0)
2146 return I;
2147
2148 // Negate the final operand in the expression by flipping the opcode of this
2149 // fadd/fsub.
2150 assert(Candidates.size() % 2 == 1 && "Expected odd number");
// NOTE(review): source line 2151 was dropped by extraction — likely an
// `IRBuilder<> Builder(I);` declaration, since Builder is used next without
// a visible declaration; confirm against upstream.
2152 Value *NewInst = IsFSub ? Builder.CreateFAddFMF(OtherOp, Op, I)
2153 : Builder.CreateFSubFMF(OtherOp, Op, I);
2154 I->replaceAllUsesWith(NewInst);
2155 RedoInsts.insert(I);
2156 return dyn_cast<Instruction>(NewInst);
2157}
2158
2159/// Canonicalize expressions that contain a negative floating-point constant
2160/// of the following form:
2161/// OtherOp + (subtree) -> OtherOp {+/-} (canonical subtree)
2162/// (subtree) + OtherOp -> OtherOp {+/-} (canonical subtree)
2163/// OtherOp - (subtree) -> OtherOp {+/-} (canonical subtree)
2164///
2165/// The fadd/fsub opcode may be switched to allow folding a negation into the
2166/// input instruction.
2167Instruction *ReassociatePass::canonicalizeNegFPConstants(Instruction *I) {
2168 LLVM_DEBUG(dbgs() << "Combine negations for: " << *I << '\n');
2169 Value *X;
2170 Instruction *Op;
// NOTE(review): source lines 2171, 2174 and 2177 were dropped by extraction.
// Each presumably guarded the following call with a `match(I, ...)` pattern
// binding Op and X for one of the three forms listed in the doc comment
// above; confirm against upstream.
2172 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2173 I = R;
// (dropped match guard — see note above)
2175 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2176 I = R;
// (dropped match guard — see note above)
2178 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2179 I = R;
2180 return I;
2181}
2182
2183/// Inspect and optimize the given instruction. Note that erasing
2184/// instructions is not allowed.
2185void ReassociatePass::OptimizeInst(Instruction *I) {
2186 // Only consider operations that we understand.
2187 if (!isa<UnaryOperator>(I) && !isa<BinaryOperator>(I))
2188 return;
2189
2190 if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
2191 // If an operand of this shift is a reassociable multiply, or if the shift
2192 // is used by a reassociable multiply or add, turn into a multiply.
2193 if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
2194 (I->hasOneUse() &&
2195 (isReassociableOp(I->user_back(), Instruction::Mul) ||
2196 isReassociableOp(I->user_back(), Instruction::Add)))) {
// NOTE(review): source line 2197 was dropped by extraction — likely
// `Instruction *NI = ConvertShiftToMul(I);`, since NI is assigned to I
// below without a visible declaration; confirm against upstream.
2198 RedoInsts.insert(I);
2199 MadeChange = true;
2200 I = NI;
2201 }
2202
2203 // Commute binary operators, to canonicalize the order of their operands.
2204 // This can potentially expose more CSE opportunities, and makes writing other
2205 // transformations simpler.
2206 if (I->isCommutative())
2207 canonicalizeOperands(I);
2208
2209 // Canonicalize negative constants out of expressions.
2210 if (Instruction *Res = canonicalizeNegFPConstants(I))
2211 I = Res;
2212
2213 // Don't optimize floating-point instructions unless they have the
2214 // appropriate FastMathFlags for reassociation enabled.
2215 if (isa<FPMathOperator>(I) && !hasFPAssociativeFlags(I))
2216 return;
2217
2218 // Do not reassociate boolean (i1) expressions. We want to preserve the
2219 // original order of evaluation for short-circuited comparisons that
2220 // SimplifyCFG has folded to AND/OR expressions. If the expression
2221 // is not further optimized, it is likely to be transformed back to a
2222 // short-circuited form for code gen, and the source order may have been
2223 // optimized for the most likely conditions.
2224 if (I->getType()->isIntegerTy(1))
2225 return;
2226
2227 // If this is a bitwise or instruction of operands
2228 // with no common bits set, convert it to X+Y.
2229 if (I->getOpcode() == Instruction::Or &&
// NOTE(review): source line 2230 (part of this condition) was dropped by
// extraction — presumably an additional profitability/shape guard; confirm
// against upstream.
2231 haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1),
2232 I->getModule()->getDataLayout(), /*AC=*/nullptr, I,
2233 /*DT=*/nullptr)) {
// NOTE(review): source line 2234 was dropped by extraction — likely the
// declaration of NI (the Or converted to an Add), since NI is assigned to I
// below; confirm against upstream.
2235 RedoInsts.insert(I);
2236 MadeChange = true;
2237 I = NI;
2238 }
2239
2240 // If this is a subtract instruction which is not already in negate form,
2241 // see if we can convert it to X+-Y.
2242 if (I->getOpcode() == Instruction::Sub) {
2243 if (ShouldBreakUpSubtract(I)) {
2244 Instruction *NI = BreakUpSubtract(I, RedoInsts);
2245 RedoInsts.insert(I);
2246 MadeChange = true;
2247 I = NI;
2248 } else if (match(I, m_Neg(m_Value()))) {
2249 // Otherwise, this is a negation. See if the operand is a multiply tree
2250 // and if this is not an inner node of a multiply tree.
2251 if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
2252 (!I->hasOneUse() ||
2253 !isReassociableOp(I->user_back(), Instruction::Mul))) {
// NOTE(review): source line 2254 was dropped by extraction — likely the
// declaration of NI (the negate lowered to a multiply); confirm upstream.
2255 // If the negate was simplified, revisit the users to see if we can
2256 // reassociate further.
2257 for (User *U : NI->users()) {
2258 if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2259 RedoInsts.insert(Tmp);
2260 }
2261 RedoInsts.insert(I);
2262 MadeChange = true;
2263 I = NI;
2264 }
2265 }
2266 } else if (I->getOpcode() == Instruction::FNeg ||
2267 I->getOpcode() == Instruction::FSub) {
2268 if (ShouldBreakUpSubtract(I)) {
2269 Instruction *NI = BreakUpSubtract(I, RedoInsts);
2270 RedoInsts.insert(I);
2271 MadeChange = true;
2272 I = NI;
2273 } else if (match(I, m_FNeg(m_Value()))) {
2274 // Otherwise, this is a negation. See if the operand is a multiply tree
2275 // and if this is not an inner node of a multiply tree.
// FSub negation keeps the negated value in operand 1; FNeg in operand 0.
2276 Value *Op = isa<BinaryOperator>(I) ? I->getOperand(1) :
2277 I->getOperand(0);
2278 if (isReassociableOp(Op, Instruction::FMul) &&
2279 (!I->hasOneUse() ||
2280 !isReassociableOp(I->user_back(), Instruction::FMul))) {
2281 // If the negate was simplified, revisit the users to see if we can
2282 // reassociate further.
// NOTE(review): source line 2283 was dropped by extraction — likely the
// declaration of NI (the FP negate lowered to a multiply); confirm upstream.
2284 for (User *U : NI->users()) {
2285 if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2286 RedoInsts.insert(Tmp);
2287 }
2288 RedoInsts.insert(I);
2289 MadeChange = true;
2290 I = NI;
2291 }
2292 }
2293 }
2294
2295 // If this instruction is an associative binary operator, process it.
2296 if (!I->isAssociative()) return;
2297 BinaryOperator *BO = cast<BinaryOperator>(I);
2298
2299 // If this is an interior node of a reassociable tree, ignore it until we
2300 // get to the root of the tree, to avoid N^2 analysis.
2301 unsigned Opcode = BO->getOpcode();
2302 if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
2303 // During the initial run we will get to the root of the tree.
2304 // But if we get here while we are redoing instructions, there is no
2305 // guarantee that the root will be visited. So Redo later
2306 if (BO->user_back() != BO &&
2307 BO->getParent() == BO->user_back()->getParent())
2308 RedoInsts.insert(BO->user_back());
2309 return;
2310 }
2311
2312 // If this is an add tree that is used by a sub instruction, ignore it
2313 // until we process the subtract.
2314 if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
2315 cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub)
2316 return;
2317 if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd &&
2318 cast<Instruction>(BO->user_back())->getOpcode() == Instruction::FSub)
2319 return;
2320
2321 ReassociateExpression(BO);
2322}
2323
// Top-level driver for one expression tree: linearize, rank-sort, optimize,
// apply CSE/negation heuristics, then rewrite the tree in place.
2324void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
2325 // First, walk the expression tree, linearizing the tree, collecting the
2326 // operand information.
// NOTE(review): source line 2327 was dropped by extraction — likely
// `SmallVector<RepeatedValue, 8> Tree;`, since Tree is used next without a
// visible declaration; confirm against upstream.
2328 MadeChange |= LinearizeExprTree(I, Tree, RedoInsts);
// NOTE(review): source line 2329 was dropped by extraction — likely
// `SmallVector<ValueEntry, 8> Ops;`, since Ops is used next without a
// visible declaration; confirm against upstream.
2330 Ops.reserve(Tree.size());
// Each tree entry carries a repeat count; expand it into that many ranked
// operands.
2331 for (const RepeatedValue &E : Tree)
2332 Ops.append(E.second.getZExtValue(), ValueEntry(getRank(E.first), E.first));
2333
2334 LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
2335
2336 // Now that we have linearized the tree to a list and have gathered all of
2337 // the operands and their ranks, sort the operands by their rank. Use a
2338 // stable_sort so that values with equal ranks will have their relative
2339 // positions maintained (and so the compiler is deterministic). Note that
2340 // this sorts so that the highest ranking values end up at the beginning of
2341 // the vector.
2342 llvm::stable_sort(Ops);
2343
2344 // Now that we have the expression tree in a convenient
2345 // sorted form, optimize it globally if possible.
2346 if (Value *V = OptimizeExpression(I, Ops)) {
2347 if (V == I)
2348 // Self-referential expression in unreachable code.
2349 return;
2350 // This expression tree simplified to something that isn't a tree,
2351 // eliminate it.
2352 LLVM_DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
2353 I->replaceAllUsesWith(V);
2354 if (Instruction *VI = dyn_cast<Instruction>(V))
2355 if (I->getDebugLoc())
2356 VI->setDebugLoc(I->getDebugLoc());
2357 RedoInsts.insert(I);
2358 ++NumAnnihil;
2359 return;
2360 }
2361
2362 // We want to sink immediates as deeply as possible except in the case where
2363 // this is a multiply tree used only by an add, and the immediate is a -1.
2364 // In this case we reassociate to put the negation on the outside so that we
2365 // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
2366 if (I->hasOneUse()) {
2367 if (I->getOpcode() == Instruction::Mul &&
2368 cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
2369 isa<ConstantInt>(Ops.back().Op) &&
2370 cast<ConstantInt>(Ops.back().Op)->isMinusOne()) {
2371 ValueEntry Tmp = Ops.pop_back_val();
2372 Ops.insert(Ops.begin(), Tmp);
2373 } else if (I->getOpcode() == Instruction::FMul &&
2374 cast<Instruction>(I->user_back())->getOpcode() ==
2375 Instruction::FAdd &&
2376 isa<ConstantFP>(Ops.back().Op) &&
2377 cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)) {
2378 ValueEntry Tmp = Ops.pop_back_val();
2379 Ops.insert(Ops.begin(), Tmp);
2380 }
2381 }
2382
2383 LLVM_DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
2384
2385 if (Ops.size() == 1) {
2386 if (Ops[0].Op == I)
2387 // Self-referential expression in unreachable code.
2388 return;
2389
2390 // This expression tree simplified to something that isn't a tree,
2391 // eliminate it.
2392 I->replaceAllUsesWith(Ops[0].Op);
2393 if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
2394 OI->setDebugLoc(I->getDebugLoc());
2395 RedoInsts.insert(I);
2396 return;
2397 }
2398
2399 if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) {
2400 // Find the pair with the highest count in the pairmap and move it to the
2401 // back of the list so that it can later be CSE'd.
2402 // example:
2403 // a*b*c*d*e
2404 // if c*e is the most "popular" pair, we can express this as
2405 // (((c*e)*d)*b)*a
2406 unsigned Max = 1;
2407 unsigned BestRank = 0;
2408 std::pair<unsigned, unsigned> BestPair;
2409 unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin;
2410 for (unsigned i = 0; i < Ops.size() - 1; ++i)
2411 for (unsigned j = i + 1; j < Ops.size(); ++j) {
2412 unsigned Score = 0;
2413 Value *Op0 = Ops[i].Op;
2414 Value *Op1 = Ops[j].Op;
// PairMap keys are stored with pointer-ordered operands; normalize before
// the lookup.
2415 if (std::less<Value *>()(Op1, Op0))
2416 std::swap(Op0, Op1);
2417 auto it = PairMap[Idx].find({Op0, Op1});
2418 if (it != PairMap[Idx].end()) {
2419 // Functions like BreakUpSubtract() can erase the Values we're using
2420 // as keys and create new Values after we built the PairMap. There's a
2421 // small chance that the new nodes can have the same address as
2422 // something already in the table. We shouldn't accumulate the stored
2423 // score in that case as it refers to the wrong Value.
2424 if (it->second.isValid())
2425 Score += it->second.Score;
2426 }
2427
// Tie-break equal scores toward the lower-ranked pair so it can sink deeper.
2428 unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank);
2429 if (Score > Max || (Score == Max && MaxRank < BestRank)) {
2430 BestPair = {i, j};
2431 Max = Score;
2432 BestRank = MaxRank;
2433 }
2434 }
2435 if (Max > 1) {
// Erase the later index first so the earlier index stays valid.
2436 auto Op0 = Ops[BestPair.first];
2437 auto Op1 = Ops[BestPair.second];
2438 Ops.erase(&Ops[BestPair.second]);
2439 Ops.erase(&Ops[BestPair.first]);
2440 Ops.push_back(Op0);
2441 Ops.push_back(Op1);
2442 }
2443 }
2444 // Now that we ordered and optimized the expressions, splat them back into
2445 // the expression tree, removing any unneeded nodes.
2446 RewriteExprTree(I, Ops);
2447}
2448
2449void
2450ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
2451 // Make a "pairmap" of how often each operand pair occurs.
2452 for (BasicBlock *BI : RPOT) {
2453 for (Instruction &I : *BI) {
2454 if (!I.isAssociative())
2455 continue;
2456
2457 // Ignore nodes that aren't at the root of trees.
2458 if (I.hasOneUse() && I.user_back()->getOpcode() == I.getOpcode())
2459 continue;
2460
2461 // Collect all operands in a single reassociable expression.
2462 // Since Reassociate has already been run once, we can assume things
2463 // are already canonical according to Reassociation's regime.
2464 SmallVector<Value *, 8> Worklist = { I.getOperand(0), I.getOperand(1) };
// NOTE(review): source line 2465 was dropped by extraction — likely
// `SmallVector<Value *, 8> Ops;`, since Ops is used next without a visible
// declaration; confirm against upstream.
2466 while (!Worklist.empty() && Ops.size() <= GlobalReassociateLimit) {
2467 Value *Op = Worklist.pop_back_val();
2468 Instruction *OpI = dyn_cast<Instruction>(Op);
// A non-instruction, different opcode, or multi-use value is a leaf of this
// reassociable expression.
2469 if (!OpI || OpI->getOpcode() != I.getOpcode() || !OpI->hasOneUse()) {
2470 Ops.push_back(Op);
2471 continue;
2472 }
2473 // Be paranoid about self-referencing expressions in unreachable code.
2474 if (OpI->getOperand(0) != OpI)
2475 Worklist.push_back(OpI->getOperand(0));
2476 if (OpI->getOperand(1) != OpI)
2477 Worklist.push_back(OpI->getOperand(1));
2478 }
2479 // Skip extremely long expressions.
2480 if (Ops.size() > GlobalReassociateLimit)
2481 continue;
2482
2483 // Add all pairwise combinations of operands to the pair map.
2484 unsigned BinaryIdx = I.getOpcode() - Instruction::BinaryOpsBegin;
// NOTE(review): source line 2485 was dropped by extraction — likely a
// per-expression de-duplication set such as
// `SmallSet<std::pair<Value *, Value *>, 32> Visited;`, since Visited is
// used below without a visible declaration; confirm against upstream.
2486 for (unsigned i = 0; i < Ops.size() - 1; ++i) {
2487 for (unsigned j = i + 1; j < Ops.size(); ++j) {
2488 // Canonicalize operand orderings.
2489 Value *Op0 = Ops[i];
2490 Value *Op1 = Ops[j];
2491 if (std::less<Value *>()(Op1, Op0))
2492 std::swap(Op0, Op1);
// Count each distinct pair at most once per expression.
2493 if (!Visited.insert({Op0, Op1}).second)
2494 continue;
2495 auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, {Op0, Op1, 1}});
2496 if (!res.second) {
2497 // If either key value has been erased then we've got the same
2498 // address by coincidence. That can't happen here because nothing is
2499 // erasing values but it can happen by the time we're querying the
2500 // map.
2501 assert(res.first->second.isValid() && "WeakVH invalidated");
2502 ++res.first->second.Score;
2503 }
2504 }
2505 }
2506 }
2507 }
2508}
2509
// NOTE(review): source line 2510 (the function signature) was dropped by
// extraction — presumably `PreservedAnalyses ReassociatePass::run(Function
// &F, FunctionAnalysisManager &) {`, matching the PreservedAnalyses returns
// below; confirm against upstream.
2511 // Get the functions basic blocks in Reverse Post Order. This order is used by
2512 // BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
2513 // blocks (it has been seen that the analysis in this pass could hang when
2514 // analysing dead basic blocks).
// NOTE(review): source line 2515 was dropped by extraction — likely
// `ReversePostOrderTraversal<Function *> RPOT(&F);`, since RPOT is used
// below without a visible declaration; confirm against upstream.
2516
2517 // Calculate the rank map for F.
2518 BuildRankMap(F, RPOT);
2519
2520 // Build the pair map before running reassociate.
2521 // Technically this would be more accurate if we did it after one round
2522 // of reassociation, but in practice it doesn't seem to help much on
2523 // real-world code, so don't waste the compile time running reassociate
2524 // twice.
2525 // If a user wants, they could explicitly run reassociate twice in their
2526 // pass pipeline for further potential gains.
2527 // It might also be possible to update the pair map during runtime, but the
2528 // overhead of that may be large if there's many reassociable chains.
2529 BuildPairMap(RPOT);
2530
2531 MadeChange = false;
2532
2533 // Traverse the same blocks that were analysed by BuildRankMap.
2534 for (BasicBlock *BI : RPOT) {
2535 assert(RankMap.count(&*BI) && "BB should be ranked.");
2536 // Optimize every instruction in the basic block.
2537 for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
2538 if (isInstructionTriviallyDead(&*II)) {
2539 EraseInst(&*II++);
2540 } else {
2541 OptimizeInst(&*II);
2542 assert(II->getParent() == &*BI && "Moved to a different block!");
2543 ++II;
2544 }
2545
2546 // Make a copy of all the instructions to be redone so we can remove dead
2547 // instructions.
2548 OrderedSet ToRedo(RedoInsts);
2549 // Iterate over all instructions to be reevaluated and remove trivially dead
2550 // instructions. If any operand of the trivially dead instruction becomes
2551 // dead mark it for deletion as well. Continue this process until all
2552 // trivially dead instructions have been removed.
2553 while (!ToRedo.empty()) {
2554 Instruction *I = ToRedo.pop_back_val();
// NOTE(review): source line 2555 was dropped by extraction — likely
// `if (isInstructionTriviallyDead(I)) {` guarding the erase below (the
// extra closing brace before the next comment supports this); confirm.
2556 RecursivelyEraseDeadInsts(I, ToRedo);
2557 MadeChange = true;
2558 }
2559 }
2560
2561 // Now that we have removed dead instructions, we can reoptimize the
2562 // remaining instructions.
2563 while (!RedoInsts.empty()) {
2564 Instruction *I = RedoInsts.front();
2565 RedoInsts.erase(RedoInsts.begin());
// NOTE(review): source line 2566 was dropped by extraction — likely
// `if (isInstructionTriviallyDead(I))`, which the following else pairs
// with; confirm against upstream.
2567 EraseInst(I);
2568 else
2569 OptimizeInst(I);
2570 }
2571 }
2572
2573 // We are done with the rank map and pair map.
2574 RankMap.clear();
2575 ValueRankMap.clear();
2576 for (auto &Entry : PairMap)
2577 Entry.clear();
2578
2579 if (MadeChange) {
// NOTE(review): source lines 2580-2581 were dropped by extraction — likely
// `PreservedAnalyses PA;` followed by a CFG-preservation marking such as
// `PA.preserveSet<CFGAnalyses>();`, since PA is returned next without a
// visible declaration; confirm against upstream.
2582 return PA;
2583 }
2584
2585 return PreservedAnalyses::all();
2586}
2587
2588namespace {
2589
// Legacy pass-manager wrapper that delegates to the new-PM ReassociatePass.
2590 class ReassociateLegacyPass : public FunctionPass {
2591 ReassociatePass Impl;
2592
2593 public:
2594 static char ID; // Pass identification, replacement for typeid
2595
2596 ReassociateLegacyPass() : FunctionPass(ID) {
// NOTE(review): source line 2597 (the constructor body) was dropped by
// extraction — likely the conventional
// `initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());`
// call; confirm against upstream.
2598 }
2599
2600 bool runOnFunction(Function &F) override {
2601 if (skipFunction(F))
2602 return false;
2603
// The new-PM implementation never queries the analysis manager here, so a
// default-constructed dummy FAM is sufficient.
2604 FunctionAnalysisManager DummyFAM;
2605 auto PA = Impl.run(F, DummyFAM);
2606 return !PA.areAllPreserved();
2607 }
2608
2609 void getAnalysisUsage(AnalysisUsage &AU) const override {
2610 AU.setPreservesCFG();
// NOTE(review): source lines 2611-2613 were dropped by extraction — likely
// `AU.addPreserved<...>()` declarations (the head includes both alias
// analysis headers, suggesting AA-related preservations); confirm upstream.
2614 }
2615 };
2616
2617} // end anonymous namespace
2618
// Definition of the legacy pass's unique identifier (address is the ID).
2619char ReassociateLegacyPass::ID = 0;
2620
2621INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
2622 "Reassociate expressions", false, false)
2623
2624// Public interface to the Reassociate pass
// NOTE(review): source line 2625 (the factory's signature) was dropped by
// extraction — presumably `FunctionPass *llvm::createReassociatePass() {`
// given the comment above and the returned type; confirm against upstream.
2626 return new ReassociateLegacyPass();
2627}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
assume Assume Builder
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
uint64_t Size
static bool runOnFunction(Function &F, bool PostInlining)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This is the interface for a simple mod/ref and alias analysis over globals.
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, ScalarEvolution *SE, LoopInfo *LI)
isInteresting - Test whether the given expression is "interesting" when used by the given expression,...
Definition: IVUsers.cpp:56
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
nary reassociate
#define P(N)
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static void PrintOps(Instruction *I, const SmallVectorImpl< ValueEntry > &Ops)
Print out the expression identified in the Ops list.
Definition: Reassociate.cpp:75
static bool ShouldBreakUpSubtract(Instruction *Sub)
Return true if we should break up this subtract of X-Y into (X + -Y).
static Value * buildMultiplyTree(IRBuilderBase &Builder, SmallVectorImpl< Value * > &Ops)
Build a tree of multiplies, computing the product of Ops.
static void getNegatibleInsts(Value *V, SmallVectorImpl< Instruction * > &Candidates)
Recursively analyze an expression to build a list of instructions that have negative floating-point c...
static unsigned CarmichaelShift(unsigned Bitwidth)
Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael function.
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
static BinaryOperator * BreakUpSubtract(Instruction *Sub, ReassociatePass::OrderedSet &ToRedo)
If we have (X-Y), and if either X is an add, or if this is only used by an add, transform this into (...
static void FindSingleUseMultiplyFactors(Value *V, SmallVectorImpl< Value * > &Factors)
If V is a single-use multiply, recursively add its operands as factors, otherwise add V to the list o...
static Value * OptimizeAndOrXor(unsigned Opcode, SmallVectorImpl< ValueEntry > &Ops)
Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
static BinaryOperator * convertOrWithNoCommonBitsToAdd(Instruction *Or)
If we have (X|Y), and iff X and Y have no common bits set, transform this into (X+Y) to allow arithme...
static Instruction * CreateNeg(Value *S1, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
static bool collectMultiplyFactors(SmallVectorImpl< ValueEntry > &Ops, SmallVectorImpl< Factor > &Factors)
Build up a vector of value/power pairs factoring a product.
static BinaryOperator * ConvertShiftToMul(Instruction *Shl)
If this is a shift of a reassociable multiply or is used by one, change this into a multiply by a con...
static unsigned FindInOperandList(const SmallVectorImpl< ValueEntry > &Ops, unsigned i, Value *X)
Scan backwards and forwards among values with the same rank as element i to see if X exists.
static BinaryOperator * LowerNegateToMultiply(Instruction *Neg)
Replace 0-X with X*-1.
static BinaryOperator * isReassociableOp(Value *V, unsigned Opcode)
Return true if V is an instruction of the specified opcode and if it only has one use.
static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode)
Add the extra weight 'RHS' to the existing weight 'LHS', reducing the combined weight using any speci...
static bool LinearizeExprTree(Instruction *I, SmallVectorImpl< RepeatedValue > &Ops, ReassociatePass::OrderedSet &ToRedo)
Given an associative binary expression, return the leaf nodes in Ops along with their weights (how ma...
std::pair< Value *, APInt > RepeatedValue
static bool hasFPAssociativeFlags(Instruction *I)
Return true if I is an instruction with the FastMathFlags that are needed for general reassociation s...
static Value * NegateValue(Value *V, Instruction *BI, ReassociatePass::OrderedSet &ToRedo)
Insert instructions before the instruction pointed to by BI, that computes the negative version of th...
static bool shouldConvertOrWithNoCommonBitsToAdd(Instruction *Or)
Return true if it may be profitable to convert this (X|Y) into (X+Y).
static Value * createAndInstr(Instruction *InsertBefore, Value *Opnd, const APInt &ConstOpnd)
Helper function of CombineXorOpnd().
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
static bool isLoadCombineCandidate(Instruction *Or)
static Value * EmitAddTreeOfValues(Instruction *I, SmallVectorImpl< WeakTrackingVH > &Ops)
Emit a tree of add instructions, summing Ops together and returning the result.
@ VI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
@ Flags
Definition: TextStubV5.cpp:93
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
Value * RHS
Value * LHS
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:75
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:354
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:366
bool isMinValue() const
Determine if this is the smallest unsigned value.
Definition: APInt.h:409
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:459
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:177
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:222
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
Represent the analysis usage information of a pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:326
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", Instruction *InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
BinaryOps getOpcode() const
Definition: InstrTypes.h:391
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:113
static Constant * getBinOpAbsorber(unsigned Opcode, Type *Ty)
Return the absorbing element for the given binary operation, i.e.
Definition: Constants.cpp:2717
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2626
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2672
static Constant * getNeg(Constant *C, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2581
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:260
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:927
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:403
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:356
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
const BasicBlock & getEntryBlock() const
Definition: Function.h:745
Legacy wrapper pass to provide the GlobalsAAResult object.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2564
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:365
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:70
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
const BasicBlock * getParent() const
Definition: Instruction.h:90
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:87
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:74
bool isNilpotent() const
Return true if the instruction is nilpotent:
Definition: Instruction.h:621
const char * getOpcodeName() const
Definition: Instruction.h:170
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:168
Instruction * getInsertionPointAfterDef()
Get the first insertion point at which the result of this instruction is defined.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:362
bool isIdempotent() const
Return true if the instruction is idempotent:
Definition: Instruction.h:607
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1743
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
bool areAllPreserved() const
Test whether all analyses are preserved (and none are abandoned).
Definition: PassManager.h:327
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:188
Reassociate commutative expressions.
Definition: Reassociate.h:71
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:152
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:83
value_type pop_back_val()
Definition: SetVector.h:236
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void reserve(size_type N)
Definition: SmallVector.h:667
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:809
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:235
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", Instruction *InsertBefore=nullptr)
Definition: InstrTypes.h:163
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1724
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:535
iterator_range< user_iterator > users()
Definition: Value.h:421
void clearSubclassOptionalData()
Clear the optional flags contained in this value.
Definition: Value.h:544
void deleteValue()
Delete a pointer to a generic Value.
Definition: Value.cpp:110
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:384
self_iterator getIterator()
Definition: ilist_node.h:82
Utility class representing a non-constant Xor-operand.
Definition: Reassociate.cpp:95
Value * getSymbolicPart() const
unsigned getSymbolicRank() const
void setSymbolicRank(unsigned R)
const APInt & getConstPart() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:997
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:144
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:716
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:985
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:278
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
apfloat_match m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
Definition: PatternMatch.h:295
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
@ Undef
Value of the register doesn't matter.
constexpr double e
Definition: MathExtras.h:31
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void stable_sort(R &&Range)
Definition: STLExtras.h:2063
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1364
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1345
FunctionPass * createReassociatePass()
void initializeReassociateLegacyPassPass(PassRegistry &)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1826
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:398
Constant * ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op, const DataLayout &DL)
Attempt to constant fold a unary operation with the specified operand.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if LHS and RHS have no common bits set.
bool replaceDbgUsesWithUndef(Instruction *I)
Replace all the uses of an SSA value in @llvm.dbg intrinsics with undef.
Definition: Local.cpp:606
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:2038
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:184
bool mayHaveNonDefUseDependency(const Instruction &I)
Returns true if the result or effects of the given instructions I depend values not reachable through...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
Utility class representing a base and exponent pair which form one factor of some product.
Definition: Reassociate.h:59