LLVM 19.0.0git
Reassociate.cpp
Go to the documentation of this file.
1//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass reassociates commutative expressions in an order that is designed
10// to promote better constant propagation, GCSE, LICM, PRE, etc.
11//
12// For example: 4 + (x + 5) -> x + (4 + 5)
13//
14// In the implementation of this algorithm, constants are assigned rank = 0,
15// function arguments are rank = 1, and other values are assigned ranks
16// corresponding to the reverse post order traversal of current function
17// (starting at 2), which effectively gives values in deep loops higher rank
18// than values not in loops.
19//
20//===----------------------------------------------------------------------===//
21
23#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
35#include "llvm/IR/Argument.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
39#include "llvm/IR/Constants.h"
40#include "llvm/IR/Function.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
45#include "llvm/IR/Operator.h"
46#include "llvm/IR/PassManager.h"
48#include "llvm/IR/Type.h"
49#include "llvm/IR/User.h"
50#include "llvm/IR/Value.h"
51#include "llvm/IR/ValueHandle.h"
53#include "llvm/Pass.h"
56#include "llvm/Support/Debug.h"
60#include <algorithm>
61#include <cassert>
62#include <utility>
63
64using namespace llvm;
65using namespace reassociate;
66using namespace PatternMatch;
67
68#define DEBUG_TYPE "reassociate"
69
70STATISTIC(NumChanged, "Number of insts reassociated");
71STATISTIC(NumAnnihil, "Number of expr tree annihilated");
72STATISTIC(NumFactor , "Number of multiplies factored");
73
74static cl::opt<bool>
75 UseCSELocalOpt(DEBUG_TYPE "-use-cse-local",
76 cl::desc("Only reorder expressions within a basic block "
77 "when exposing CSE opportunities"),
78 cl::init(true), cl::Hidden);
79
80#ifndef NDEBUG
81/// Print out the expression identified in the Ops list.
83 Module *M = I->getModule();
84 dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
85 << *Ops[0].Op->getType() << '\t';
86 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
87 dbgs() << "[ ";
88 Ops[i].Op->printAsOperand(dbgs(), false, M);
89 dbgs() << ", #" << Ops[i].Rank << "] ";
90 }
91}
92#endif
93
94/// Utility class representing a non-constant Xor-operand. We classify
95/// non-constant Xor-Operands into two categories:
96/// C1) The operand is in the form "X & C", where C is a constant and C != ~0
97/// C2)
98/// C2.1) The operand is in the form of "X | C", where C is a non-zero
99/// constant.
100/// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this
101/// operand as "E | 0"
103public:
104 XorOpnd(Value *V);
105
106 bool isInvalid() const { return SymbolicPart == nullptr; }
107 bool isOrExpr() const { return isOr; }
108 Value *getValue() const { return OrigVal; }
109 Value *getSymbolicPart() const { return SymbolicPart; }
110 unsigned getSymbolicRank() const { return SymbolicRank; }
111 const APInt &getConstPart() const { return ConstPart; }
112
113 void Invalidate() { SymbolicPart = OrigVal = nullptr; }
114 void setSymbolicRank(unsigned R) { SymbolicRank = R; }
115
116private:
117 Value *OrigVal;
118 Value *SymbolicPart;
119 APInt ConstPart;
120 unsigned SymbolicRank;
121 bool isOr;
122};
123
125 assert(!isa<ConstantInt>(V) && "No ConstantInt");
126 OrigVal = V;
127 Instruction *I = dyn_cast<Instruction>(V);
128 SymbolicRank = 0;
129
130 if (I && (I->getOpcode() == Instruction::Or ||
131 I->getOpcode() == Instruction::And)) {
132 Value *V0 = I->getOperand(0);
133 Value *V1 = I->getOperand(1);
134 const APInt *C;
135 if (match(V0, m_APInt(C)))
136 std::swap(V0, V1);
137
138 if (match(V1, m_APInt(C))) {
139 ConstPart = *C;
140 SymbolicPart = V0;
141 isOr = (I->getOpcode() == Instruction::Or);
142 return;
143 }
144 }
145
146 // view the operand as "V | 0"
147 SymbolicPart = V;
148 ConstPart = APInt::getZero(V->getType()->getScalarSizeInBits());
149 isOr = true;
150}
151
152/// Return true if I is an instruction with the FastMathFlags that are needed
153/// for general reassociation set. This is not the same as testing
154/// Instruction::isAssociative() because it includes operations like fsub.
155/// (This routine is only intended to be called for floating-point operations.)
157 assert(I && isa<FPMathOperator>(I) && "Should only check FP ops");
158 return I->hasAllowReassoc() && I->hasNoSignedZeros();
159}
160
161/// Return true if V is an instruction of the specified opcode and if it
162/// only has one use.
163static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
164 auto *BO = dyn_cast<BinaryOperator>(V);
165 if (BO && BO->hasOneUse() && BO->getOpcode() == Opcode)
166 if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
167 return BO;
168 return nullptr;
169}
170
171static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
172 unsigned Opcode2) {
173 auto *BO = dyn_cast<BinaryOperator>(V);
174 if (BO && BO->hasOneUse() &&
175 (BO->getOpcode() == Opcode1 || BO->getOpcode() == Opcode2))
176 if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
177 return BO;
178 return nullptr;
179}
180
181void ReassociatePass::BuildRankMap(Function &F,
183 unsigned Rank = 2;
184
185 // Assign distinct ranks to function arguments.
186 for (auto &Arg : F.args()) {
187 ValueRankMap[&Arg] = ++Rank;
188 LLVM_DEBUG(dbgs() << "Calculated Rank[" << Arg.getName() << "] = " << Rank
189 << "\n");
190 }
191
192 // Traverse basic blocks in ReversePostOrder.
193 for (BasicBlock *BB : RPOT) {
194 unsigned BBRank = RankMap[BB] = ++Rank << 16;
195
196 // Walk the basic block, adding precomputed ranks for any instructions that
197 // we cannot move. This ensures that the ranks for these instructions are
198 // all different in the block.
199 for (Instruction &I : *BB)
201 ValueRankMap[&I] = ++BBRank;
202 }
203}
204
205unsigned ReassociatePass::getRank(Value *V) {
206 Instruction *I = dyn_cast<Instruction>(V);
207 if (!I) {
208 if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
209 return 0; // Otherwise it's a global or constant, rank 0.
210 }
211
212 if (unsigned Rank = ValueRankMap[I])
213 return Rank; // Rank already known?
214
215 // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
216 // we can reassociate expressions for code motion! Since we do not recurse
217 // for PHI nodes, we cannot have infinite recursion here, because there
218 // cannot be loops in the value graph that do not go through PHI nodes.
219 unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
220 for (unsigned i = 0, e = I->getNumOperands(); i != e && Rank != MaxRank; ++i)
221 Rank = std::max(Rank, getRank(I->getOperand(i)));
222
223 // If this is a 'not' or 'neg' instruction, do not count it for rank. This
224 // assures us that X and ~X will have the same rank.
225 if (!match(I, m_Not(m_Value())) && !match(I, m_Neg(m_Value())) &&
226 !match(I, m_FNeg(m_Value())))
227 ++Rank;
228
229 LLVM_DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank
230 << "\n");
231
232 return ValueRankMap[I] = Rank;
233}
234
235// Canonicalize constants to RHS. Otherwise, sort the operands by rank.
236void ReassociatePass::canonicalizeOperands(Instruction *I) {
237 assert(isa<BinaryOperator>(I) && "Expected binary operator.");
238 assert(I->isCommutative() && "Expected commutative operator.");
239
240 Value *LHS = I->getOperand(0);
241 Value *RHS = I->getOperand(1);
242 if (LHS == RHS || isa<Constant>(RHS))
243 return;
244 if (isa<Constant>(LHS) || getRank(RHS) < getRank(LHS))
245 cast<BinaryOperator>(I)->swapOperands();
246}
247
249 BasicBlock::iterator InsertBefore,
250 Value *FlagsOp) {
251 if (S1->getType()->isIntOrIntVectorTy())
252 return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
253 else {
254 BinaryOperator *Res =
255 BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore);
256 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
257 return Res;
258 }
259}
260
262 BasicBlock::iterator InsertBefore,
263 Value *FlagsOp) {
264 if (S1->getType()->isIntOrIntVectorTy())
265 return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
266 else {
267 BinaryOperator *Res =
268 BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore);
269 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
270 return Res;
271 }
272}
273
275 BasicBlock::iterator InsertBefore,
276 Value *FlagsOp) {
277 if (S1->getType()->isIntOrIntVectorTy())
278 return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
279
280 if (auto *FMFSource = dyn_cast<Instruction>(FlagsOp))
281 return UnaryOperator::CreateFNegFMF(S1, FMFSource, Name, InsertBefore);
282
283 return UnaryOperator::CreateFNeg(S1, Name, InsertBefore);
284}
285
286/// Replace 0-X with X*-1.
288 assert((isa<UnaryOperator>(Neg) || isa<BinaryOperator>(Neg)) &&
289 "Expected a Negate!");
290 // FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0.
291 unsigned OpNo = isa<BinaryOperator>(Neg) ? 1 : 0;
292 Type *Ty = Neg->getType();
293 Constant *NegOne = Ty->isIntOrIntVectorTy() ?
294 ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);
295
296 BinaryOperator *Res =
297 CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg->getIterator(), Neg);
298 Neg->setOperand(OpNo, Constant::getNullValue(Ty)); // Drop use of op.
299 Res->takeName(Neg);
300 Neg->replaceAllUsesWith(Res);
301 Res->setDebugLoc(Neg->getDebugLoc());
302 return Res;
303}
304
305/// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael
306/// function. This means that x^(2^k) === 1 mod 2^Bitwidth for
307/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
308/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every
309/// even x in Bitwidth-bit arithmetic.
310static unsigned CarmichaelShift(unsigned Bitwidth) {
311 if (Bitwidth < 3)
312 return Bitwidth - 1;
313 return Bitwidth - 2;
314}
315
316/// Add the extra weight 'RHS' to the existing weight 'LHS',
317/// reducing the combined weight using any special properties of the operation.
318/// The existing weight LHS represents the computation X op X op ... op X where
319/// X occurs LHS times. The combined weight represents X op X op ... op X with
320/// X occurring LHS + RHS times. If op is "Xor" for example then the combined
321/// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even;
322/// the routine returns 1 in LHS in the first case, and 0 in LHS in the second.
323static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
324 // If we were working with infinite precision arithmetic then the combined
325 // weight would be LHS + RHS. But we are using finite precision arithmetic,
326 // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct
327 // for nilpotent operations and addition, but not for idempotent operations
328 // and multiplication), so it is important to correctly reduce the combined
329 // weight back into range if wrapping would be wrong.
330
331 // If RHS is zero then the weight didn't change.
332 if (RHS.isMinValue())
333 return;
334 // If LHS is zero then the combined weight is RHS.
335 if (LHS.isMinValue()) {
336 LHS = RHS;
337 return;
338 }
339 // From this point on we know that neither LHS nor RHS is zero.
340
341 if (Instruction::isIdempotent(Opcode)) {
342 // Idempotent means X op X === X, so any non-zero weight is equivalent to a
343 // weight of 1. Keeping weights at zero or one also means that wrapping is
344 // not a problem.
345 assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
346 return; // Return a weight of 1.
347 }
348 if (Instruction::isNilpotent(Opcode)) {
349 // Nilpotent means X op X === 0, so reduce weights modulo 2.
350 assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
351 LHS = 0; // 1 + 1 === 0 modulo 2.
352 return;
353 }
354 if (Opcode == Instruction::Add || Opcode == Instruction::FAdd) {
355 // TODO: Reduce the weight by exploiting nsw/nuw?
356 LHS += RHS;
357 return;
358 }
359
360 assert((Opcode == Instruction::Mul || Opcode == Instruction::FMul) &&
361 "Unknown associative operation!");
362 unsigned Bitwidth = LHS.getBitWidth();
363 // If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth
364 // can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth
365 // bit number x, since either x is odd in which case x^CM = 1, or x is even in
366 // which case both x^W and x^(W - CM) are zero. By subtracting off multiples
367 // of CM like this weights can always be reduced to the range [0, CM+Bitwidth)
368 // which by a happy accident means that they can always be represented using
369 // Bitwidth bits.
370 // TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than
371 // the Carmichael number).
372 if (Bitwidth > 3) {
373 /// CM - The value of Carmichael's lambda function.
374 APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth));
375 // Any weight W >= Threshold can be replaced with W - CM.
376 APInt Threshold = CM + Bitwidth;
377 assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!");
378 // For Bitwidth 4 or more the following sum does not overflow.
379 LHS += RHS;
380 while (LHS.uge(Threshold))
381 LHS -= CM;
382 } else {
383 // To avoid problems with overflow do everything the same as above but using
384 // a larger type.
385 unsigned CM = 1U << CarmichaelShift(Bitwidth);
386 unsigned Threshold = CM + Bitwidth;
387 assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold &&
388 "Weights not reduced!");
389 unsigned Total = LHS.getZExtValue() + RHS.getZExtValue();
390 while (Total >= Threshold)
391 Total -= CM;
392 LHS = Total;
393 }
394}
395
396using RepeatedValue = std::pair<Value*, APInt>;
397
398/// Given an associative binary expression, return the leaf
399/// nodes in Ops along with their weights (how many times the leaf occurs). The
400/// original expression is the same as
401/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
402/// op
403/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
404/// op
405/// ...
406/// op
407/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
408///
409/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
410///
411/// This routine may modify the function, in which case it returns 'true'. The
412/// changes it makes may well be destructive, changing the value computed by 'I'
413/// to something completely different. Thus if the routine returns 'true' then
414/// you MUST either replace I with a new expression computed from the Ops array,
415/// or use RewriteExprTree to put the values back in.
416///
417/// A leaf node is either not a binary operation of the same kind as the root
418/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
419/// opcode), or is the same kind of binary operator but has a use which either
420/// does not belong to the expression, or does belong to the expression but is
421/// a leaf node. Every leaf node has at least one use that is a non-leaf node
422/// of the expression, while for non-leaf nodes (except for the root 'I') every
423/// use is a non-leaf node of the expression.
424///
425/// For example:
426/// expression graph node names
427///
428/// + | I
429/// / \ |
430/// + + | A, B
431/// / \ / \ |
432/// * + * | C, D, E
433/// / \ / \ / \ |
434/// + * | F, G
435///
436/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
437/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
438///
439/// The expression is maximal: if some instruction is a binary operator of the
440/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
441/// then the instruction also belongs to the expression, is not a leaf node of
442/// it, and its operands also belong to the expression (but may be leaf nodes).
443///
444/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
445/// order to ensure that every non-root node in the expression has *exactly one*
446/// use by a non-leaf node of the expression. This destruction means that the
447/// caller MUST either replace 'I' with a new expression or use something like
448/// RewriteExprTree to put the values back in if the routine indicates that it
449/// made a change by returning 'true'.
450///
451/// In the above example either the right operand of A or the left operand of B
452/// will be replaced by undef. If it is B's operand then this gives:
453///
454/// + | I
455/// / \ |
456/// + + | A, B - operand of B replaced with undef
457/// / \ \ |
458/// * + * | C, D, E
459/// / \ / \ / \ |
460/// + * | F, G
461///
462/// Note that such undef operands can only be reached by passing through 'I'.
463/// For example, if you visit operands recursively starting from a leaf node
464/// then you will never see such an undef operand unless you get back to 'I',
465/// which requires passing through a phi node.
466///
467/// Note that this routine may also mutate binary operators of the wrong type
468/// that have all uses inside the expression (i.e. only used by non-leaf nodes
469/// of the expression) if it can turn them into binary operators of the right
470/// type and thus make the expression bigger.
474 bool &HasNUW) {
475 assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
476 "Expected a UnaryOperator or BinaryOperator!");
477 LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
478 unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
479 unsigned Opcode = I->getOpcode();
480 assert(I->isAssociative() && I->isCommutative() &&
481 "Expected an associative and commutative operation!");
482
483 // Visit all operands of the expression, keeping track of their weight (the
484 // number of paths from the expression root to the operand, or if you like
485 // the number of times that operand occurs in the linearized expression).
486 // For example, if I = X + A, where X = A + B, then I, X and B have weight 1
487 // while A has weight two.
488
489 // Worklist of non-leaf nodes (their operands are in the expression too) along
490 // with their weights, representing a certain number of paths to the operator.
491 // If an operator occurs in the worklist multiple times then we found multiple
492 // ways to get to it.
493 SmallVector<std::pair<Instruction*, APInt>, 8> Worklist; // (Op, Weight)
494 Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
495 bool Changed = false;
496
497 // Leaves of the expression are values that either aren't the right kind of
498 // operation (eg: a constant, or a multiply in an add tree), or are, but have
499 // some uses that are not inside the expression. For example, in I = X + X,
500 // X = A + B, the value X has two uses (by I) that are in the expression. If
501 // X has any other uses, for example in a return instruction, then we consider
502 // X to be a leaf, and won't analyze it further. When we first visit a value,
503 // if it has more than one use then at first we conservatively consider it to
504 // be a leaf. Later, as the expression is explored, we may discover some more
505 // uses of the value from inside the expression. If all uses turn out to be
506 // from within the expression (and the value is a binary operator of the right
507 // kind) then the value is no longer considered to be a leaf, and its operands
508 // are explored.
509
510 // Leaves - Keeps track of the set of putative leaves as well as the number of
511 // paths to each leaf seen so far.
512 using LeafMap = DenseMap<Value *, APInt>;
513 LeafMap Leaves; // Leaf -> Total weight so far.
514 SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order.
515
516#ifndef NDEBUG
517 SmallPtrSet<Value *, 8> Visited; // For checking the iteration scheme.
518#endif
519 while (!Worklist.empty()) {
520 std::pair<Instruction*, APInt> P = Worklist.pop_back_val();
521 I = P.first; // We examine the operands of this binary operator.
522
523 if (isa<OverflowingBinaryOperator>(I))
524 HasNUW &= I->hasNoUnsignedWrap();
525
526 for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
527 Value *Op = I->getOperand(OpIdx);
528 APInt Weight = P.second; // Number of paths to this operand.
529 LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
530 assert(!Op->use_empty() && "No uses, so how did we get to it?!");
531
532 // If this is a binary operation of the right kind with only one use then
533 // add its operands to the expression.
534 if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
535 assert(Visited.insert(Op).second && "Not first visit!");
536 LLVM_DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
537 Worklist.push_back(std::make_pair(BO, Weight));
538 continue;
539 }
540
541 // Appears to be a leaf. Is the operand already in the set of leaves?
542 LeafMap::iterator It = Leaves.find(Op);
543 if (It == Leaves.end()) {
544 // Not in the leaf map. Must be the first time we saw this operand.
545 assert(Visited.insert(Op).second && "Not first visit!");
546 if (!Op->hasOneUse()) {
547 // This value has uses not accounted for by the expression, so it is
548 // not safe to modify. Mark it as being a leaf.
550 << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
551 LeafOrder.push_back(Op);
552 Leaves[Op] = Weight;
553 continue;
554 }
555 // No uses outside the expression, try morphing it.
556 } else {
557 // Already in the leaf map.
558 assert(It != Leaves.end() && Visited.count(Op) &&
559 "In leaf map but not visited!");
560
561 // Update the number of paths to the leaf.
562 IncorporateWeight(It->second, Weight, Opcode);
563
564#if 0 // TODO: Re-enable once PR13021 is fixed.
565 // The leaf already has one use from inside the expression. As we want
566 // exactly one such use, drop this new use of the leaf.
567 assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
568 I->setOperand(OpIdx, UndefValue::get(I->getType()));
569 Changed = true;
570
571 // If the leaf is a binary operation of the right kind and we now see
572 // that its multiple original uses were in fact all by nodes belonging
573 // to the expression, then no longer consider it to be a leaf and add
574 // its operands to the expression.
575 if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
576 LLVM_DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n");
577 Worklist.push_back(std::make_pair(BO, It->second));
578 Leaves.erase(It);
579 continue;
580 }
581#endif
582
583 // If we still have uses that are not accounted for by the expression
584 // then it is not safe to modify the value.
585 if (!Op->hasOneUse())
586 continue;
587
588 // No uses outside the expression, try morphing it.
589 Weight = It->second;
590 Leaves.erase(It); // Since the value may be morphed below.
591 }
592
593 // At this point we have a value which, first of all, is not a binary
594 // expression of the right kind, and secondly, is only used inside the
595 // expression. This means that it can safely be modified. See if we
596 // can usefully morph it into an expression of the right kind.
597 assert((!isa<Instruction>(Op) ||
598 cast<Instruction>(Op)->getOpcode() != Opcode
599 || (isa<FPMathOperator>(Op) &&
600 !hasFPAssociativeFlags(cast<Instruction>(Op)))) &&
601 "Should have been handled above!");
602 assert(Op->hasOneUse() && "Has uses outside the expression tree!");
603
604 // If this is a multiply expression, turn any internal negations into
605 // multiplies by -1 so they can be reassociated. Add any users of the
606 // newly created multiplication by -1 to the redo list, so any
607 // reassociation opportunities that are exposed will be reassociated
608 // further.
609 Instruction *Neg;
610 if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) ||
611 (Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) &&
612 match(Op, m_Instruction(Neg))) {
614 << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
616 LLVM_DEBUG(dbgs() << *Mul << '\n');
617 Worklist.push_back(std::make_pair(Mul, Weight));
618 for (User *U : Mul->users()) {
619 if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(U))
620 ToRedo.insert(UserBO);
621 }
622 ToRedo.insert(Neg);
623 Changed = true;
624 continue;
625 }
626
627 // Failed to morph into an expression of the right type. This really is
628 // a leaf.
629 LLVM_DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
630 assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
631 LeafOrder.push_back(Op);
632 Leaves[Op] = Weight;
633 }
634 }
635
636 // The leaves, repeated according to their weights, represent the linearized
637 // form of the expression.
638 for (Value *V : LeafOrder) {
639 LeafMap::iterator It = Leaves.find(V);
640 if (It == Leaves.end())
641 // Node initially thought to be a leaf wasn't.
642 continue;
643 assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
644 APInt Weight = It->second;
645 if (Weight.isMinValue())
646 // Leaf already output or weight reduction eliminated it.
647 continue;
648 // Ensure the leaf is only output once.
649 It->second = 0;
650 Ops.push_back(std::make_pair(V, Weight));
651 }
652
653 // For nilpotent operations or addition there may be no operands, for example
654 // because the expression was "X xor X" or consisted of 2^Bitwidth additions:
655 // in both cases the weight reduces to 0 causing the value to be skipped.
656 if (Ops.empty()) {
657 Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
658 assert(Identity && "Associative operation without identity!");
659 Ops.emplace_back(Identity, APInt(Bitwidth, 1));
660 }
661
662 return Changed;
663}
664
665/// Now that the operands for this expression tree are
666/// linearized and optimized, emit them in-order.
667void ReassociatePass::RewriteExprTree(BinaryOperator *I,
669 bool HasNUW) {
670 assert(Ops.size() > 1 && "Single values should be used directly!");
671
672 // Since our optimizations should never increase the number of operations, the
673 // new expression can usually be written reusing the existing binary operators
674 // from the original expression tree, without creating any new instructions,
675 // though the rewritten expression may have a completely different topology.
676 // We take care to not change anything if the new expression will be the same
677 // as the original. If more than trivial changes (like commuting operands)
678 // were made then we are obliged to clear out any optional subclass data like
679 // nsw flags.
680
681 /// NodesToRewrite - Nodes from the original expression available for writing
682 /// the new expression into.
683 SmallVector<BinaryOperator*, 8> NodesToRewrite;
684 unsigned Opcode = I->getOpcode();
686
687 /// NotRewritable - The operands being written will be the leaves of the new
688 /// expression and must not be used as inner nodes (via NodesToRewrite) by
689 /// mistake. Inner nodes are always reassociable, and usually leaves are not
690 /// (if they were they would have been incorporated into the expression and so
691 /// would not be leaves), so most of the time there is no danger of this. But
692 /// in rare cases a leaf may become reassociable if an optimization kills uses
693 /// of it, or it may momentarily become reassociable during rewriting (below)
694 /// due it being removed as an operand of one of its uses. Ensure that misuse
695 /// of leaf nodes as inner nodes cannot occur by remembering all of the future
696 /// leaves and refusing to reuse any of them as inner nodes.
697 SmallPtrSet<Value*, 8> NotRewritable;
698 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
699 NotRewritable.insert(Ops[i].Op);
700
701 // ExpressionChangedStart - Non-null if the rewritten expression differs from
702 // the original in some non-trivial way, requiring the clearing of optional
703 // flags. Flags are cleared from the operator in ExpressionChangedStart up to
704 // ExpressionChangedEnd inclusive.
705 BinaryOperator *ExpressionChangedStart = nullptr,
706 *ExpressionChangedEnd = nullptr;
707 for (unsigned i = 0; ; ++i) {
708 // The last operation (which comes earliest in the IR) is special as both
709 // operands will come from Ops, rather than just one with the other being
710 // a subexpression.
711 if (i+2 == Ops.size()) {
712 Value *NewLHS = Ops[i].Op;
713 Value *NewRHS = Ops[i+1].Op;
714 Value *OldLHS = Op->getOperand(0);
715 Value *OldRHS = Op->getOperand(1);
716
717 if (NewLHS == OldLHS && NewRHS == OldRHS)
718 // Nothing changed, leave it alone.
719 break;
720
721 if (NewLHS == OldRHS && NewRHS == OldLHS) {
722 // The order of the operands was reversed. Swap them.
723 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
724 Op->swapOperands();
725 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
726 MadeChange = true;
727 ++NumChanged;
728 break;
729 }
730
731 // The new operation differs non-trivially from the original. Overwrite
732 // the old operands with the new ones.
733 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
734 if (NewLHS != OldLHS) {
735 BinaryOperator *BO = isReassociableOp(OldLHS, Opcode);
736 if (BO && !NotRewritable.count(BO))
737 NodesToRewrite.push_back(BO);
738 Op->setOperand(0, NewLHS);
739 }
740 if (NewRHS != OldRHS) {
741 BinaryOperator *BO = isReassociableOp(OldRHS, Opcode);
742 if (BO && !NotRewritable.count(BO))
743 NodesToRewrite.push_back(BO);
744 Op->setOperand(1, NewRHS);
745 }
746 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
747
748 ExpressionChangedStart = Op;
749 if (!ExpressionChangedEnd)
750 ExpressionChangedEnd = Op;
751 MadeChange = true;
752 ++NumChanged;
753
754 break;
755 }
756
757 // Not the last operation. The left-hand side will be a sub-expression
758 // while the right-hand side will be the current element of Ops.
759 Value *NewRHS = Ops[i].Op;
760 if (NewRHS != Op->getOperand(1)) {
761 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
762 if (NewRHS == Op->getOperand(0)) {
763 // The new right-hand side was already present as the left operand. If
764 // we are lucky then swapping the operands will sort out both of them.
765 Op->swapOperands();
766 } else {
767 // Overwrite with the new right-hand side.
768 BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
769 if (BO && !NotRewritable.count(BO))
770 NodesToRewrite.push_back(BO);
771 Op->setOperand(1, NewRHS);
772 ExpressionChangedStart = Op;
773 if (!ExpressionChangedEnd)
774 ExpressionChangedEnd = Op;
775 }
776 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
777 MadeChange = true;
778 ++NumChanged;
779 }
780
781 // Now deal with the left-hand side. If this is already an operation node
782 // from the original expression then just rewrite the rest of the expression
783 // into it.
784 BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
785 if (BO && !NotRewritable.count(BO)) {
786 Op = BO;
787 continue;
788 }
789
790 // Otherwise, grab a spare node from the original expression and use that as
791 // the left-hand side. If there are no nodes left then the optimizers made
792 // an expression with more nodes than the original! This usually means that
793 // they did something stupid but it might mean that the problem was just too
794 // hard (finding the mimimal number of multiplications needed to realize a
795 // multiplication expression is NP-complete). Whatever the reason, smart or
796 // stupid, create a new node if there are none left.
797 BinaryOperator *NewOp;
798 if (NodesToRewrite.empty()) {
799 Constant *Undef = UndefValue::get(I->getType());
800 NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode), Undef,
801 Undef, "", I->getIterator());
802 if (isa<FPMathOperator>(NewOp))
803 NewOp->setFastMathFlags(I->getFastMathFlags());
804 } else {
805 NewOp = NodesToRewrite.pop_back_val();
806 }
807
808 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
809 Op->setOperand(0, NewOp);
810 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
811 ExpressionChangedStart = Op;
812 if (!ExpressionChangedEnd)
813 ExpressionChangedEnd = Op;
814 MadeChange = true;
815 ++NumChanged;
816 Op = NewOp;
817 }
818
819 // If the expression changed non-trivially then clear out all subclass data
820 // starting from the operator specified in ExpressionChanged, and compactify
821 // the operators to just before the expression root to guarantee that the
822 // expression tree is dominated by all of Ops.
823 if (ExpressionChangedStart) {
824 bool ClearFlags = true;
825 do {
826 // Preserve flags.
827 if (ClearFlags) {
828 if (isa<FPMathOperator>(I)) {
829 FastMathFlags Flags = I->getFastMathFlags();
830 ExpressionChangedStart->clearSubclassOptionalData();
831 ExpressionChangedStart->setFastMathFlags(Flags);
832 } else {
833 ExpressionChangedStart->clearSubclassOptionalData();
834 // Note that it doesn't hold for mul if one of the operands is zero.
835 // TODO: We can preserve NUW flag if we prove that all mul operands
836 // are non-zero.
837 if (HasNUW && ExpressionChangedStart->getOpcode() == Instruction::Add)
838 ExpressionChangedStart->setHasNoUnsignedWrap();
839 }
840 }
841
842 if (ExpressionChangedStart == ExpressionChangedEnd)
843 ClearFlags = false;
844 if (ExpressionChangedStart == I)
845 break;
846
847 // Discard any debug info related to the expressions that has changed (we
848 // can leave debug info related to the root and any operation that didn't
849 // change, since the result of the expression tree should be the same
850 // even after reassociation).
851 if (ClearFlags)
852 replaceDbgUsesWithUndef(ExpressionChangedStart);
853
854 ExpressionChangedStart->moveBefore(I);
855 ExpressionChangedStart =
856 cast<BinaryOperator>(*ExpressionChangedStart->user_begin());
857 } while (true);
858 }
859
860 // Throw away any left over nodes from the original expression.
861 for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
862 RedoInsts.insert(NodesToRewrite[i]);
863}
864
865/// Insert instructions before the instruction pointed to by BI,
866/// that computes the negative version of the value specified. The negative
867/// version of the value is returned, and BI is left pointing at the instruction
868/// that should be processed next by the reassociation pass.
869/// Also add intermediate instructions to the redo list that are modified while
870/// pushing the negates through adds. These will be revisited to see if
871/// additional opportunities have been exposed.
874 if (auto *C = dyn_cast<Constant>(V)) {
875 const DataLayout &DL = BI->getModule()->getDataLayout();
876 Constant *Res = C->getType()->isFPOrFPVectorTy()
877 ? ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL)
879 if (Res)
880 return Res;
881 }
882
883 // We are trying to expose opportunity for reassociation. One of the things
884 // that we want to do to achieve this is to push a negation as deep into an
885 // expression chain as possible, to expose the add instructions. In practice,
886 // this means that we turn this:
887 // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
888 // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
889 // the constants. We assume that instcombine will clean up the mess later if
890 // we introduce tons of unnecessary negation instructions.
891 //
892 if (BinaryOperator *I =
893 isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
894 // Push the negates through the add.
895 I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
896 I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
897 if (I->getOpcode() == Instruction::Add) {
898 I->setHasNoUnsignedWrap(false);
899 I->setHasNoSignedWrap(false);
900 }
901
902 // We must move the add instruction here, because the neg instructions do
903 // not dominate the old add instruction in general. By moving it, we are
904 // assured that the neg instructions we just inserted dominate the
905 // instruction we are about to insert after them.
906 //
907 I->moveBefore(BI);
908 I->setName(I->getName()+".neg");
909
910 // Add the intermediate negates to the redo list as processing them later
911 // could expose more reassociating opportunities.
912 ToRedo.insert(I);
913 return I;
914 }
915
916 // Okay, we need to materialize a negated version of V with an instruction.
917 // Scan the use lists of V to see if we have one already.
918 for (User *U : V->users()) {
919 if (!match(U, m_Neg(m_Value())) && !match(U, m_FNeg(m_Value())))
920 continue;
921
922 // We found one! Now we have to make sure that the definition dominates
923 // this use. We do this by moving it to the entry block (if it is a
924 // non-instruction value) or right after the definition. These negates will
925 // be zapped by reassociate later, so we don't need much finesse here.
926 Instruction *TheNeg = dyn_cast<Instruction>(U);
927
928 // We can't safely propagate a vector zero constant with poison/undef lanes.
929 Constant *C;
930 if (match(TheNeg, m_BinOp(m_Constant(C), m_Value())) &&
931 C->containsUndefOrPoisonElement())
932 continue;
933
934 // Verify that the negate is in this function, V might be a constant expr.
935 if (!TheNeg ||
936 TheNeg->getParent()->getParent() != BI->getParent()->getParent())
937 continue;
938
939 BasicBlock::iterator InsertPt;
940 if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
941 auto InsertPtOpt = InstInput->getInsertionPointAfterDef();
942 if (!InsertPtOpt)
943 continue;
944 InsertPt = *InsertPtOpt;
945 } else {
946 InsertPt = TheNeg->getFunction()
947 ->getEntryBlock()
949 ->getIterator();
950 }
951
952 TheNeg->moveBefore(*InsertPt->getParent(), InsertPt);
953 if (TheNeg->getOpcode() == Instruction::Sub) {
954 TheNeg->setHasNoUnsignedWrap(false);
955 TheNeg->setHasNoSignedWrap(false);
956 } else {
957 TheNeg->andIRFlags(BI);
958 }
959 ToRedo.insert(TheNeg);
960 return TheNeg;
961 }
962
963 // Insert a 'neg' instruction that subtracts the value from zero to get the
964 // negation.
965 Instruction *NewNeg =
966 CreateNeg(V, V->getName() + ".neg", BI->getIterator(), BI);
967 ToRedo.insert(NewNeg);
968 return NewNeg;
969}
970
971// See if this `or` looks like an load widening reduction, i.e. that it
972// consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't
973// ensure that the pattern is *really* a load widening reduction,
974// we do not ensure that it can really be replaced with a widened load,
975// only that it mostly looks like one.
979
980 auto Enqueue = [&](Value *V) {
981 auto *I = dyn_cast<Instruction>(V);
982 // Each node of an `or` reduction must be an instruction,
983 if (!I)
984 return false; // Node is certainly not part of an `or` load reduction.
985 // Only process instructions we have never processed before.
986 if (Visited.insert(I).second)
987 Worklist.emplace_back(I);
988 return true; // Will need to look at parent nodes.
989 };
990
991 if (!Enqueue(Or))
992 return false; // Not an `or` reduction pattern.
993
994 while (!Worklist.empty()) {
995 auto *I = Worklist.pop_back_val();
996
997 // Okay, which instruction is this node?
998 switch (I->getOpcode()) {
999 case Instruction::Or:
1000 // Got an `or` node. That's fine, just recurse into it's operands.
1001 for (Value *Op : I->operands())
1002 if (!Enqueue(Op))
1003 return false; // Not an `or` reduction pattern.
1004 continue;
1005
1006 case Instruction::Shl:
1007 case Instruction::ZExt:
1008 // `shl`/`zext` nodes are fine, just recurse into their base operand.
1009 if (!Enqueue(I->getOperand(0)))
1010 return false; // Not an `or` reduction pattern.
1011 continue;
1012
1013 case Instruction::Load:
1014 // Perfect, `load` node means we've reached an edge of the graph.
1015 continue;
1016
1017 default: // Unknown node.
1018 return false; // Not an `or` reduction pattern.
1019 }
1020 }
1021
1022 return true;
1023}
1024
1025/// Return true if it may be profitable to convert this (X|Y) into (X+Y).
1027 // Don't bother to convert this up unless either the LHS is an associable add
1028 // or subtract or mul or if this is only used by one of the above.
1029 // This is only a compile-time improvement, it is not needed for correctness!
1030 auto isInteresting = [](Value *V) {
1031 for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul,
1032 Instruction::Shl})
1033 if (isReassociableOp(V, Op))
1034 return true;
1035 return false;
1036 };
1037
1038 if (any_of(Or->operands(), isInteresting))
1039 return true;
1040
1041 Value *VB = Or->user_back();
1042 if (Or->hasOneUse() && isInteresting(VB))
1043 return true;
1044
1045 return false;
1046}
1047
1048/// If we have (X|Y), and iff X and Y have no common bits set,
1049/// transform this into (X+Y) to allow arithmetics reassociation.
1051 // Convert an or into an add.
1052 BinaryOperator *New = CreateAdd(Or->getOperand(0), Or->getOperand(1), "",
1053 Or->getIterator(), Or);
1054 New->setHasNoSignedWrap();
1055 New->setHasNoUnsignedWrap();
1056 New->takeName(Or);
1057
1058 // Everyone now refers to the add instruction.
1059 Or->replaceAllUsesWith(New);
1060 New->setDebugLoc(Or->getDebugLoc());
1061
1062 LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << '\n');
1063 return New;
1064}
1065
1066/// Return true if we should break up this subtract of X-Y into (X + -Y).
1068 // If this is a negation, we can't split it up!
1069 if (match(Sub, m_Neg(m_Value())) || match(Sub, m_FNeg(m_Value())))
1070 return false;
1071
1072 // Don't breakup X - undef.
1073 if (isa<UndefValue>(Sub->getOperand(1)))
1074 return false;
1075
1076 // Don't bother to break this up unless either the LHS is an associable add or
1077 // subtract or if this is only used by one.
1078 Value *V0 = Sub->getOperand(0);
1079 if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) ||
1080 isReassociableOp(V0, Instruction::Sub, Instruction::FSub))
1081 return true;
1082 Value *V1 = Sub->getOperand(1);
1083 if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) ||
1084 isReassociableOp(V1, Instruction::Sub, Instruction::FSub))
1085 return true;
1086 Value *VB = Sub->user_back();
1087 if (Sub->hasOneUse() &&
1088 (isReassociableOp(VB, Instruction::Add, Instruction::FAdd) ||
1089 isReassociableOp(VB, Instruction::Sub, Instruction::FSub)))
1090 return true;
1091
1092 return false;
1093}
1094
1095/// If we have (X-Y), and if either X is an add, or if this is only used by an
1096/// add, transform this into (X+(0-Y)) to promote better reassociation.
1099 // Convert a subtract into an add and a neg instruction. This allows sub
1100 // instructions to be commuted with other add instructions.
1101 //
1102 // Calculate the negative value of Operand 1 of the sub instruction,
1103 // and set it as the RHS of the add instruction we just made.
1104 Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
1105 BinaryOperator *New =
1106 CreateAdd(Sub->getOperand(0), NegVal, "", Sub->getIterator(), Sub);
1107 Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
1108 Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
1109 New->takeName(Sub);
1110
1111 // Everyone now refers to the add instruction.
1112 Sub->replaceAllUsesWith(New);
1113 New->setDebugLoc(Sub->getDebugLoc());
1114
1115 LLVM_DEBUG(dbgs() << "Negated: " << *New << '\n');
1116 return New;
1117}
1118
1119/// If this is a shift of a reassociable multiply or is used by one, change
1120/// this into a multiply by a constant to assist with further reassociation.
1122 Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
1123 auto *SA = cast<ConstantInt>(Shl->getOperand(1));
1124 MulCst = ConstantExpr::getShl(MulCst, SA);
1125
1126 BinaryOperator *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst,
1127 "", Shl->getIterator());
1128 Shl->setOperand(0, PoisonValue::get(Shl->getType())); // Drop use of op.
1129 Mul->takeName(Shl);
1130
1131 // Everyone now refers to the mul instruction.
1132 Shl->replaceAllUsesWith(Mul);
1133 Mul->setDebugLoc(Shl->getDebugLoc());
1134
1135 // We can safely preserve the nuw flag in all cases. It's also safe to turn a
1136 // nuw nsw shl into a nuw nsw mul. However, nsw in isolation requires special
1137 // handling. It can be preserved as long as we're not left shifting by
1138 // bitwidth - 1.
1139 bool NSW = cast<BinaryOperator>(Shl)->hasNoSignedWrap();
1140 bool NUW = cast<BinaryOperator>(Shl)->hasNoUnsignedWrap();
1141 unsigned BitWidth = Shl->getType()->getIntegerBitWidth();
1142 if (NSW && (NUW || SA->getValue().ult(BitWidth - 1)))
1143 Mul->setHasNoSignedWrap(true);
1144 Mul->setHasNoUnsignedWrap(NUW);
1145 return Mul;
1146}
1147
1148/// Scan backwards and forwards among values with the same rank as element i
1149/// to see if X exists. If X does not exist, return i. This is useful when
1150/// scanning for 'x' when we see '-x' because they both get the same rank.
1152 unsigned i, Value *X) {
1153 unsigned XRank = Ops[i].Rank;
1154 unsigned e = Ops.size();
1155 for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) {
1156 if (Ops[j].Op == X)
1157 return j;
1158 if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
1159 if (Instruction *I2 = dyn_cast<Instruction>(X))
1160 if (I1->isIdenticalTo(I2))
1161 return j;
1162 }
1163 // Scan backwards.
1164 for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) {
1165 if (Ops[j].Op == X)
1166 return j;
1167 if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
1168 if (Instruction *I2 = dyn_cast<Instruction>(X))
1169 if (I1->isIdenticalTo(I2))
1170 return j;
1171 }
1172 return i;
1173}
1174
1175/// Emit a tree of add instructions, summing Ops together
1176/// and returning the result. Insert the tree before I.
1179 if (Ops.size() == 1) return Ops.back();
1180
1181 Value *V1 = Ops.pop_back_val();
1182 Value *V2 = EmitAddTreeOfValues(It, Ops);
1183 return CreateAdd(V2, V1, "reass.add", It, &*It);
1184}
1185
1186/// If V is an expression tree that is a multiplication sequence,
1187/// and if this sequence contains a multiply by Factor,
1188/// remove Factor from the tree and return the new tree.
1189Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
1190 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1191 if (!BO)
1192 return nullptr;
1193
1195 bool HasNUW = true;
1196 MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts, HasNUW);
1198 Factors.reserve(Tree.size());
1199 for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
1200 RepeatedValue E = Tree[i];
1201 Factors.append(E.second.getZExtValue(),
1202 ValueEntry(getRank(E.first), E.first));
1203 }
1204
1205 bool FoundFactor = false;
1206 bool NeedsNegate = false;
1207 for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
1208 if (Factors[i].Op == Factor) {
1209 FoundFactor = true;
1210 Factors.erase(Factors.begin()+i);
1211 break;
1212 }
1213
1214 // If this is a negative version of this factor, remove it.
1215 if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) {
1216 if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
1217 if (FC1->getValue() == -FC2->getValue()) {
1218 FoundFactor = NeedsNegate = true;
1219 Factors.erase(Factors.begin()+i);
1220 break;
1221 }
1222 } else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) {
1223 if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Factors[i].Op)) {
1224 const APFloat &F1 = FC1->getValueAPF();
1225 APFloat F2(FC2->getValueAPF());
1226 F2.changeSign();
1227 if (F1 == F2) {
1228 FoundFactor = NeedsNegate = true;
1229 Factors.erase(Factors.begin() + i);
1230 break;
1231 }
1232 }
1233 }
1234 }
1235
1236 if (!FoundFactor) {
1237 // Make sure to restore the operands to the expression tree.
1238 RewriteExprTree(BO, Factors, HasNUW);
1239 return nullptr;
1240 }
1241
1242 BasicBlock::iterator InsertPt = ++BO->getIterator();
1243
1244 // If this was just a single multiply, remove the multiply and return the only
1245 // remaining operand.
1246 if (Factors.size() == 1) {
1247 RedoInsts.insert(BO);
1248 V = Factors[0].Op;
1249 } else {
1250 RewriteExprTree(BO, Factors, HasNUW);
1251 V = BO;
1252 }
1253
1254 if (NeedsNegate)
1255 V = CreateNeg(V, "neg", InsertPt, BO);
1256
1257 return V;
1258}
1259
1260/// If V is a single-use multiply, recursively add its operands as factors,
1261/// otherwise add V to the list of factors.
1262///
1263/// Ops is the top-level list of add operands we're trying to factor.
1265 SmallVectorImpl<Value*> &Factors) {
1266 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1267 if (!BO) {
1268 Factors.push_back(V);
1269 return;
1270 }
1271
1272 // Otherwise, add the LHS and RHS to the list of factors.
1275}
1276
1277/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
1278/// This optimizes based on identities. If it can be reduced to a single Value,
1279/// it is returned, otherwise the Ops list is mutated as necessary.
1280static Value *OptimizeAndOrXor(unsigned Opcode,
1282 // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
1283 // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
1284 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1285 // First, check for X and ~X in the operand list.
1286 assert(i < Ops.size());
1287 Value *X;
1288 if (match(Ops[i].Op, m_Not(m_Value(X)))) { // Cannot occur for ^.
1289 unsigned FoundX = FindInOperandList(Ops, i, X);
1290 if (FoundX != i) {
1291 if (Opcode == Instruction::And) // ...&X&~X = 0
1292 return Constant::getNullValue(X->getType());
1293
1294 if (Opcode == Instruction::Or) // ...|X|~X = -1
1295 return Constant::getAllOnesValue(X->getType());
1296 }
1297 }
1298
1299 // Next, check for duplicate pairs of values, which we assume are next to
1300 // each other, due to our sorting criteria.
1301 assert(i < Ops.size());
1302 if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
1303 if (Opcode == Instruction::And || Opcode == Instruction::Or) {
1304 // Drop duplicate values for And and Or.
1305 Ops.erase(Ops.begin()+i);
1306 --i; --e;
1307 ++NumAnnihil;
1308 continue;
1309 }
1310
1311 // Drop pairs of values for Xor.
1312 assert(Opcode == Instruction::Xor);
1313 if (e == 2)
1314 return Constant::getNullValue(Ops[0].Op->getType());
1315
1316 // Y ^ X^X -> Y
1317 Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
1318 i -= 1; e -= 2;
1319 ++NumAnnihil;
1320 }
1321 }
1322 return nullptr;
1323}
1324
1325/// Helper function of CombineXorOpnd(). It creates a bitwise-and
1326/// instruction with the given two operands, and return the resulting
1327/// instruction. There are two special cases: 1) if the constant operand is 0,
1328/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
1329/// be returned.
1331 const APInt &ConstOpnd) {
1332 if (ConstOpnd.isZero())
1333 return nullptr;
1334
1335 if (ConstOpnd.isAllOnes())
1336 return Opnd;
1337
1338 Instruction *I = BinaryOperator::CreateAnd(
1339 Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra",
1340 InsertBefore);
1341 I->setDebugLoc(InsertBefore->getDebugLoc());
1342 return I;
1343}
1344
1345// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
1346// into "R ^ C", where C would be 0, and R is a symbolic value.
1347//
1348// If it was successful, true is returned, and the "R" and "C" is returned
1349// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
1350// and both "Res" and "ConstOpnd" remain unchanged.
1351bool ReassociatePass::CombineXorOpnd(BasicBlock::iterator It, XorOpnd *Opnd1,
1352 APInt &ConstOpnd, Value *&Res) {
1353 // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
1354 // = ((x | c1) ^ c1) ^ (c1 ^ c2)
1355 // = (x & ~c1) ^ (c1 ^ c2)
1356 // It is useful only when c1 == c2.
1357 if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isZero())
1358 return false;
1359
1360 if (!Opnd1->getValue()->hasOneUse())
1361 return false;
1362
1363 const APInt &C1 = Opnd1->getConstPart();
1364 if (C1 != ConstOpnd)
1365 return false;
1366
1367 Value *X = Opnd1->getSymbolicPart();
1368 Res = createAndInstr(It, X, ~C1);
1369 // ConstOpnd was C2, now C1 ^ C2.
1370 ConstOpnd ^= C1;
1371
1372 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1373 RedoInsts.insert(T);
1374 return true;
1375}
1376
1377// Helper function of OptimizeXor(). It tries to simplify
1378// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
1379// symbolic value.
1380//
1381// If it was successful, true is returned, and the "R" and "C" is returned
1382// via "Res" and "ConstOpnd", respectively (If the entire expression is
1383// evaluated to a constant, the Res is set to NULL); otherwise, false is
1384// returned, and both "Res" and "ConstOpnd" remain unchanged.
1385bool ReassociatePass::CombineXorOpnd(BasicBlock::iterator It, XorOpnd *Opnd1,
1386 XorOpnd *Opnd2, APInt &ConstOpnd,
1387 Value *&Res) {
1388 Value *X = Opnd1->getSymbolicPart();
1389 if (X != Opnd2->getSymbolicPart())
1390 return false;
1391
1392 // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
1393 int DeadInstNum = 1;
1394 if (Opnd1->getValue()->hasOneUse())
1395 DeadInstNum++;
1396 if (Opnd2->getValue()->hasOneUse())
1397 DeadInstNum++;
1398
1399 // Xor-Rule 2:
1400 // (x | c1) ^ (x & c2)
1401 // = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
1402 // = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1
1403 // = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
1404 //
1405 if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
1406 if (Opnd2->isOrExpr())
1407 std::swap(Opnd1, Opnd2);
1408
1409 const APInt &C1 = Opnd1->getConstPart();
1410 const APInt &C2 = Opnd2->getConstPart();
1411 APInt C3((~C1) ^ C2);
1412
1413 // Do not increase code size!
1414 if (!C3.isZero() && !C3.isAllOnes()) {
1415 int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1416 if (NewInstNum > DeadInstNum)
1417 return false;
1418 }
1419
1420 Res = createAndInstr(It, X, C3);
1421 ConstOpnd ^= C1;
1422 } else if (Opnd1->isOrExpr()) {
1423 // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
1424 //
1425 const APInt &C1 = Opnd1->getConstPart();
1426 const APInt &C2 = Opnd2->getConstPart();
1427 APInt C3 = C1 ^ C2;
1428
1429 // Do not increase code size
1430 if (!C3.isZero() && !C3.isAllOnes()) {
1431 int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1432 if (NewInstNum > DeadInstNum)
1433 return false;
1434 }
1435
1436 Res = createAndInstr(It, X, C3);
1437 ConstOpnd ^= C3;
1438 } else {
1439 // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
1440 //
1441 const APInt &C1 = Opnd1->getConstPart();
1442 const APInt &C2 = Opnd2->getConstPart();
1443 APInt C3 = C1 ^ C2;
1444 Res = createAndInstr(It, X, C3);
1445 }
1446
1447 // Put the original operands in the Redo list; hope they will be deleted
1448 // as dead code.
1449 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1450 RedoInsts.insert(T);
1451 if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
1452 RedoInsts.insert(T);
1453
1454 return true;
1455}
1456
1457/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
1458/// to a single Value, it is returned, otherwise the Ops list is mutated as
1459/// necessary.
1460Value *ReassociatePass::OptimizeXor(Instruction *I,
1462 if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
1463 return V;
1464
1465 if (Ops.size() == 1)
1466 return nullptr;
1467
1469 SmallVector<XorOpnd*, 8> OpndPtrs;
1470 Type *Ty = Ops[0].Op->getType();
1471 APInt ConstOpnd(Ty->getScalarSizeInBits(), 0);
1472
1473 // Step 1: Convert ValueEntry to XorOpnd
1474 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1475 Value *V = Ops[i].Op;
1476 const APInt *C;
1477 // TODO: Support non-splat vectors.
1478 if (match(V, m_APInt(C))) {
1479 ConstOpnd ^= *C;
1480 } else {
1481 XorOpnd O(V);
1482 O.setSymbolicRank(getRank(O.getSymbolicPart()));
1483 Opnds.push_back(O);
1484 }
1485 }
1486
1487 // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
1488 // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
1489 // the "OpndPtrs" as well. For the similar reason, do not fuse this loop
1490 // with the previous loop --- the iterator of the "Opnds" may be invalidated
1491 // when new elements are added to the vector.
1492 for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
1493 OpndPtrs.push_back(&Opnds[i]);
1494
1495 // Step 2: Sort the Xor-Operands in a way such that the operands containing
1496 // the same symbolic value cluster together. For instance, the input operand
1497 // sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
1498 // ("x | 123", "x & 789", "y & 456").
1499 //
1500 // The purpose is twofold:
1501 // 1) Cluster together the operands sharing the same symbolic-value.
1502 // 2) Operand having smaller symbolic-value-rank is permuted earlier, which
1503 // could potentially shorten crital path, and expose more loop-invariants.
1504 // Note that values' rank are basically defined in RPO order (FIXME).
1505 // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
1506 // than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
1507 // "z" in the order of X-Y-Z is better than any other orders.
1508 llvm::stable_sort(OpndPtrs, [](XorOpnd *LHS, XorOpnd *RHS) {
1509 return LHS->getSymbolicRank() < RHS->getSymbolicRank();
1510 });
1511
1512 // Step 3: Combine adjacent operands
1513 XorOpnd *PrevOpnd = nullptr;
1514 bool Changed = false;
1515 for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
1516 XorOpnd *CurrOpnd = OpndPtrs[i];
1517 // The combined value
1518 Value *CV;
1519
1520 // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
1521 if (!ConstOpnd.isZero() &&
1522 CombineXorOpnd(I->getIterator(), CurrOpnd, ConstOpnd, CV)) {
1523 Changed = true;
1524 if (CV)
1525 *CurrOpnd = XorOpnd(CV);
1526 else {
1527 CurrOpnd->Invalidate();
1528 continue;
1529 }
1530 }
1531
1532 if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
1533 PrevOpnd = CurrOpnd;
1534 continue;
1535 }
1536
1537 // step 3.2: When previous and current operands share the same symbolic
1538 // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
1539 if (CombineXorOpnd(I->getIterator(), CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
1540 // Remove previous operand
1541 PrevOpnd->Invalidate();
1542 if (CV) {
1543 *CurrOpnd = XorOpnd(CV);
1544 PrevOpnd = CurrOpnd;
1545 } else {
1546 CurrOpnd->Invalidate();
1547 PrevOpnd = nullptr;
1548 }
1549 Changed = true;
1550 }
1551 }
1552
1553 // Step 4: Reassemble the Ops
1554 if (Changed) {
1555 Ops.clear();
1556 for (const XorOpnd &O : Opnds) {
1557 if (O.isInvalid())
1558 continue;
1559 ValueEntry VE(getRank(O.getValue()), O.getValue());
1560 Ops.push_back(VE);
1561 }
1562 if (!ConstOpnd.isZero()) {
1563 Value *C = ConstantInt::get(Ty, ConstOpnd);
1564 ValueEntry VE(getRank(C), C);
1565 Ops.push_back(VE);
1566 }
1567 unsigned Sz = Ops.size();
1568 if (Sz == 1)
1569 return Ops.back().Op;
1570 if (Sz == 0) {
1571 assert(ConstOpnd.isZero());
1572 return ConstantInt::get(Ty, ConstOpnd);
1573 }
1574 }
1575
1576 return nullptr;
1577}
1578
1579/// Optimize a series of operands to an 'add' instruction. This
1580/// optimizes based on identities. If it can be reduced to a single Value, it
1581/// is returned, otherwise the Ops list is mutated as necessary.
1582Value *ReassociatePass::OptimizeAdd(Instruction *I,
1584 // Scan the operand lists looking for X and -X pairs. If we find any, we
1585 // can simplify expressions like X+-X == 0 and X+~X ==-1. While we're at it,
1586 // scan for any
1587 // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
1588
1589 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1590 Value *TheOp = Ops[i].Op;
1591 // Check to see if we've seen this operand before. If so, we factor all
1592 // instances of the operand together. Due to our sorting criteria, we know
1593 // that these need to be next to each other in the vector.
1594 if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
1595 // Rescan the list, remove all instances of this operand from the expr.
1596 unsigned NumFound = 0;
1597 do {
1598 Ops.erase(Ops.begin()+i);
1599 ++NumFound;
1600 } while (i != Ops.size() && Ops[i].Op == TheOp);
1601
1602 LLVM_DEBUG(dbgs() << "\nFACTORING [" << NumFound << "]: " << *TheOp
1603 << '\n');
1604 ++NumFactor;
1605
1606 // Insert a new multiply.
1607 Type *Ty = TheOp->getType();
1608 Constant *C = Ty->isIntOrIntVectorTy() ?
1609 ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
1610 Instruction *Mul = CreateMul(TheOp, C, "factor", I->getIterator(), I);
1611
1612 // Now that we have inserted a multiply, optimize it. This allows us to
1613 // handle cases that require multiple factoring steps, such as this:
1614 // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
1615 RedoInsts.insert(Mul);
1616
1617 // If every add operand was a duplicate, return the multiply.
1618 if (Ops.empty())
1619 return Mul;
1620
1621 // Otherwise, we had some input that didn't have the dupe, such as
1622 // "A + A + B" -> "A*2 + B". Add the new multiply to the list of
1623 // things being added by this operation.
1624 Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
1625
1626 --i;
1627 e = Ops.size();
1628 continue;
1629 }
1630
1631 // Check for X and -X or X and ~X in the operand list.
1632 Value *X;
1633 if (!match(TheOp, m_Neg(m_Value(X))) && !match(TheOp, m_Not(m_Value(X))) &&
1634 !match(TheOp, m_FNeg(m_Value(X))))
1635 continue;
1636
1637 unsigned FoundX = FindInOperandList(Ops, i, X);
1638 if (FoundX == i)
1639 continue;
1640
1641 // Remove X and -X from the operand list.
1642 if (Ops.size() == 2 &&
1643 (match(TheOp, m_Neg(m_Value())) || match(TheOp, m_FNeg(m_Value()))))
1644 return Constant::getNullValue(X->getType());
1645
1646 // Remove X and ~X from the operand list.
1647 if (Ops.size() == 2 && match(TheOp, m_Not(m_Value())))
1648 return Constant::getAllOnesValue(X->getType());
1649
1650 Ops.erase(Ops.begin()+i);
1651 if (i < FoundX)
1652 --FoundX;
1653 else
1654 --i; // Need to back up an extra one.
1655 Ops.erase(Ops.begin()+FoundX);
1656 ++NumAnnihil;
1657 --i; // Revisit element.
1658 e -= 2; // Removed two elements.
1659
1660 // if X and ~X we append -1 to the operand list.
1661 if (match(TheOp, m_Not(m_Value()))) {
1662 Value *V = Constant::getAllOnesValue(X->getType());
1663 Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
1664 e += 1;
1665 }
1666 }
1667
1668 // Scan the operand list, checking to see if there are any common factors
1669 // between operands. Consider something like A*A+A*B*C+D. We would like to
1670 // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
1671 // To efficiently find this, we count the number of times a factor occurs
1672 // for any ADD operands that are MULs.
1673 DenseMap<Value*, unsigned> FactorOccurrences;
1674
1675 // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
1676 // where they are actually the same multiply.
1677 unsigned MaxOcc = 0;
1678 Value *MaxOccVal = nullptr;
1679 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1680 BinaryOperator *BOp =
1681 isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1682 if (!BOp)
1683 continue;
1684
1685 // Compute all of the factors of this added value.
1686 SmallVector<Value*, 8> Factors;
1687 FindSingleUseMultiplyFactors(BOp, Factors);
1688 assert(Factors.size() > 1 && "Bad linearize!");
1689
1690 // Add one to FactorOccurrences for each unique factor in this op.
1691 SmallPtrSet<Value*, 8> Duplicates;
1692 for (Value *Factor : Factors) {
1693 if (!Duplicates.insert(Factor).second)
1694 continue;
1695
1696 unsigned Occ = ++FactorOccurrences[Factor];
1697 if (Occ > MaxOcc) {
1698 MaxOcc = Occ;
1699 MaxOccVal = Factor;
1700 }
1701
1702 // If Factor is a negative constant, add the negated value as a factor
1703 // because we can percolate the negate out. Watch for minint, which
1704 // cannot be positivified.
1705 if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
1706 if (CI->isNegative() && !CI->isMinValue(true)) {
1707 Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
1708 if (!Duplicates.insert(Factor).second)
1709 continue;
1710 unsigned Occ = ++FactorOccurrences[Factor];
1711 if (Occ > MaxOcc) {
1712 MaxOcc = Occ;
1713 MaxOccVal = Factor;
1714 }
1715 }
1716 } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Factor)) {
1717 if (CF->isNegative()) {
1718 APFloat F(CF->getValueAPF());
1719 F.changeSign();
1720 Factor = ConstantFP::get(CF->getContext(), F);
1721 if (!Duplicates.insert(Factor).second)
1722 continue;
1723 unsigned Occ = ++FactorOccurrences[Factor];
1724 if (Occ > MaxOcc) {
1725 MaxOcc = Occ;
1726 MaxOccVal = Factor;
1727 }
1728 }
1729 }
1730 }
1731 }
1732
1733 // If any factor occurred more than one time, we can pull it out.
1734 if (MaxOcc > 1) {
1735 LLVM_DEBUG(dbgs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal
1736 << '\n');
1737 ++NumFactor;
1738
1739 // Create a new instruction that uses the MaxOccVal twice. If we don't do
1740 // this, we could otherwise run into situations where removing a factor
1741 // from an expression will drop a use of maxocc, and this can cause
1742 // RemoveFactorFromExpression on successive values to behave differently.
1743 Instruction *DummyInst =
1744 I->getType()->isIntOrIntVectorTy()
1745 ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
1746 : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
1747
1749 for (unsigned i = 0; i != Ops.size(); ++i) {
1750 // Only try to remove factors from expressions we're allowed to.
1751 BinaryOperator *BOp =
1752 isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1753 if (!BOp)
1754 continue;
1755
1756 if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
1757 // The factorized operand may occur several times. Convert them all in
1758 // one fell swoop.
1759 for (unsigned j = Ops.size(); j != i;) {
1760 --j;
1761 if (Ops[j].Op == Ops[i].Op) {
1762 NewMulOps.push_back(V);
1763 Ops.erase(Ops.begin()+j);
1764 }
1765 }
1766 --i;
1767 }
1768 }
1769
1770 // No need for extra uses anymore.
1771 DummyInst->deleteValue();
1772
1773 unsigned NumAddedValues = NewMulOps.size();
1774 Value *V = EmitAddTreeOfValues(I->getIterator(), NewMulOps);
1775
1776 // Now that we have inserted the add tree, optimize it. This allows us to
1777 // handle cases that require multiple factoring steps, such as this:
1778 // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
1779 assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
1780 (void)NumAddedValues;
1781 if (Instruction *VI = dyn_cast<Instruction>(V))
1782 RedoInsts.insert(VI);
1783
1784 // Create the multiply.
1785 Instruction *V2 = CreateMul(V, MaxOccVal, "reass.mul", I->getIterator(), I);
1786
1787 // Rerun associate on the multiply in case the inner expression turned into
1788 // a multiply. We want to make sure that we keep things in canonical form.
1789 RedoInsts.insert(V2);
1790
1791 // If every add operand included the factor (e.g. "A*B + A*C"), then the
1792 // entire result expression is just the multiply "A*(B+C)".
1793 if (Ops.empty())
1794 return V2;
1795
1796 // Otherwise, we had some input that didn't have the factor, such as
1797 // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
1798 // things being added by this operation.
1799 Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
1800 }
1801
1802 return nullptr;
1803}
1804
1805/// Build up a vector of value/power pairs factoring a product.
1806///
1807/// Given a series of multiplication operands, build a vector of factors and
1808/// the powers each is raised to when forming the final product. Sort them in
1809/// the order of descending power.
1810///
1811/// (x*x) -> [(x, 2)]
1812/// ((x*x)*x) -> [(x, 3)]
1813/// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
1814///
1815/// \returns Whether any factors have a power greater than one.
1817 SmallVectorImpl<Factor> &Factors) {
1818 // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
1819 // Compute the sum of powers of simplifiable factors.
1820 unsigned FactorPowerSum = 0;
1821 for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
1822 Value *Op = Ops[Idx-1].Op;
1823
1824 // Count the number of occurrences of this value.
1825 unsigned Count = 1;
1826 for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
1827 ++Count;
1828 // Track for simplification all factors which occur 2 or more times.
1829 if (Count > 1)
1830 FactorPowerSum += Count;
1831 }
1832
1833 // We can only simplify factors if the sum of the powers of our simplifiable
1834 // factors is 4 or higher. When that is the case, we will *always* have
1835 // a simplification. This is an important invariant to prevent cyclicly
1836 // trying to simplify already minimal formations.
1837 if (FactorPowerSum < 4)
1838 return false;
1839
1840 // Now gather the simplifiable factors, removing them from Ops.
1841 FactorPowerSum = 0;
1842 for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
1843 Value *Op = Ops[Idx-1].Op;
1844
1845 // Count the number of occurrences of this value.
1846 unsigned Count = 1;
1847 for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
1848 ++Count;
1849 if (Count == 1)
1850 continue;
1851 // Move an even number of occurrences to Factors.
1852 Count &= ~1U;
1853 Idx -= Count;
1854 FactorPowerSum += Count;
1855 Factors.push_back(Factor(Op, Count));
1856 Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
1857 }
1858
1859 // None of the adjustments above should have reduced the sum of factor powers
1860 // below our mininum of '4'.
1861 assert(FactorPowerSum >= 4);
1862
1863 llvm::stable_sort(Factors, [](const Factor &LHS, const Factor &RHS) {
1864 return LHS.Power > RHS.Power;
1865 });
1866 return true;
1867}
1868
1869/// Build a tree of multiplies, computing the product of Ops.
1872 if (Ops.size() == 1)
1873 return Ops.back();
1874
1875 Value *LHS = Ops.pop_back_val();
1876 do {
1877 if (LHS->getType()->isIntOrIntVectorTy())
1878 LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
1879 else
1880 LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
1881 } while (!Ops.empty());
1882
1883 return LHS;
1884}
1885
1886/// Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
1887///
1888/// Given a vector of values raised to various powers, where no two values are
1889/// equal and the powers are sorted in decreasing order, compute the minimal
1890/// DAG of multiplies to compute the final product, and return that product
1891/// value.
1892Value *
1893ReassociatePass::buildMinimalMultiplyDAG(IRBuilderBase &Builder,
1894 SmallVectorImpl<Factor> &Factors) {
1895 assert(Factors[0].Power);
1896 SmallVector<Value *, 4> OuterProduct;
1897 for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
1898 Idx < Size && Factors[Idx].Power > 0; ++Idx) {
1899 if (Factors[Idx].Power != Factors[LastIdx].Power) {
1900 LastIdx = Idx;
1901 continue;
1902 }
1903
1904 // We want to multiply across all the factors with the same power so that
1905 // we can raise them to that power as a single entity. Build a mini tree
1906 // for that.
1907 SmallVector<Value *, 4> InnerProduct;
1908 InnerProduct.push_back(Factors[LastIdx].Base);
1909 do {
1910 InnerProduct.push_back(Factors[Idx].Base);
1911 ++Idx;
1912 } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);
1913
1914 // Reset the base value of the first factor to the new expression tree.
1915 // We'll remove all the factors with the same power in a second pass.
1916 Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
1917 if (Instruction *MI = dyn_cast<Instruction>(M))
1918 RedoInsts.insert(MI);
1919
1920 LastIdx = Idx;
1921 }
1922 // Unique factors with equal powers -- we've folded them into the first one's
1923 // base.
1924 Factors.erase(std::unique(Factors.begin(), Factors.end(),
1925 [](const Factor &LHS, const Factor &RHS) {
1926 return LHS.Power == RHS.Power;
1927 }),
1928 Factors.end());
1929
1930 // Iteratively collect the base of each factor with an add power into the
1931 // outer product, and halve each power in preparation for squaring the
1932 // expression.
1933 for (Factor &F : Factors) {
1934 if (F.Power & 1)
1935 OuterProduct.push_back(F.Base);
1936 F.Power >>= 1;
1937 }
1938 if (Factors[0].Power) {
1939 Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
1940 OuterProduct.push_back(SquareRoot);
1941 OuterProduct.push_back(SquareRoot);
1942 }
1943 if (OuterProduct.size() == 1)
1944 return OuterProduct.front();
1945
1946 Value *V = buildMultiplyTree(Builder, OuterProduct);
1947 return V;
1948}
1949
1950Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
1952 // We can only optimize the multiplies when there is a chain of more than
1953 // three, such that a balanced tree might require fewer total multiplies.
1954 if (Ops.size() < 4)
1955 return nullptr;
1956
1957 // Try to turn linear trees of multiplies without other uses of the
1958 // intermediate stages into minimal multiply DAGs with perfect sub-expression
1959 // re-use.
1960 SmallVector<Factor, 4> Factors;
1961 if (!collectMultiplyFactors(Ops, Factors))
1962 return nullptr; // All distinct factors, so nothing left for us to do.
1963
1964 IRBuilder<> Builder(I);
1965 // The reassociate transformation for FP operations is performed only
1966 // if unsafe algebra is permitted by FastMathFlags. Propagate those flags
1967 // to the newly generated operations.
1968 if (auto FPI = dyn_cast<FPMathOperator>(I))
1969 Builder.setFastMathFlags(FPI->getFastMathFlags());
1970
1971 Value *V = buildMinimalMultiplyDAG(Builder, Factors);
1972 if (Ops.empty())
1973 return V;
1974
1975 ValueEntry NewEntry = ValueEntry(getRank(V), V);
1976 Ops.insert(llvm::lower_bound(Ops, NewEntry), NewEntry);
1977 return nullptr;
1978}
1979
1980Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
1982 // Now that we have the linearized expression tree, try to optimize it.
1983 // Start by folding any constants that we found.
1984 const DataLayout &DL = I->getModule()->getDataLayout();
1985 Constant *Cst = nullptr;
1986 unsigned Opcode = I->getOpcode();
1987 while (!Ops.empty()) {
1988 if (auto *C = dyn_cast<Constant>(Ops.back().Op)) {
1989 if (!Cst) {
1990 Ops.pop_back();
1991 Cst = C;
1992 continue;
1993 }
1994 if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, C, Cst, DL)) {
1995 Ops.pop_back();
1996 Cst = Res;
1997 continue;
1998 }
1999 }
2000 break;
2001 }
2002 // If there was nothing but constants then we are done.
2003 if (Ops.empty())
2004 return Cst;
2005
2006 // Put the combined constant back at the end of the operand list, except if
2007 // there is no point. For example, an add of 0 gets dropped here, while a
2008 // multiplication by zero turns the whole expression into zero.
2009 if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) {
2010 if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
2011 return Cst;
2012 Ops.push_back(ValueEntry(0, Cst));
2013 }
2014
2015 if (Ops.size() == 1) return Ops[0].Op;
2016
2017 // Handle destructive annihilation due to identities between elements in the
2018 // argument list here.
2019 unsigned NumOps = Ops.size();
2020 switch (Opcode) {
2021 default: break;
2022 case Instruction::And:
2023 case Instruction::Or:
2024 if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
2025 return Result;
2026 break;
2027
2028 case Instruction::Xor:
2029 if (Value *Result = OptimizeXor(I, Ops))
2030 return Result;
2031 break;
2032
2033 case Instruction::Add:
2034 case Instruction::FAdd:
2035 if (Value *Result = OptimizeAdd(I, Ops))
2036 return Result;
2037 break;
2038
2039 case Instruction::Mul:
2040 case Instruction::FMul:
2041 if (Value *Result = OptimizeMul(I, Ops))
2042 return Result;
2043 break;
2044 }
2045
2046 if (Ops.size() != NumOps)
2047 return OptimizeExpression(I, Ops);
2048 return nullptr;
2049}
2050
2051// Remove dead instructions and if any operands are trivially dead add them to
2052// Insts so they will be removed as well.
2053void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I,
2054 OrderedSet &Insts) {
2055 assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
2056 SmallVector<Value *, 4> Ops(I->operands());
2057 ValueRankMap.erase(I);
2058 Insts.remove(I);
2059 RedoInsts.remove(I);
2061 I->eraseFromParent();
2062 for (auto *Op : Ops)
2063 if (Instruction *OpInst = dyn_cast<Instruction>(Op))
2064 if (OpInst->use_empty())
2065 Insts.insert(OpInst);
2066}
2067
2068/// Zap the given instruction, adding interesting operands to the work list.
2069void ReassociatePass::EraseInst(Instruction *I) {
2070 assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
2071 LLVM_DEBUG(dbgs() << "Erasing dead inst: "; I->dump());
2072
2073 SmallVector<Value *, 8> Ops(I->operands());
2074 // Erase the dead instruction.
2075 ValueRankMap.erase(I);
2076 RedoInsts.remove(I);
2078 I->eraseFromParent();
2079 // Optimize its operands.
2080 SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
2081 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2082 if (Instruction *Op = dyn_cast<Instruction>(Ops[i])) {
2083 // If this is a node in an expression tree, climb to the expression root
2084 // and add that since that's where optimization actually happens.
2085 unsigned Opcode = Op->getOpcode();
2086 while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
2087 Visited.insert(Op).second)
2088 Op = Op->user_back();
2089
2090 // The instruction we're going to push may be coming from a
2091 // dead block, and Reassociate skips the processing of unreachable
2092 // blocks because it's a waste of time and also because it can
2093 // lead to infinite loop due to LLVM's non-standard definition
2094 // of dominance.
2095 if (ValueRankMap.contains(Op))
2096 RedoInsts.insert(Op);
2097 }
2098
2099 MadeChange = true;
2100}
2101
2102/// Recursively analyze an expression to build a list of instructions that have
2103/// negative floating-point constant operands. The caller can then transform
2104/// the list to create positive constants for better reassociation and CSE.
2106 SmallVectorImpl<Instruction *> &Candidates) {
2107 // Handle only one-use instructions. Combining negations does not justify
2108 // replicating instructions.
2109 Instruction *I;
2110 if (!match(V, m_OneUse(m_Instruction(I))))
2111 return;
2112
2113 // Handle expressions of multiplications and divisions.
2114 // TODO: This could look through floating-point casts.
2115 const APFloat *C;
2116 switch (I->getOpcode()) {
2117 case Instruction::FMul:
2118 // Not expecting non-canonical code here. Bail out and wait.
2119 if (match(I->getOperand(0), m_Constant()))
2120 break;
2121
2122 if (match(I->getOperand(1), m_APFloat(C)) && C->isNegative()) {
2123 Candidates.push_back(I);
2124 LLVM_DEBUG(dbgs() << "FMul with negative constant: " << *I << '\n');
2125 }
2126 getNegatibleInsts(I->getOperand(0), Candidates);
2127 getNegatibleInsts(I->getOperand(1), Candidates);
2128 break;
2129 case Instruction::FDiv:
2130 // Not expecting non-canonical code here. Bail out and wait.
2131 if (match(I->getOperand(0), m_Constant()) &&
2132 match(I->getOperand(1), m_Constant()))
2133 break;
2134
2135 if ((match(I->getOperand(0), m_APFloat(C)) && C->isNegative()) ||
2136 (match(I->getOperand(1), m_APFloat(C)) && C->isNegative())) {
2137 Candidates.push_back(I);
2138 LLVM_DEBUG(dbgs() << "FDiv with negative constant: " << *I << '\n');
2139 }
2140 getNegatibleInsts(I->getOperand(0), Candidates);
2141 getNegatibleInsts(I->getOperand(1), Candidates);
2142 break;
2143 default:
2144 break;
2145 }
2146}
2147
2148/// Given an fadd/fsub with an operand that is a one-use instruction
2149/// (the fadd/fsub), try to change negative floating-point constants into
2150/// positive constants to increase potential for reassociation and CSE.
2151Instruction *ReassociatePass::canonicalizeNegFPConstantsForOp(Instruction *I,
2152 Instruction *Op,
2153 Value *OtherOp) {
2154 assert((I->getOpcode() == Instruction::FAdd ||
2155 I->getOpcode() == Instruction::FSub) && "Expected fadd/fsub");
2156
2157 // Collect instructions with negative FP constants from the subtree that ends
2158 // in Op.
2160 getNegatibleInsts(Op, Candidates);
2161 if (Candidates.empty())
2162 return nullptr;
2163
2164 // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the
2165 // resulting subtract will be broken up later. This can get us into an
2166 // infinite loop during reassociation.
2167 bool IsFSub = I->getOpcode() == Instruction::FSub;
2168 bool NeedsSubtract = !IsFSub && Candidates.size() % 2 == 1;
2169 if (NeedsSubtract && ShouldBreakUpSubtract(I))
2170 return nullptr;
2171
2172 for (Instruction *Negatible : Candidates) {
2173 const APFloat *C;
2174 if (match(Negatible->getOperand(0), m_APFloat(C))) {
2175 assert(!match(Negatible->getOperand(1), m_Constant()) &&
2176 "Expecting only 1 constant operand");
2177 assert(C->isNegative() && "Expected negative FP constant");
2178 Negatible->setOperand(0, ConstantFP::get(Negatible->getType(), abs(*C)));
2179 MadeChange = true;
2180 }
2181 if (match(Negatible->getOperand(1), m_APFloat(C))) {
2182 assert(!match(Negatible->getOperand(0), m_Constant()) &&
2183 "Expecting only 1 constant operand");
2184 assert(C->isNegative() && "Expected negative FP constant");
2185 Negatible->setOperand(1, ConstantFP::get(Negatible->getType(), abs(*C)));
2186 MadeChange = true;
2187 }
2188 }
2189 assert(MadeChange == true && "Negative constant candidate was not changed");
2190
2191 // Negations cancelled out.
2192 if (Candidates.size() % 2 == 0)
2193 return I;
2194
2195 // Negate the final operand in the expression by flipping the opcode of this
2196 // fadd/fsub.
2197 assert(Candidates.size() % 2 == 1 && "Expected odd number");
2198 IRBuilder<> Builder(I);
2199 Value *NewInst = IsFSub ? Builder.CreateFAddFMF(OtherOp, Op, I)
2200 : Builder.CreateFSubFMF(OtherOp, Op, I);
2201 I->replaceAllUsesWith(NewInst);
2202 RedoInsts.insert(I);
2203 return dyn_cast<Instruction>(NewInst);
2204}
2205
2206/// Canonicalize expressions that contain a negative floating-point constant
2207/// of the following form:
2208/// OtherOp + (subtree) -> OtherOp {+/-} (canonical subtree)
2209/// (subtree) + OtherOp -> OtherOp {+/-} (canonical subtree)
2210/// OtherOp - (subtree) -> OtherOp {+/-} (canonical subtree)
2211///
2212/// The fadd/fsub opcode may be switched to allow folding a negation into the
2213/// input instruction.
2214Instruction *ReassociatePass::canonicalizeNegFPConstants(Instruction *I) {
2215 LLVM_DEBUG(dbgs() << "Combine negations for: " << *I << '\n');
2216 Value *X;
2217 Instruction *Op;
2219 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2220 I = R;
2222 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2223 I = R;
2225 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2226 I = R;
2227 return I;
2228}
2229
2230/// Inspect and optimize the given instruction. Note that erasing
2231/// instructions is not allowed.
2232void ReassociatePass::OptimizeInst(Instruction *I) {
2233 // Only consider operations that we understand.
2234 if (!isa<UnaryOperator>(I) && !isa<BinaryOperator>(I))
2235 return;
2236
2237 if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
2238 // If an operand of this shift is a reassociable multiply, or if the shift
2239 // is used by a reassociable multiply or add, turn into a multiply.
2240 if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
2241 (I->hasOneUse() &&
2242 (isReassociableOp(I->user_back(), Instruction::Mul) ||
2243 isReassociableOp(I->user_back(), Instruction::Add)))) {
2245 RedoInsts.insert(I);
2246 MadeChange = true;
2247 I = NI;
2248 }
2249
2250 // Commute binary operators, to canonicalize the order of their operands.
2251 // This can potentially expose more CSE opportunities, and makes writing other
2252 // transformations simpler.
2253 if (I->isCommutative())
2254 canonicalizeOperands(I);
2255
2256 // Canonicalize negative constants out of expressions.
2257 if (Instruction *Res = canonicalizeNegFPConstants(I))
2258 I = Res;
2259
2260 // Don't optimize floating-point instructions unless they have the
2261 // appropriate FastMathFlags for reassociation enabled.
2262 if (isa<FPMathOperator>(I) && !hasFPAssociativeFlags(I))
2263 return;
2264
2265 // Do not reassociate boolean (i1) expressions. We want to preserve the
2266 // original order of evaluation for short-circuited comparisons that
2267 // SimplifyCFG has folded to AND/OR expressions. If the expression
2268 // is not further optimized, it is likely to be transformed back to a
2269 // short-circuited form for code gen, and the source order may have been
2270 // optimized for the most likely conditions.
2271 if (I->getType()->isIntegerTy(1))
2272 return;
2273
2274 // If this is a bitwise or instruction of operands
2275 // with no common bits set, convert it to X+Y.
2276 if (I->getOpcode() == Instruction::Or &&
2278 (cast<PossiblyDisjointInst>(I)->isDisjoint() ||
2279 haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1),
2280 SimplifyQuery(I->getModule()->getDataLayout(),
2281 /*DT=*/nullptr, /*AC=*/nullptr, I)))) {
2283 RedoInsts.insert(I);
2284 MadeChange = true;
2285 I = NI;
2286 }
2287
2288 // If this is a subtract instruction which is not already in negate form,
2289 // see if we can convert it to X+-Y.
2290 if (I->getOpcode() == Instruction::Sub) {
2291 if (ShouldBreakUpSubtract(I)) {
2292 Instruction *NI = BreakUpSubtract(I, RedoInsts);
2293 RedoInsts.insert(I);
2294 MadeChange = true;
2295 I = NI;
2296 } else if (match(I, m_Neg(m_Value()))) {
2297 // Otherwise, this is a negation. See if the operand is a multiply tree
2298 // and if this is not an inner node of a multiply tree.
2299 if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
2300 (!I->hasOneUse() ||
2301 !isReassociableOp(I->user_back(), Instruction::Mul))) {
2303 // If the negate was simplified, revisit the users to see if we can
2304 // reassociate further.
2305 for (User *U : NI->users()) {
2306 if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2307 RedoInsts.insert(Tmp);
2308 }
2309 RedoInsts.insert(I);
2310 MadeChange = true;
2311 I = NI;
2312 }
2313 }
2314 } else if (I->getOpcode() == Instruction::FNeg ||
2315 I->getOpcode() == Instruction::FSub) {
2316 if (ShouldBreakUpSubtract(I)) {
2317 Instruction *NI = BreakUpSubtract(I, RedoInsts);
2318 RedoInsts.insert(I);
2319 MadeChange = true;
2320 I = NI;
2321 } else if (match(I, m_FNeg(m_Value()))) {
2322 // Otherwise, this is a negation. See if the operand is a multiply tree
2323 // and if this is not an inner node of a multiply tree.
2324 Value *Op = isa<BinaryOperator>(I) ? I->getOperand(1) :
2325 I->getOperand(0);
2326 if (isReassociableOp(Op, Instruction::FMul) &&
2327 (!I->hasOneUse() ||
2328 !isReassociableOp(I->user_back(), Instruction::FMul))) {
2329 // If the negate was simplified, revisit the users to see if we can
2330 // reassociate further.
2332 for (User *U : NI->users()) {
2333 if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2334 RedoInsts.insert(Tmp);
2335 }
2336 RedoInsts.insert(I);
2337 MadeChange = true;
2338 I = NI;
2339 }
2340 }
2341 }
2342
2343 // If this instruction is an associative binary operator, process it.
2344 if (!I->isAssociative()) return;
2345 BinaryOperator *BO = cast<BinaryOperator>(I);
2346
2347 // If this is an interior node of a reassociable tree, ignore it until we
2348 // get to the root of the tree, to avoid N^2 analysis.
2349 unsigned Opcode = BO->getOpcode();
2350 if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
2351 // During the initial run we will get to the root of the tree.
2352 // But if we get here while we are redoing instructions, there is no
2353 // guarantee that the root will be visited. So Redo later
2354 if (BO->user_back() != BO &&
2355 BO->getParent() == BO->user_back()->getParent())
2356 RedoInsts.insert(BO->user_back());
2357 return;
2358 }
2359
2360 // If this is an add tree that is used by a sub instruction, ignore it
2361 // until we process the subtract.
2362 if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
2363 cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub)
2364 return;
2365 if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd &&
2366 cast<Instruction>(BO->user_back())->getOpcode() == Instruction::FSub)
2367 return;
2368
2369 ReassociateExpression(BO);
2370}
2371
2372void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
2373 // First, walk the expression tree, linearizing the tree, collecting the
2374 // operand information.
2376 bool HasNUW = true;
2377 MadeChange |= LinearizeExprTree(I, Tree, RedoInsts, HasNUW);
2379 Ops.reserve(Tree.size());
2380 for (const RepeatedValue &E : Tree)
2381 Ops.append(E.second.getZExtValue(), ValueEntry(getRank(E.first), E.first));
2382
2383 LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
2384
2385 // Now that we have linearized the tree to a list and have gathered all of
2386 // the operands and their ranks, sort the operands by their rank. Use a
2387 // stable_sort so that values with equal ranks will have their relative
2388 // positions maintained (and so the compiler is deterministic). Note that
2389 // this sorts so that the highest ranking values end up at the beginning of
2390 // the vector.
2391 llvm::stable_sort(Ops);
2392
2393 // Now that we have the expression tree in a convenient
2394 // sorted form, optimize it globally if possible.
2395 if (Value *V = OptimizeExpression(I, Ops)) {
2396 if (V == I)
2397 // Self-referential expression in unreachable code.
2398 return;
2399 // This expression tree simplified to something that isn't a tree,
2400 // eliminate it.
2401 LLVM_DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
2402 I->replaceAllUsesWith(V);
2403 if (Instruction *VI = dyn_cast<Instruction>(V))
2404 if (I->getDebugLoc())
2405 VI->setDebugLoc(I->getDebugLoc());
2406 RedoInsts.insert(I);
2407 ++NumAnnihil;
2408 return;
2409 }
2410
2411 // We want to sink immediates as deeply as possible except in the case where
2412 // this is a multiply tree used only by an add, and the immediate is a -1.
2413 // In this case we reassociate to put the negation on the outside so that we
2414 // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
2415 if (I->hasOneUse()) {
2416 if (I->getOpcode() == Instruction::Mul &&
2417 cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
2418 isa<ConstantInt>(Ops.back().Op) &&
2419 cast<ConstantInt>(Ops.back().Op)->isMinusOne()) {
2420 ValueEntry Tmp = Ops.pop_back_val();
2421 Ops.insert(Ops.begin(), Tmp);
2422 } else if (I->getOpcode() == Instruction::FMul &&
2423 cast<Instruction>(I->user_back())->getOpcode() ==
2424 Instruction::FAdd &&
2425 isa<ConstantFP>(Ops.back().Op) &&
2426 cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)) {
2427 ValueEntry Tmp = Ops.pop_back_val();
2428 Ops.insert(Ops.begin(), Tmp);
2429 }
2430 }
2431
2432 LLVM_DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
2433
2434 if (Ops.size() == 1) {
2435 if (Ops[0].Op == I)
2436 // Self-referential expression in unreachable code.
2437 return;
2438
2439 // This expression tree simplified to something that isn't a tree,
2440 // eliminate it.
2441 I->replaceAllUsesWith(Ops[0].Op);
2442 if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
2443 OI->setDebugLoc(I->getDebugLoc());
2444 RedoInsts.insert(I);
2445 return;
2446 }
2447
2448 if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) {
2449 // Find the pair with the highest count in the pairmap and move it to the
2450 // back of the list so that it can later be CSE'd.
2451 // example:
2452 // a*b*c*d*e
2453 // if c*e is the most "popular" pair, we can express this as
2454 // (((c*e)*d)*b)*a
2455 unsigned Max = 1;
2456 unsigned BestRank = 0;
2457 std::pair<unsigned, unsigned> BestPair;
2458 unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin;
2459 unsigned LimitIdx = 0;
2460 // With the CSE-driven heuristic, we are about to slap two values at the
2461 // beginning of the expression whereas they could live very late in the CFG.
2462 // When using the CSE-local heuristic we avoid creating dependences from
2463 // completely unrelated part of the CFG by limiting the expression
2464 // reordering on the values that live in the first seen basic block.
2465 // The main idea is that we want to avoid forming expressions that would
2466 // become loop dependent.
2467 if (UseCSELocalOpt) {
2468 const BasicBlock *FirstSeenBB = nullptr;
2469 int StartIdx = Ops.size() - 1;
2470 // Skip the first value of the expression since we need at least two
2471 // values to materialize an expression. I.e., even if this value is
2472 // anchored in a different basic block, the actual first sub expression
2473 // will be anchored on the second value.
2474 for (int i = StartIdx - 1; i != -1; --i) {
2475 const Value *Val = Ops[i].Op;
2476 const auto *CurrLeafInstr = dyn_cast<Instruction>(Val);
2477 const BasicBlock *SeenBB = nullptr;
2478 if (!CurrLeafInstr) {
2479 // The value is free of any CFG dependencies.
2480 // Do as if it lives in the entry block.
2481 //
2482 // We do this to make sure all the values falling on this path are
2483 // seen through the same anchor point. The rationale is these values
2484 // can be combined together to from a sub expression free of any CFG
2485 // dependencies so we want them to stay together.
2486 // We could be cleverer and postpone the anchor down to the first
2487 // anchored value, but that's likely complicated to get right.
2488 // E.g., we wouldn't want to do that if that means being stuck in a
2489 // loop.
2490 //
2491 // For instance, we wouldn't want to change:
2492 // res = arg1 op arg2 op arg3 op ... op loop_val1 op loop_val2 ...
2493 // into
2494 // res = loop_val1 op arg1 op arg2 op arg3 op ... op loop_val2 ...
2495 // Because all the sub expressions with arg2..N would be stuck between
2496 // two loop dependent values.
2497 SeenBB = &I->getParent()->getParent()->getEntryBlock();
2498 } else {
2499 SeenBB = CurrLeafInstr->getParent();
2500 }
2501
2502 if (!FirstSeenBB) {
2503 FirstSeenBB = SeenBB;
2504 continue;
2505 }
2506 if (FirstSeenBB != SeenBB) {
2507 // ith value is in a different basic block.
2508 // Rewind the index once to point to the last value on the same basic
2509 // block.
2510 LimitIdx = i + 1;
2511 LLVM_DEBUG(dbgs() << "CSE reordering: Consider values between ["
2512 << LimitIdx << ", " << StartIdx << "]\n");
2513 break;
2514 }
2515 }
2516 }
2517 for (unsigned i = Ops.size() - 1; i > LimitIdx; --i) {
2518 // We must use int type to go below zero when LimitIdx is 0.
2519 for (int j = i - 1; j >= (int)LimitIdx; --j) {
2520 unsigned Score = 0;
2521 Value *Op0 = Ops[i].Op;
2522 Value *Op1 = Ops[j].Op;
2523 if (std::less<Value *>()(Op1, Op0))
2524 std::swap(Op0, Op1);
2525 auto it = PairMap[Idx].find({Op0, Op1});
2526 if (it != PairMap[Idx].end()) {
2527 // Functions like BreakUpSubtract() can erase the Values we're using
2528 // as keys and create new Values after we built the PairMap. There's a
2529 // small chance that the new nodes can have the same address as
2530 // something already in the table. We shouldn't accumulate the stored
2531 // score in that case as it refers to the wrong Value.
2532 if (it->second.isValid())
2533 Score += it->second.Score;
2534 }
2535
2536 unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank);
2537
2538 // By construction, the operands are sorted in reverse order of their
2539 // topological order.
2540 // So we tend to form (sub) expressions with values that are close to
2541 // each other.
2542 //
2543 // Now to expose more CSE opportunities we want to expose the pair of
2544 // operands that occur the most (as statically computed in
2545 // BuildPairMap.) as the first sub-expression.
2546 //
2547 // If two pairs occur as many times, we pick the one with the
2548 // lowest rank, meaning the one with both operands appearing first in
2549 // the topological order.
2550 if (Score > Max || (Score == Max && MaxRank < BestRank)) {
2551 BestPair = {j, i};
2552 Max = Score;
2553 BestRank = MaxRank;
2554 }
2555 }
2556 }
2557 if (Max > 1) {
2558 auto Op0 = Ops[BestPair.first];
2559 auto Op1 = Ops[BestPair.second];
2560 Ops.erase(&Ops[BestPair.second]);
2561 Ops.erase(&Ops[BestPair.first]);
2562 Ops.push_back(Op0);
2563 Ops.push_back(Op1);
2564 }
2565 }
2566 LLVM_DEBUG(dbgs() << "RAOut after CSE reorder:\t"; PrintOps(I, Ops);
2567 dbgs() << '\n');
2568 // Now that we ordered and optimized the expressions, splat them back into
2569 // the expression tree, removing any unneeded nodes.
2570 RewriteExprTree(I, Ops, HasNUW);
2571}
2572
2573void
2574ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
2575 // Make a "pairmap" of how often each operand pair occurs.
2576 for (BasicBlock *BI : RPOT) {
2577 for (Instruction &I : *BI) {
2578 if (!I.isAssociative() || !I.isBinaryOp())
2579 continue;
2580
2581 // Ignore nodes that aren't at the root of trees.
2582 if (I.hasOneUse() && I.user_back()->getOpcode() == I.getOpcode())
2583 continue;
2584
2585 // Collect all operands in a single reassociable expression.
2586 // Since Reassociate has already been run once, we can assume things
2587 // are already canonical according to Reassociation's regime.
2588 SmallVector<Value *, 8> Worklist = { I.getOperand(0), I.getOperand(1) };
2590 while (!Worklist.empty() && Ops.size() <= GlobalReassociateLimit) {
2591 Value *Op = Worklist.pop_back_val();
2592 Instruction *OpI = dyn_cast<Instruction>(Op);
2593 if (!OpI || OpI->getOpcode() != I.getOpcode() || !OpI->hasOneUse()) {
2594 Ops.push_back(Op);
2595 continue;
2596 }
2597 // Be paranoid about self-referencing expressions in unreachable code.
2598 if (OpI->getOperand(0) != OpI)
2599 Worklist.push_back(OpI->getOperand(0));
2600 if (OpI->getOperand(1) != OpI)
2601 Worklist.push_back(OpI->getOperand(1));
2602 }
2603 // Skip extremely long expressions.
2604 if (Ops.size() > GlobalReassociateLimit)
2605 continue;
2606
2607 // Add all pairwise combinations of operands to the pair map.
2608 unsigned BinaryIdx = I.getOpcode() - Instruction::BinaryOpsBegin;
2610 for (unsigned i = 0; i < Ops.size() - 1; ++i) {
2611 for (unsigned j = i + 1; j < Ops.size(); ++j) {
2612 // Canonicalize operand orderings.
2613 Value *Op0 = Ops[i];
2614 Value *Op1 = Ops[j];
2615 if (std::less<Value *>()(Op1, Op0))
2616 std::swap(Op0, Op1);
2617 if (!Visited.insert({Op0, Op1}).second)
2618 continue;
2619 auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, {Op0, Op1, 1}});
2620 if (!res.second) {
2621 // If either key value has been erased then we've got the same
2622 // address by coincidence. That can't happen here because nothing is
2623 // erasing values but it can happen by the time we're querying the
2624 // map.
2625 assert(res.first->second.isValid() && "WeakVH invalidated");
2626 ++res.first->second.Score;
2627 }
2628 }
2629 }
2630 }
2631 }
2632}
2633
2635 // Get the functions basic blocks in Reverse Post Order. This order is used by
2636 // BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
2637 // blocks (it has been seen that the analysis in this pass could hang when
2638 // analysing dead basic blocks).
2640
2641 // Calculate the rank map for F.
2642 BuildRankMap(F, RPOT);
2643
2644 // Build the pair map before running reassociate.
2645 // Technically this would be more accurate if we did it after one round
2646 // of reassociation, but in practice it doesn't seem to help much on
2647 // real-world code, so don't waste the compile time running reassociate
2648 // twice.
2649 // If a user wants, they could expicitly run reassociate twice in their
2650 // pass pipeline for further potential gains.
2651 // It might also be possible to update the pair map during runtime, but the
2652 // overhead of that may be large if there's many reassociable chains.
2653 BuildPairMap(RPOT);
2654
2655 MadeChange = false;
2656
2657 // Traverse the same blocks that were analysed by BuildRankMap.
2658 for (BasicBlock *BI : RPOT) {
2659 assert(RankMap.count(&*BI) && "BB should be ranked.");
2660 // Optimize every instruction in the basic block.
2661 for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
2662 if (isInstructionTriviallyDead(&*II)) {
2663 EraseInst(&*II++);
2664 } else {
2665 OptimizeInst(&*II);
2666 assert(II->getParent() == &*BI && "Moved to a different block!");
2667 ++II;
2668 }
2669
2670 // Make a copy of all the instructions to be redone so we can remove dead
2671 // instructions.
2672 OrderedSet ToRedo(RedoInsts);
2673 // Iterate over all instructions to be reevaluated and remove trivially dead
2674 // instructions. If any operand of the trivially dead instruction becomes
2675 // dead mark it for deletion as well. Continue this process until all
2676 // trivially dead instructions have been removed.
2677 while (!ToRedo.empty()) {
2678 Instruction *I = ToRedo.pop_back_val();
2680 RecursivelyEraseDeadInsts(I, ToRedo);
2681 MadeChange = true;
2682 }
2683 }
2684
2685 // Now that we have removed dead instructions, we can reoptimize the
2686 // remaining instructions.
2687 while (!RedoInsts.empty()) {
2688 Instruction *I = RedoInsts.front();
2689 RedoInsts.erase(RedoInsts.begin());
2691 EraseInst(I);
2692 else
2693 OptimizeInst(I);
2694 }
2695 }
2696
2697 // We are done with the rank map and pair map.
2698 RankMap.clear();
2699 ValueRankMap.clear();
2700 for (auto &Entry : PairMap)
2701 Entry.clear();
2702
2703 if (MadeChange) {
2706 return PA;
2707 }
2708
2709 return PreservedAnalyses::all();
2710}
2711
2712namespace {
2713
2714 class ReassociateLegacyPass : public FunctionPass {
2715 ReassociatePass Impl;
2716
2717 public:
2718 static char ID; // Pass identification, replacement for typeid
2719
2720 ReassociateLegacyPass() : FunctionPass(ID) {
2722 }
2723
2724 bool runOnFunction(Function &F) override {
2725 if (skipFunction(F))
2726 return false;
2727
2728 FunctionAnalysisManager DummyFAM;
2729 auto PA = Impl.run(F, DummyFAM);
2730 return !PA.areAllPreserved();
2731 }
2732
2733 void getAnalysisUsage(AnalysisUsage &AU) const override {
2734 AU.setPreservesCFG();
2738 }
2739 };
2740
2741} // end anonymous namespace
2742
2743char ReassociateLegacyPass::ID = 0;
2744
2745INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
2746 "Reassociate expressions", false, false)
2747
2748// Public interface to the Reassociate pass
2750 return new ReassociateLegacyPass();
2751}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const LLT S1
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This is the interface for LLVM's primary stateless and local alias analysis.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
uint64_t Size
static bool runOnFunction(Function &F, bool PostInlining)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This is the interface for a simple mod/ref and alias analysis over globals.
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, ScalarEvolution *SE, LoopInfo *LI)
isInteresting - Test whether the given expression is "interesting" when used by the given expression,...
Definition: IVUsers.cpp:56
static bool isReassociableOp(Instruction *I, unsigned IntOpcode, unsigned FPOpcode)
Definition: LICM.cpp:2675
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
nary reassociate
#define P(N)
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static void PrintOps(Instruction *I, const SmallVectorImpl< ValueEntry > &Ops)
Print out the expression identified in the Ops list.
Definition: Reassociate.cpp:82
static Value * EmitAddTreeOfValues(BasicBlock::iterator It, SmallVectorImpl< WeakTrackingVH > &Ops)
Emit a tree of add instructions, summing Ops together and returning the result.
static bool ShouldBreakUpSubtract(Instruction *Sub)
Return true if we should break up this subtract of X-Y into (X + -Y).
static Value * buildMultiplyTree(IRBuilderBase &Builder, SmallVectorImpl< Value * > &Ops)
Build a tree of multiplies, computing the product of Ops.
static void getNegatibleInsts(Value *V, SmallVectorImpl< Instruction * > &Candidates)
Recursively analyze an expression to build a list of instructions that have negative floating-point c...
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore, Value *FlagsOp)
static unsigned CarmichaelShift(unsigned Bitwidth)
Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael function.
static BinaryOperator * BreakUpSubtract(Instruction *Sub, ReassociatePass::OrderedSet &ToRedo)
If we have (X-Y), and if either X is an add, or if this is only used by an add, transform this into (...
static void FindSingleUseMultiplyFactors(Value *V, SmallVectorImpl< Value * > &Factors)
If V is a single-use multiply, recursively add its operands as factors, otherwise add V to the list o...
static Value * OptimizeAndOrXor(unsigned Opcode, SmallVectorImpl< ValueEntry > &Ops)
Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
static BinaryOperator * convertOrWithNoCommonBitsToAdd(Instruction *Or)
If we have (X|Y), and iff X and Y have no common bits set, transform this into (X+Y) to allow arithme...
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore, Value *FlagsOp)
static bool collectMultiplyFactors(SmallVectorImpl< ValueEntry > &Ops, SmallVectorImpl< Factor > &Factors)
Build up a vector of value/power pairs factoring a product.
static BinaryOperator * ConvertShiftToMul(Instruction *Shl)
If this is a shift of a reassociable multiply or is used by one, change this into a multiply by a con...
static cl::opt< bool > UseCSELocalOpt(DEBUG_TYPE "-use-cse-local", cl::desc("Only reorder expressions within a basic block " "when exposing CSE opportunities"), cl::init(true), cl::Hidden)
static unsigned FindInOperandList(const SmallVectorImpl< ValueEntry > &Ops, unsigned i, Value *X)
Scan backwards and forwards among values with the same rank as element i to see if X exists.
static BinaryOperator * LowerNegateToMultiply(Instruction *Neg)
Replace 0-X with X*-1.
static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode)
Add the extra weight 'RHS' to the existing weight 'LHS', reducing the combined weight using any speci...
static bool LinearizeExprTree(Instruction *I, SmallVectorImpl< RepeatedValue > &Ops, ReassociatePass::OrderedSet &ToRedo, bool &HasNUW)
Given an associative binary expression, return the leaf nodes in Ops along with their weights (how ma...
std::pair< Value *, APInt > RepeatedValue
static Instruction * CreateNeg(Value *S1, const Twine &Name, BasicBlock::iterator InsertBefore, Value *FlagsOp)
static bool hasFPAssociativeFlags(Instruction *I)
Return true if I is an instruction with the FastMathFlags that are needed for general reassociation s...
static Value * createAndInstr(BasicBlock::iterator InsertBefore, Value *Opnd, const APInt &ConstOpnd)
Helper function of CombineXorOpnd().
static Value * NegateValue(Value *V, Instruction *BI, ReassociatePass::OrderedSet &ToRedo)
Insert instructions before the instruction pointed to by BI, that computes the negative version of th...
static bool shouldConvertOrWithNoCommonBitsToAdd(Instruction *Or)
Return true if it may be profitable to convert this (X|Y) into (X+Y).
#define DEBUG_TYPE
Definition: Reassociate.cpp:68
static bool isLoadCombineCandidate(Instruction *Or)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getFastMathFlags(const MachineInstr &I)
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
Value * RHS
Value * LHS
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:76
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
bool isMinValue() const
Determine if this is the smallest unsigned value.
Definition: APInt.h:395
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:449
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
Represent the analysis usage information of a pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:379
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:165
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a binary instruction, given the opcode and the two operands.
BinaryOps getOpcode() const
Definition: InstrTypes.h:513
static BinaryOperator * CreateNeg(Value *Op, const Twine &Name, BasicBlock::iterator InsertBefore)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:70
static Constant * getBinOpAbsorber(unsigned Opcode, Type *Ty)
Return the absorbing element for the given binary operation, i.e.
Definition: Constants.cpp:2667
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2560
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2596
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2523
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
const BasicBlock & getEntryBlock() const
Definition: Function.h:787
Legacy wrapper pass to provide the GlobalsAAResult object.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateFAddFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1547
Value * CreateFSubFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1574
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:311
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1587
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1361
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2667
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:83
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
const BasicBlock * getParent() const
Definition: Instruction.h:152
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:149
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:87
bool isNilpotent() const
Return true if the instruction is nilpotent:
Definition: Instruction.h:730
const char * getOpcodeName() const
Definition: Instruction.h:254
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
bool isIdempotent() const
Return true if the instruction is idempotent:
Definition: Instruction.h:716
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
bool areAllPreserved() const
Test whether all analyses are preserved (and none are abandoned).
Definition: Analysis.h:281
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:144
Reassociate commutative expressions.
Definition: Reassociate.h:71
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:234
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name, BasicBlock::iterator InsertBefore)
Definition: InstrTypes.h:191
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1808
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
void clearSubclassOptionalData()
Clear the optional flags contained in this value.
Definition: Value.h:544
void deleteValue()
Delete a pointer to a generic Value.
Definition: Value.cpp:110
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
self_iterator getIterator()
Definition: ilist_node.h:109
Utility class representing a non-constant Xor-operand.
Value * getSymbolicPart() const
unsigned getSymbolicRank() const
void setSymbolicRank(unsigned R)
const APInt & getConstPart() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:816
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
apfloat_match m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
Definition: PatternMatch.h:316
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
constexpr double e
Definition: MathExtras.h:31
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool haveNoCommonBitsSet(const WithCache< const Value * > &LHSCache, const WithCache< const Value * > &RHSCache, const SimplifyQuery &SQ)
Return true if LHS and RHS have no common bits set.
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1664
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1381
FunctionPass * createReassociatePass()
void initializeReassociateLegacyPassPass(PassRegistry &)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:400
Constant * ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op, const DataLayout &DL)
Attempt to constant fold a unary operation with the specified operand.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
bool replaceDbgUsesWithUndef(Instruction *I)
Replace all the uses of an SSA value in @llvm.dbg intrinsics with undef.
Definition: Local.cpp:617
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1954
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool mayHaveNonDefUseDependency(const Instruction &I)
Returns true if the result or effects of the given instructions I depend values not reachable through...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
Utility class representing a base and exponent pair which form one factor of some product.
Definition: Reassociate.h:59