LLVM 18.0.0git
Reassociate.cpp
Go to the documentation of this file.
1//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass reassociates commutative expressions in an order that is designed
10// to promote better constant propagation, GCSE, LICM, PRE, etc.
11//
12// For example: 4 + (x + 5) -> x + (4 + 5)
13//
14// In the implementation of this algorithm, constants are assigned rank = 0,
15// function arguments are rank = 1, and other values are assigned ranks
16// corresponding to the reverse post order traversal of current function
17// (starting at 2), which effectively gives values in deep loops higher rank
18// than values not in loops.
19//
20//===----------------------------------------------------------------------===//
21
23#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
35#include "llvm/IR/Argument.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
39#include "llvm/IR/Constants.h"
40#include "llvm/IR/Function.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
45#include "llvm/IR/Operator.h"
46#include "llvm/IR/PassManager.h"
48#include "llvm/IR/Type.h"
49#include "llvm/IR/User.h"
50#include "llvm/IR/Value.h"
51#include "llvm/IR/ValueHandle.h"
53#include "llvm/Pass.h"
56#include "llvm/Support/Debug.h"
60#include <algorithm>
61#include <cassert>
62#include <utility>
63
64using namespace llvm;
65using namespace reassociate;
66using namespace PatternMatch;
67
68#define DEBUG_TYPE "reassociate"
69
// Pass statistics, reported under -stats in asserts-enabled builds.
STATISTIC(NumChanged, "Number of insts reassociated");
STATISTIC(NumAnnihil, "Number of expr tree annihilated");
STATISTIC(NumFactor , "Number of multiplies factored");

// Command-line knob (default: on). Per its description, restricts expression
// reordering for CSE exposure to within a single basic block.
static cl::opt<bool>
    UseCSELocalOpt(DEBUG_TYPE "-use-cse-local",
                   cl::desc("Only reorder expressions within a basic block "
                            "when exposing CSE opportunities"),
                   cl::init(true), cl::Hidden);
79
80#ifndef NDEBUG
81/// Print out the expression identified in the Ops list.
83 Module *M = I->getModule();
84 dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
85 << *Ops[0].Op->getType() << '\t';
86 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
87 dbgs() << "[ ";
88 Ops[i].Op->printAsOperand(dbgs(), false, M);
89 dbgs() << ", #" << Ops[i].Rank << "] ";
90 }
91}
92#endif
93
94/// Utility class representing a non-constant Xor-operand. We classify
95/// non-constant Xor-Operands into two categories:
96/// C1) The operand is in the form "X & C", where C is a constant and C != ~0
97/// C2)
98/// C2.1) The operand is in the form of "X | C", where C is a non-zero
99/// constant.
100/// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this
101/// operand as "E | 0"
103public:
104 XorOpnd(Value *V);
105
106 bool isInvalid() const { return SymbolicPart == nullptr; }
107 bool isOrExpr() const { return isOr; }
108 Value *getValue() const { return OrigVal; }
109 Value *getSymbolicPart() const { return SymbolicPart; }
110 unsigned getSymbolicRank() const { return SymbolicRank; }
111 const APInt &getConstPart() const { return ConstPart; }
112
113 void Invalidate() { SymbolicPart = OrigVal = nullptr; }
114 void setSymbolicRank(unsigned R) { SymbolicRank = R; }
115
116private:
117 Value *OrigVal;
118 Value *SymbolicPart;
119 APInt ConstPart;
120 unsigned SymbolicRank;
121 bool isOr;
122};
123
125 assert(!isa<ConstantInt>(V) && "No ConstantInt");
126 OrigVal = V;
127 Instruction *I = dyn_cast<Instruction>(V);
128 SymbolicRank = 0;
129
130 if (I && (I->getOpcode() == Instruction::Or ||
131 I->getOpcode() == Instruction::And)) {
132 Value *V0 = I->getOperand(0);
133 Value *V1 = I->getOperand(1);
134 const APInt *C;
135 if (match(V0, m_APInt(C)))
136 std::swap(V0, V1);
137
138 if (match(V1, m_APInt(C))) {
139 ConstPart = *C;
140 SymbolicPart = V0;
141 isOr = (I->getOpcode() == Instruction::Or);
142 return;
143 }
144 }
145
146 // view the operand as "V | 0"
147 SymbolicPart = V;
148 ConstPart = APInt::getZero(V->getType()->getScalarSizeInBits());
149 isOr = true;
150}
151
152/// Return true if I is an instruction with the FastMathFlags that are needed
153/// for general reassociation set. This is not the same as testing
154/// Instruction::isAssociative() because it includes operations like fsub.
155/// (This routine is only intended to be called for floating-point operations.)
157 assert(I && isa<FPMathOperator>(I) && "Should only check FP ops");
158 return I->hasAllowReassoc() && I->hasNoSignedZeros();
159}
160
161/// Return true if V is an instruction of the specified opcode and if it
162/// only has one use.
164 auto *BO = dyn_cast<BinaryOperator>(V);
165 if (BO && BO->hasOneUse() && BO->getOpcode() == Opcode)
166 if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
167 return BO;
168 return nullptr;
169}
170
171static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
172 unsigned Opcode2) {
173 auto *BO = dyn_cast<BinaryOperator>(V);
174 if (BO && BO->hasOneUse() &&
175 (BO->getOpcode() == Opcode1 || BO->getOpcode() == Opcode2))
176 if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
177 return BO;
178 return nullptr;
179}
180
/// Precompute reassociation ranks for the whole function: arguments get small
/// distinct ranks (the first argument gets 3), and each basic block, visited
/// in reverse post-order, gets a rank band (block rank << 16) so instructions
/// in later blocks rank higher than those in earlier ones.
/// NOTE(review): the doxygen extraction dropped the second line of this
/// signature (the RPOT parameter used below) and at least one line that
/// guards which instructions receive precomputed ranks -- verify against the
/// upstream LLVM source before relying on this text.
void ReassociatePass::BuildRankMap(Function &F,
  unsigned Rank = 2;

  // Assign distinct ranks to function arguments.
  for (auto &Arg : F.args()) {
    ValueRankMap[&Arg] = ++Rank;
    LLVM_DEBUG(dbgs() << "Calculated Rank[" << Arg.getName() << "] = " << Rank
                      << "\n");
  }

  // Traverse basic blocks in ReversePostOrder.
  for (BasicBlock *BB : RPOT) {
    // Shift leaves 16 bits of intra-block rank space per basic block.
    unsigned BBRank = RankMap[BB] = ++Rank << 16;

    // Walk the basic block, adding precomputed ranks for any instructions that
    // we cannot move. This ensures that the ranks for these instructions are
    // all different in the block.
    // NOTE(review): a condition selecting which instructions get a rank here
    // appears to have been lost in extraction -- verify upstream.
    for (Instruction &I : *BB)
      ValueRankMap[&I] = ++BBRank;
  }
}
204
205unsigned ReassociatePass::getRank(Value *V) {
206 Instruction *I = dyn_cast<Instruction>(V);
207 if (!I) {
208 if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
209 return 0; // Otherwise it's a global or constant, rank 0.
210 }
211
212 if (unsigned Rank = ValueRankMap[I])
213 return Rank; // Rank already known?
214
215 // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
216 // we can reassociate expressions for code motion! Since we do not recurse
217 // for PHI nodes, we cannot have infinite recursion here, because there
218 // cannot be loops in the value graph that do not go through PHI nodes.
219 unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
220 for (unsigned i = 0, e = I->getNumOperands(); i != e && Rank != MaxRank; ++i)
221 Rank = std::max(Rank, getRank(I->getOperand(i)));
222
223 // If this is a 'not' or 'neg' instruction, do not count it for rank. This
224 // assures us that X and ~X will have the same rank.
225 if (!match(I, m_Not(m_Value())) && !match(I, m_Neg(m_Value())) &&
226 !match(I, m_FNeg(m_Value())))
227 ++Rank;
228
229 LLVM_DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank
230 << "\n");
231
232 return ValueRankMap[I] = Rank;
233}
234
235// Canonicalize constants to RHS. Otherwise, sort the operands by rank.
236void ReassociatePass::canonicalizeOperands(Instruction *I) {
237 assert(isa<BinaryOperator>(I) && "Expected binary operator.");
238 assert(I->isCommutative() && "Expected commutative operator.");
239
240 Value *LHS = I->getOperand(0);
241 Value *RHS = I->getOperand(1);
242 if (LHS == RHS || isa<Constant>(RHS))
243 return;
244 if (isa<Constant>(LHS) || getRank(RHS) < getRank(LHS))
245 cast<BinaryOperator>(I)->swapOperands();
246}
247
248static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
249 Instruction *InsertBefore, Value *FlagsOp) {
250 if (S1->getType()->isIntOrIntVectorTy())
251 return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
252 else {
253 BinaryOperator *Res =
254 BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore);
255 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
256 return Res;
257 }
258}
259
260static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
261 Instruction *InsertBefore, Value *FlagsOp) {
262 if (S1->getType()->isIntOrIntVectorTy())
263 return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
264 else {
265 BinaryOperator *Res =
266 BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore);
267 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
268 return Res;
269 }
270}
271
273 Instruction *InsertBefore, Value *FlagsOp) {
274 if (S1->getType()->isIntOrIntVectorTy())
275 return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
276
277 if (auto *FMFSource = dyn_cast<Instruction>(FlagsOp))
278 return UnaryOperator::CreateFNegFMF(S1, FMFSource, Name, InsertBefore);
279
280 return UnaryOperator::CreateFNeg(S1, Name, InsertBefore);
281}
282
283/// Replace 0-X with X*-1.
285 assert((isa<UnaryOperator>(Neg) || isa<BinaryOperator>(Neg)) &&
286 "Expected a Negate!");
287 // FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0.
288 unsigned OpNo = isa<BinaryOperator>(Neg) ? 1 : 0;
289 Type *Ty = Neg->getType();
290 Constant *NegOne = Ty->isIntOrIntVectorTy() ?
292
293 BinaryOperator *Res = CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg, Neg);
294 Neg->setOperand(OpNo, Constant::getNullValue(Ty)); // Drop use of op.
295 Res->takeName(Neg);
296 Neg->replaceAllUsesWith(Res);
297 Res->setDebugLoc(Neg->getDebugLoc());
298 return Res;
299}
300
301/// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael
302/// function. This means that x^(2^k) === 1 mod 2^Bitwidth for
303/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
304/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every
305/// even x in Bitwidth-bit arithmetic.
/// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the
/// Carmichael function: x^(2^k) === 1 mod 2^Bitwidth for every odd x, i.e.
/// x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
static unsigned CarmichaelShift(unsigned Bitwidth) {
  // lambda(2) = 1 and lambda(4) = 2 (shifts 0 and 1); for wider types,
  // lambda(2^n) = 2^(n-2).
  return Bitwidth - (Bitwidth < 3 ? 1 : 2);
}
311
312/// Add the extra weight 'RHS' to the existing weight 'LHS',
313/// reducing the combined weight using any special properties of the operation.
314/// The existing weight LHS represents the computation X op X op ... op X where
315/// X occurs LHS times. The combined weight represents X op X op ... op X with
316/// X occurring LHS + RHS times. If op is "Xor" for example then the combined
317/// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even;
318/// the routine returns 1 in LHS in the first case, and 0 in LHS in the second.
static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
  // If we were working with infinite precision arithmetic then the combined
  // weight would be LHS + RHS. But we are using finite precision arithmetic,
  // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct
  // for nilpotent operations and addition, but not for idempotent operations
  // and multiplication), so it is important to correctly reduce the combined
  // weight back into range if wrapping would be wrong.

  // If RHS is zero then the weight didn't change.
  if (RHS.isMinValue())
    return;
  // If LHS is zero then the combined weight is RHS.
  if (LHS.isMinValue()) {
    LHS = RHS;
    return;
  }
  // From this point on we know that neither LHS nor RHS is zero.

  // NOTE(review): the extraction appears to have dropped a guard line here
  // (presumably 'if (Instruction::isIdempotent(Opcode)) {'); the indented
  // body and the closing brace below belong to it -- verify upstream.
    // Idempotent means X op X === X, so any non-zero weight is equivalent to a
    // weight of 1. Keeping weights at zero or one also means that wrapping is
    // not a problem.
    assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
    return; // Return a weight of 1.
  }
  // NOTE(review): another guard line appears to be missing here (presumably
  // 'if (Instruction::isNilpotent(Opcode)) {') -- verify upstream.
    // Nilpotent means X op X === 0, so reduce weights modulo 2.
    assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
    LHS = 0; // 1 + 1 === 0 modulo 2.
    return;
  }
  if (Opcode == Instruction::Add || Opcode == Instruction::FAdd) {
    // TODO: Reduce the weight by exploiting nsw/nuw?
    LHS += RHS;
    return;
  }

  assert((Opcode == Instruction::Mul || Opcode == Instruction::FMul) &&
         "Unknown associative operation!");
  unsigned Bitwidth = LHS.getBitWidth();
  // If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth
  // can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth
  // bit number x, since either x is odd in which case x^CM = 1, or x is even in
  // which case both x^W and x^(W - CM) are zero. By subtracting off multiples
  // of CM like this weights can always be reduced to the range [0, CM+Bitwidth)
  // which by a happy accident means that they can always be represented using
  // Bitwidth bits.
  // TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than
  // the Carmichael number).
  if (Bitwidth > 3) {
    /// CM - The value of Carmichael's lambda function.
    APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth));
    // Any weight W >= Threshold can be replaced with W - CM.
    APInt Threshold = CM + Bitwidth;
    assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!");
    // For Bitwidth 4 or more the following sum does not overflow.
    LHS += RHS;
    while (LHS.uge(Threshold))
      LHS -= CM;
  } else {
    // To avoid problems with overflow do everything the same as above but using
    // a larger type.
    unsigned CM = 1U << CarmichaelShift(Bitwidth);
    unsigned Threshold = CM + Bitwidth;
    assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold &&
           "Weights not reduced!");
    unsigned Total = LHS.getZExtValue() + RHS.getZExtValue();
    while (Total >= Threshold)
      Total -= CM;
    LHS = Total;
  }
}
391
392using RepeatedValue = std::pair<Value*, APInt>;
393
394/// Given an associative binary expression, return the leaf
395/// nodes in Ops along with their weights (how many times the leaf occurs). The
396/// original expression is the same as
397/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
398/// op
399/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
400/// op
401/// ...
402/// op
403/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
404///
405/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
406///
407/// This routine may modify the function, in which case it returns 'true'. The
408/// changes it makes may well be destructive, changing the value computed by 'I'
409/// to something completely different. Thus if the routine returns 'true' then
410/// you MUST either replace I with a new expression computed from the Ops array,
411/// or use RewriteExprTree to put the values back in.
412///
413/// A leaf node is either not a binary operation of the same kind as the root
414/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
415/// opcode), or is the same kind of binary operator but has a use which either
416/// does not belong to the expression, or does belong to the expression but is
417/// a leaf node. Every leaf node has at least one use that is a non-leaf node
418/// of the expression, while for non-leaf nodes (except for the root 'I') every
419/// use is a non-leaf node of the expression.
420///
421/// For example:
422/// expression graph node names
423///
424/// + | I
425/// / \ |
426/// + + | A, B
427/// / \ / \ |
428/// * + * | C, D, E
429/// / \ / \ / \ |
430/// + * | F, G
431///
432/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
433/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
434///
435/// The expression is maximal: if some instruction is a binary operator of the
436/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
437/// then the instruction also belongs to the expression, is not a leaf node of
438/// it, and its operands also belong to the expression (but may be leaf nodes).
439///
440/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
441/// order to ensure that every non-root node in the expression has *exactly one*
442/// use by a non-leaf node of the expression. This destruction means that the
443/// caller MUST either replace 'I' with a new expression or use something like
444/// RewriteExprTree to put the values back in if the routine indicates that it
445/// made a change by returning 'true'.
446///
447/// In the above example either the right operand of A or the left operand of B
448/// will be replaced by undef. If it is B's operand then this gives:
449///
450/// + | I
451/// / \ |
452/// + + | A, B - operand of B replaced with undef
453/// / \ \ |
454/// * + * | C, D, E
455/// / \ / \ / \ |
456/// + * | F, G
457///
458/// Note that such undef operands can only be reached by passing through 'I'.
459/// For example, if you visit operands recursively starting from a leaf node
460/// then you will never see such an undef operand unless you get back to 'I',
461/// which requires passing through a phi node.
462///
463/// Note that this routine may also mutate binary operators of the wrong type
464/// that have all uses inside the expression (i.e. only used by non-leaf nodes
465/// of the expression) if it can turn them into binary operators of the right
466/// type and thus make the expression bigger.
470 assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
471 "Expected a UnaryOperator or BinaryOperator!");
472 LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
473 unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
474 unsigned Opcode = I->getOpcode();
475 assert(I->isAssociative() && I->isCommutative() &&
476 "Expected an associative and commutative operation!");
477
478 // Visit all operands of the expression, keeping track of their weight (the
479 // number of paths from the expression root to the operand, or if you like
480 // the number of times that operand occurs in the linearized expression).
481 // For example, if I = X + A, where X = A + B, then I, X and B have weight 1
482 // while A has weight two.
483
484 // Worklist of non-leaf nodes (their operands are in the expression too) along
485 // with their weights, representing a certain number of paths to the operator.
486 // If an operator occurs in the worklist multiple times then we found multiple
487 // ways to get to it.
488 SmallVector<std::pair<Instruction*, APInt>, 8> Worklist; // (Op, Weight)
489 Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
490 bool Changed = false;
491
492 // Leaves of the expression are values that either aren't the right kind of
493 // operation (eg: a constant, or a multiply in an add tree), or are, but have
494 // some uses that are not inside the expression. For example, in I = X + X,
495 // X = A + B, the value X has two uses (by I) that are in the expression. If
496 // X has any other uses, for example in a return instruction, then we consider
497 // X to be a leaf, and won't analyze it further. When we first visit a value,
498 // if it has more than one use then at first we conservatively consider it to
499 // be a leaf. Later, as the expression is explored, we may discover some more
500 // uses of the value from inside the expression. If all uses turn out to be
501 // from within the expression (and the value is a binary operator of the right
502 // kind) then the value is no longer considered to be a leaf, and its operands
503 // are explored.
504
505 // Leaves - Keeps track of the set of putative leaves as well as the number of
506 // paths to each leaf seen so far.
507 using LeafMap = DenseMap<Value *, APInt>;
508 LeafMap Leaves; // Leaf -> Total weight so far.
509 SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order.
510
511#ifndef NDEBUG
512 SmallPtrSet<Value *, 8> Visited; // For checking the iteration scheme.
513#endif
514 while (!Worklist.empty()) {
515 std::pair<Instruction*, APInt> P = Worklist.pop_back_val();
516 I = P.first; // We examine the operands of this binary operator.
517
518 for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
519 Value *Op = I->getOperand(OpIdx);
520 APInt Weight = P.second; // Number of paths to this operand.
521 LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
522 assert(!Op->use_empty() && "No uses, so how did we get to it?!");
523
524 // If this is a binary operation of the right kind with only one use then
525 // add its operands to the expression.
527 assert(Visited.insert(Op).second && "Not first visit!");
528 LLVM_DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
529 Worklist.push_back(std::make_pair(BO, Weight));
530 continue;
531 }
532
533 // Appears to be a leaf. Is the operand already in the set of leaves?
534 LeafMap::iterator It = Leaves.find(Op);
535 if (It == Leaves.end()) {
536 // Not in the leaf map. Must be the first time we saw this operand.
537 assert(Visited.insert(Op).second && "Not first visit!");
538 if (!Op->hasOneUse()) {
539 // This value has uses not accounted for by the expression, so it is
540 // not safe to modify. Mark it as being a leaf.
542 << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
543 LeafOrder.push_back(Op);
544 Leaves[Op] = Weight;
545 continue;
546 }
547 // No uses outside the expression, try morphing it.
548 } else {
549 // Already in the leaf map.
550 assert(It != Leaves.end() && Visited.count(Op) &&
551 "In leaf map but not visited!");
552
553 // Update the number of paths to the leaf.
554 IncorporateWeight(It->second, Weight, Opcode);
555
556#if 0 // TODO: Re-enable once PR13021 is fixed.
557 // The leaf already has one use from inside the expression. As we want
558 // exactly one such use, drop this new use of the leaf.
559 assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
560 I->setOperand(OpIdx, UndefValue::get(I->getType()));
561 Changed = true;
562
563 // If the leaf is a binary operation of the right kind and we now see
564 // that its multiple original uses were in fact all by nodes belonging
565 // to the expression, then no longer consider it to be a leaf and add
566 // its operands to the expression.
568 LLVM_DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n");
569 Worklist.push_back(std::make_pair(BO, It->second));
570 Leaves.erase(It);
571 continue;
572 }
573#endif
574
575 // If we still have uses that are not accounted for by the expression
576 // then it is not safe to modify the value.
577 if (!Op->hasOneUse())
578 continue;
579
580 // No uses outside the expression, try morphing it.
581 Weight = It->second;
582 Leaves.erase(It); // Since the value may be morphed below.
583 }
584
585 // At this point we have a value which, first of all, is not a binary
586 // expression of the right kind, and secondly, is only used inside the
587 // expression. This means that it can safely be modified. See if we
588 // can usefully morph it into an expression of the right kind.
589 assert((!isa<Instruction>(Op) ||
590 cast<Instruction>(Op)->getOpcode() != Opcode
591 || (isa<FPMathOperator>(Op) &&
592 !hasFPAssociativeFlags(cast<Instruction>(Op)))) &&
593 "Should have been handled above!");
594 assert(Op->hasOneUse() && "Has uses outside the expression tree!");
595
596 // If this is a multiply expression, turn any internal negations into
597 // multiplies by -1 so they can be reassociated. Add any users of the
598 // newly created multiplication by -1 to the redo list, so any
599 // reassociation opportunities that are exposed will be reassociated
600 // further.
601 Instruction *Neg;
602 if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) ||
603 (Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) &&
604 match(Op, m_Instruction(Neg))) {
606 << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
608 LLVM_DEBUG(dbgs() << *Mul << '\n');
609 Worklist.push_back(std::make_pair(Mul, Weight));
610 for (User *U : Mul->users()) {
611 if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(U))
612 ToRedo.insert(UserBO);
613 }
614 ToRedo.insert(Neg);
615 Changed = true;
616 continue;
617 }
618
619 // Failed to morph into an expression of the right type. This really is
620 // a leaf.
621 LLVM_DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
622 assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
623 LeafOrder.push_back(Op);
624 Leaves[Op] = Weight;
625 }
626 }
627
628 // The leaves, repeated according to their weights, represent the linearized
629 // form of the expression.
630 for (Value *V : LeafOrder) {
631 LeafMap::iterator It = Leaves.find(V);
632 if (It == Leaves.end())
633 // Node initially thought to be a leaf wasn't.
634 continue;
635 assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
636 APInt Weight = It->second;
637 if (Weight.isMinValue())
638 // Leaf already output or weight reduction eliminated it.
639 continue;
640 // Ensure the leaf is only output once.
641 It->second = 0;
642 Ops.push_back(std::make_pair(V, Weight));
643 }
644
645 // For nilpotent operations or addition there may be no operands, for example
646 // because the expression was "X xor X" or consisted of 2^Bitwidth additions:
647 // in both cases the weight reduces to 0 causing the value to be skipped.
648 if (Ops.empty()) {
649 Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
650 assert(Identity && "Associative operation without identity!");
651 Ops.emplace_back(Identity, APInt(Bitwidth, 1));
652 }
653
654 return Changed;
655}
656
657/// Now that the operands for this expression tree are
658/// linearized and optimized, emit them in-order.
659void ReassociatePass::RewriteExprTree(BinaryOperator *I,
661 assert(Ops.size() > 1 && "Single values should be used directly!");
662
663 // Since our optimizations should never increase the number of operations, the
664 // new expression can usually be written reusing the existing binary operators
665 // from the original expression tree, without creating any new instructions,
666 // though the rewritten expression may have a completely different topology.
667 // We take care to not change anything if the new expression will be the same
668 // as the original. If more than trivial changes (like commuting operands)
669 // were made then we are obliged to clear out any optional subclass data like
670 // nsw flags.
671
672 /// NodesToRewrite - Nodes from the original expression available for writing
673 /// the new expression into.
674 SmallVector<BinaryOperator*, 8> NodesToRewrite;
675 unsigned Opcode = I->getOpcode();
677
678 /// NotRewritable - The operands being written will be the leaves of the new
679 /// expression and must not be used as inner nodes (via NodesToRewrite) by
680 /// mistake. Inner nodes are always reassociable, and usually leaves are not
681 /// (if they were they would have been incorporated into the expression and so
682 /// would not be leaves), so most of the time there is no danger of this. But
683 /// in rare cases a leaf may become reassociable if an optimization kills uses
684 /// of it, or it may momentarily become reassociable during rewriting (below)
685 /// due it being removed as an operand of one of its uses. Ensure that misuse
686 /// of leaf nodes as inner nodes cannot occur by remembering all of the future
687 /// leaves and refusing to reuse any of them as inner nodes.
688 SmallPtrSet<Value*, 8> NotRewritable;
689 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
690 NotRewritable.insert(Ops[i].Op);
691
692 // ExpressionChangedStart - Non-null if the rewritten expression differs from
693 // the original in some non-trivial way, requiring the clearing of optional
694 // flags. Flags are cleared from the operator in ExpressionChangedStart up to
695 // ExpressionChangedEnd inclusive.
696 BinaryOperator *ExpressionChangedStart = nullptr,
697 *ExpressionChangedEnd = nullptr;
698 for (unsigned i = 0; ; ++i) {
699 // The last operation (which comes earliest in the IR) is special as both
700 // operands will come from Ops, rather than just one with the other being
701 // a subexpression.
702 if (i+2 == Ops.size()) {
703 Value *NewLHS = Ops[i].Op;
704 Value *NewRHS = Ops[i+1].Op;
705 Value *OldLHS = Op->getOperand(0);
706 Value *OldRHS = Op->getOperand(1);
707
708 if (NewLHS == OldLHS && NewRHS == OldRHS)
709 // Nothing changed, leave it alone.
710 break;
711
712 if (NewLHS == OldRHS && NewRHS == OldLHS) {
713 // The order of the operands was reversed. Swap them.
714 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
715 Op->swapOperands();
716 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
717 MadeChange = true;
718 ++NumChanged;
719 break;
720 }
721
722 // The new operation differs non-trivially from the original. Overwrite
723 // the old operands with the new ones.
724 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
725 if (NewLHS != OldLHS) {
727 if (BO && !NotRewritable.count(BO))
728 NodesToRewrite.push_back(BO);
729 Op->setOperand(0, NewLHS);
730 }
731 if (NewRHS != OldRHS) {
733 if (BO && !NotRewritable.count(BO))
734 NodesToRewrite.push_back(BO);
735 Op->setOperand(1, NewRHS);
736 }
737 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
738
739 ExpressionChangedStart = Op;
740 if (!ExpressionChangedEnd)
741 ExpressionChangedEnd = Op;
742 MadeChange = true;
743 ++NumChanged;
744
745 break;
746 }
747
748 // Not the last operation. The left-hand side will be a sub-expression
749 // while the right-hand side will be the current element of Ops.
750 Value *NewRHS = Ops[i].Op;
751 if (NewRHS != Op->getOperand(1)) {
752 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
753 if (NewRHS == Op->getOperand(0)) {
754 // The new right-hand side was already present as the left operand. If
755 // we are lucky then swapping the operands will sort out both of them.
756 Op->swapOperands();
757 } else {
758 // Overwrite with the new right-hand side.
759 BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
760 if (BO && !NotRewritable.count(BO))
761 NodesToRewrite.push_back(BO);
762 Op->setOperand(1, NewRHS);
763 ExpressionChangedStart = Op;
764 if (!ExpressionChangedEnd)
765 ExpressionChangedEnd = Op;
766 }
767 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
768 MadeChange = true;
769 ++NumChanged;
770 }
771
772 // Now deal with the left-hand side. If this is already an operation node
773 // from the original expression then just rewrite the rest of the expression
774 // into it.
775 BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
776 if (BO && !NotRewritable.count(BO)) {
777 Op = BO;
778 continue;
779 }
780
781 // Otherwise, grab a spare node from the original expression and use that as
782 // the left-hand side. If there are no nodes left then the optimizers made
783 // an expression with more nodes than the original! This usually means that
784 // they did something stupid but it might mean that the problem was just too
785 // hard (finding the mimimal number of multiplications needed to realize a
786 // multiplication expression is NP-complete). Whatever the reason, smart or
787 // stupid, create a new node if there are none left.
788 BinaryOperator *NewOp;
789 if (NodesToRewrite.empty()) {
790 Constant *Undef = UndefValue::get(I->getType());
792 Undef, Undef, "", I);
793 if (isa<FPMathOperator>(NewOp))
794 NewOp->setFastMathFlags(I->getFastMathFlags());
795 } else {
796 NewOp = NodesToRewrite.pop_back_val();
797 }
798
799 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
800 Op->setOperand(0, NewOp);
801 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
802 ExpressionChangedStart = Op;
803 if (!ExpressionChangedEnd)
804 ExpressionChangedEnd = Op;
805 MadeChange = true;
806 ++NumChanged;
807 Op = NewOp;
808 }
809
810 // If the expression changed non-trivially then clear out all subclass data
811 // starting from the operator specified in ExpressionChanged, and compactify
812 // the operators to just before the expression root to guarantee that the
813 // expression tree is dominated by all of Ops.
814 if (ExpressionChangedStart) {
815 bool ClearFlags = true;
816 do {
817 // Preserve FastMathFlags.
818 if (ClearFlags) {
819 if (isa<FPMathOperator>(I)) {
820 FastMathFlags Flags = I->getFastMathFlags();
821 ExpressionChangedStart->clearSubclassOptionalData();
822 ExpressionChangedStart->setFastMathFlags(Flags);
823 } else
824 ExpressionChangedStart->clearSubclassOptionalData();
825 }
826
827 if (ExpressionChangedStart == ExpressionChangedEnd)
828 ClearFlags = false;
829 if (ExpressionChangedStart == I)
830 break;
831
832 // Discard any debug info related to the expressions that has changed (we
833 // can leave debug info related to the root and any operation that didn't
834 // change, since the result of the expression tree should be the same
835 // even after reassociation).
836 if (ClearFlags)
837 replaceDbgUsesWithUndef(ExpressionChangedStart);
838
839 ExpressionChangedStart->moveBefore(I);
840 ExpressionChangedStart =
841 cast<BinaryOperator>(*ExpressionChangedStart->user_begin());
842 } while (true);
843 }
844
845 // Throw away any left over nodes from the original expression.
846 for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
847 RedoInsts.insert(NodesToRewrite[i]);
848}
849
850/// Insert instructions before the instruction pointed to by BI,
851/// that computes the negative version of the value specified. The negative
852/// version of the value is returned, and BI is left pointing at the instruction
853/// that should be processed next by the reassociation pass.
854/// Also add intermediate instructions to the redo list that are modified while
855/// pushing the negates through adds. These will be revisited to see if
856/// additional opportunities have been exposed.
// NOTE(review): the function header (presumably
// `static Value *NegateValue(Value *V, Instruction *BI,
//  ReassociatePass::OrderedSet &ToRedo) {`) is missing from this extracted
// source — confirm against the upstream file.
// Fast path: constants can be negated by constant folding, no IR needed.
859 if (auto *C = dyn_cast<Constant>(V)) {
860 const DataLayout &DL = BI->getModule()->getDataLayout();
861 Constant *Res = C->getType()->isFPOrFPVectorTy()
862 ? ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL)
// NOTE(review): the integer arm of this conditional (presumably
// `: ConstantExpr::getNeg(C);`) is missing from this extracted source.
864 if (Res)
865 return Res;
866 }
867
868 // We are trying to expose opportunity for reassociation. One of the things
869 // that we want to do to achieve this is to push a negation as deep into an
870 // expression chain as possible, to expose the add instructions. In practice,
871 // this means that we turn this:
872 // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
873 // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
874 // the constants. We assume that instcombine will clean up the mess later if
875 // we introduce tons of unnecessary negation instructions.
876 //
877 if (BinaryOperator *I =
878 isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
879 // Push the negates through the add.
880 I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo))
881 I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
// Negating the operands invalidates any nsw/nuw facts on the integer add.
882 if (I->getOpcode() == Instruction::Add) {
883 I->setHasNoUnsignedWrap(false);
884 I->setHasNoSignedWrap(false);
885 }
886
887 // We must move the add instruction here, because the neg instructions do
888 // not dominate the old add instruction in general. By moving it, we are
889 // assured that the neg instructions we just inserted dominate the
890 // instruction we are about to insert after them.
891 //
892 I->moveBefore(BI);
893 I->setName(I->getName()+".neg");
894
895 // Add the intermediate negates to the redo list as processing them later
896 // could expose more reassociating opportunities.
897 ToRedo.insert(I);
898 return I;
899 }
900
901 // Okay, we need to materialize a negated version of V with an instruction.
902 // Scan the use lists of V to see if we have one already.
903 for (User *U : V->users()) {
904 if (!match(U, m_Neg(m_Value())) && !match(U, m_FNeg(m_Value())))
905 continue;
906
907 // We found one! Now we have to make sure that the definition dominates
908 // this use. We do this by moving it to the entry block (if it is a
909 // non-instruction value) or right after the definition. These negates will
910 // be zapped by reassociate later, so we don't need much finesse here.
911 Instruction *TheNeg = dyn_cast<Instruction>(U);
912
913 // We can't safely propagate a vector zero constant with poison/undef lanes.
914 Constant *C;
915 if (match(TheNeg, m_BinOp(m_Constant(C), m_Value())) &&
916 C->containsUndefOrPoisonElement())
917 continue;
918
919 // Verify that the negate is in this function, V might be a constant expr.
920 if (!TheNeg ||
921 TheNeg->getParent()->getParent() != BI->getParent()->getParent())
922 continue;
923
// Choose where to re-home the existing negate: right after V's definition
// when V is an instruction, otherwise at the top of the entry block.
924 BasicBlock::iterator InsertPt;
925 if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
926 auto InsertPtOpt = InstInput->getInsertionPointAfterDef();
927 if (!InsertPtOpt)
928 continue;
929 InsertPt = *InsertPtOpt;
930 } else {
931 InsertPt = TheNeg->getFunction()
932 ->getEntryBlock()
// NOTE(review): a continuation line (presumably `.getFirstNonPHIOrDbg()`)
// is missing from this extracted source.
934 ->getIterator();
935 }
936
937 TheNeg->moveBefore(*InsertPt->getParent(), InsertPt);
// A reused `sub` may wrap for our operands, so its nsw/nuw must be dropped;
// other negate forms intersect their IR flags with the requesting op.
938 if (TheNeg->getOpcode() == Instruction::Sub) {
939 TheNeg->setHasNoUnsignedWrap(false);
940 TheNeg->setHasNoSignedWrap(false);
941 } else {
942 TheNeg->andIRFlags(BI);
943 }
944 ToRedo.insert(TheNeg);
945 return TheNeg;
946 }
947
948 // Insert a 'neg' instruction that subtracts the value from zero to get the
949 // negation.
950 Instruction *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
951 ToRedo.insert(NewNeg);
952 return NewNeg;
953}
954
955// See if this `or` looks like an load widening reduction, i.e. that it
956// consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't
957// ensure that the pattern is *really* a load widening reduction,
958// we do not ensure that it can really be replaced with a widened load,
959// only that it mostly looks like one.
// NOTE(review): the function header and worklist declarations (presumably
// `static bool isLoadCombineCandidate(Instruction *Or) {` plus local
// `Worklist`/`Visited` containers) are missing from this extracted source.
963
// Enqueue pushes a value for DFS processing; returns false iff the value is
// not an instruction (and thus cannot belong to an or-load reduction).
964 auto Enqueue = [&](Value *V) {
965 auto *I = dyn_cast<Instruction>(V);
966 // Each node of an `or` reduction must be an instruction,
967 if (!I)
968 return false; // Node is certainly not part of an `or` load reduction.
969 // Only process instructions we have never processed before.
970 if (Visited.insert(I).second)
971 Worklist.emplace_back(I);
972 return true; // Will need to look at parent nodes.
973 };
974
975 if (!Enqueue(Or))
976 return false; // Not an `or` reduction pattern.
977
// Iterative DFS over the expression graph rooted at Or.
978 while (!Worklist.empty()) {
979 auto *I = Worklist.pop_back_val();
980
981 // Okay, which instruction is this node?
982 switch (I->getOpcode()) {
983 case Instruction::Or:
984 // Got an `or` node. That's fine, just recurse into it's operands.
985 for (Value *Op : I->operands())
986 if (!Enqueue(Op))
987 return false; // Not an `or` reduction pattern.
988 continue;
989
990 case Instruction::Shl:
991 case Instruction::ZExt:
992 // `shl`/`zext` nodes are fine, just recurse into their base operand.
993 if (!Enqueue(I->getOperand(0)))
994 return false; // Not an `or` reduction pattern.
995 continue;
996
997 case Instruction::Load:
998 // Perfect, `load` node means we've reached an edge of the graph.
999 continue;
1000
1001 default: // Unknown node.
1002 return false; // Not an `or` reduction pattern.
1003 }
1004 }
1005
1006 return true;
1007}
1008
1009/// Return true if it may be profitable to convert this (X|Y) into (X+Y).
// NOTE(review): the function header (presumably
// `static bool ShouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) {`)
// is missing from this extracted source.
1011 // Don't bother to convert this up unless either the LHS is an associable add
1012 // or subtract or mul or if this is only used by one of the above.
1013 // This is only a compile-time improvement, it is not needed for correctness!
1014 auto isInteresting = [](Value *V) {
1015 for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul,
1016 Instruction::Shl})
1017 if (isReassociableOp(V, Op))
1018 return true;
1019 return false;
1020 };
1021
1022 if (any_of(Or->operands(), isInteresting))
1023 return true;
1024
// NOTE(review): user_back() is read before the hasOneUse() guard, so this
// assumes Or has at least one user — confirm callers guarantee that.
1025 Value *VB = Or->user_back();
1026 if (Or->hasOneUse() && isInteresting(VB))
1027 return true;
1028
1029 return false;
1030}
1031
1032/// If we have (X|Y), and iff X and Y have no common bits set,
1033/// transform this into (X+Y) to allow arithmetics reassociation.
// NOTE(review): the function header (presumably
// `static BinaryOperator *convertOrWithNoCommonBitsToAdd(Instruction *Or) {`)
// is missing from this extracted source.
1035 // Convert an or into an add.
1036 BinaryOperator *New =
1037 CreateAdd(Or->getOperand(0), Or->getOperand(1), "", Or, Or);
// With no common bits set the addition cannot carry, so both nsw and nuw
// hold on the replacement add.
1038 New->setHasNoSignedWrap();
1039 New->setHasNoUnsignedWrap();
1040 New->takeName(Or);
1041
1042 // Everyone now refers to the add instruction.
1043 Or->replaceAllUsesWith(New);
1044 New->setDebugLoc(Or->getDebugLoc());
1045
1046 LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << '\n');
1047 return New;
1048}
1049
1050/// Return true if we should break up this subtract of X-Y into (X + -Y).
// NOTE(review): the function header (presumably
// `static bool ShouldBreakUpSubtract(Instruction *Sub) {`) is missing from
// this extracted source.
1052 // If this is a negation, we can't split it up!
1053 if (match(Sub, m_Neg(m_Value())) || match(Sub, m_FNeg(m_Value())))
1054 return false;
1055
1056 // Don't breakup X - undef.
1057 if (isa<UndefValue>(Sub->getOperand(1)))
1058 return false;
1059
1060 // Don't bother to break this up unless either the LHS is an associable add or
1061 // subtract or if this is only used by one.
1062 Value *V0 = Sub->getOperand(0);
1063 if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) ||
1064 isReassociableOp(V0, Instruction::Sub, Instruction::FSub))
1065 return true;
1066 Value *V1 = Sub->getOperand(1);
1067 if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) ||
1068 isReassociableOp(V1, Instruction::Sub, Instruction::FSub))
1069 return true;
// NOTE(review): as in ShouldConvertOrWithNoCommonBitsToAdd, user_back() is
// read before the hasOneUse() guard — assumes Sub has at least one user.
1070 Value *VB = Sub->user_back();
1071 if (Sub->hasOneUse() &&
1072 (isReassociableOp(VB, Instruction::Add, Instruction::FAdd) ||
1073 isReassociableOp(VB, Instruction::Sub, Instruction::FSub)))
1074 return true;
1075
1076 return false;
1077}
1078
1079/// If we have (X-Y), and if either X is an add, or if this is only used by an
1080/// add, transform this into (X+(0-Y)) to promote better reassociation.
// NOTE(review): the function header (presumably
// `static BinaryOperator *BreakUpSubtract(Instruction *Sub,
//  ReassociatePass::OrderedSet &ToRedo) {`) is missing from this extracted
// source.
1083 // Convert a subtract into an add and a neg instruction. This allows sub
1084 // instructions to be commuted with other add instructions.
1085 //
1086 // Calculate the negative value of Operand 1 of the sub instruction,
1087 // and set it as the RHS of the add instruction we just made.
1088 Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
1089 BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
// Null out the dead sub's operands so it releases its uses of them before it
// is eventually deleted.
1090 Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
1091 Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
1092 New->takeName(Sub);
1093
1094 // Everyone now refers to the add instruction.
1095 Sub->replaceAllUsesWith(New);
1096 New->setDebugLoc(Sub->getDebugLoc());
1097
1098 LLVM_DEBUG(dbgs() << "Negated: " << *New << '\n');
1099 return New;
1100}
1101
1102/// If this is a shift of a reassociable multiply or is used by one, change
1103/// this into a multiply by a constant to assist with further reassociation.
// NOTE(review): the function header (presumably
// `static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {`) is missing
// from this extracted source.
// Build the multiplier (1 << ShAmt) as a constant.
1105 Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
1106 auto *SA = cast<ConstantInt>(Shl->getOperand(1));
1107 MulCst = ConstantExpr::getShl(MulCst, SA);
1108
// NOTE(review): the declaration line (presumably `BinaryOperator *Mul =`)
// is missing from this extracted source.
1110 BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
1111 Shl->setOperand(0, PoisonValue::get(Shl->getType())); // Drop use of op.
1112 Mul->takeName(Shl);
1113
1114 // Everyone now refers to the mul instruction.
1115 Shl->replaceAllUsesWith(Mul);
1116 Mul->setDebugLoc(Shl->getDebugLoc());
1117
1118 // We can safely preserve the nuw flag in all cases. It's also safe to turn a
1119 // nuw nsw shl into a nuw nsw mul. However, nsw in isolation requires special
1120 // handling. It can be preserved as long as we're not left shifting by
1121 // bitwidth - 1.
1122 bool NSW = cast<BinaryOperator>(Shl)->hasNoSignedWrap();
1123 bool NUW = cast<BinaryOperator>(Shl)->hasNoUnsignedWrap();
1124 unsigned BitWidth = Shl->getType()->getIntegerBitWidth();
1125 if (NSW && (NUW || SA->getValue().ult(BitWidth - 1)))
1126 Mul->setHasNoSignedWrap(true);
1127 Mul->setHasNoUnsignedWrap(NUW);
1128 return Mul;
1129}
1130
1131/// Scan backwards and forwards among values with the same rank as element i
1132/// to see if X exists. If X does not exist, return i. This is useful when
1133/// scanning for 'x' when we see '-x' because they both get the same rank.
// NOTE(review): the first line of the signature (presumably
// `static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops,`)
// is missing from this extracted source.
1135 unsigned i, Value *X) {
1136 unsigned XRank = Ops[i].Rank;
1137 unsigned e = Ops.size();
// Forward scan: the operand list is rank-sorted, so candidates with the same
// rank are contiguous around index i.
1138 for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) {
1139 if (Ops[j].Op == X)
1140 return j;
1141 if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
1142 if (Instruction *I2 = dyn_cast<Instruction>(X))
1143 if (I1->isIdenticalTo(I2))
1144 return j;
1145 }
1146 // Scan backwards.
// The unsigned index wraps to ~0U when it passes zero, which terminates the
// loop.
1147 for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) {
1148 if (Ops[j].Op == X)
1149 return j;
1150 if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
1151 if (Instruction *I2 = dyn_cast<Instruction>(X))
1152 if (I1->isIdenticalTo(I2))
1153 return j;
1154 }
// Not found; returning i signals "no match" to the caller.
1155 return i;
1156}
1157
1158/// Emit a tree of add instructions, summing Ops together
1159/// and returning the result. Insert the tree before I.
// NOTE(review): the function header (presumably
// `static Value *EmitAddTreeOfValues(Instruction *I,
//  SmallVectorImpl<WeakTrackingVH> &Ops) {`) is missing from this extracted
// source.
1162 if (Ops.size() == 1) return Ops.back();
1163
// Pop one operand and recursively sum the rest; Ops shrinks by one per level.
1164 Value *V1 = Ops.pop_back_val();
1165 Value *V2 = EmitAddTreeOfValues(I, Ops);
1166 return CreateAdd(V2, V1, "reass.add", I, I);
1167}
1168
1169/// If V is an expression tree that is a multiplication sequence,
1170/// and if this sequence contains a multiply by Factor,
1171/// remove Factor from the tree and return the new tree.
1172Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
1173 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1174 if (!BO)
1175 return nullptr;
1176
// NOTE(review): the declaration line (presumably
// `SmallVector<RepeatedValue, 8> Tree;`) is missing from this extracted
// source.
1178 MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts);
// NOTE(review): the declaration line (presumably
// `SmallVector<ValueEntry, 8> Factors;`) is missing from this extracted
// source.
1180 Factors.reserve(Tree.size());
// Expand each (value, multiplicity) pair from the linearized tree into that
// many ValueEntry factors.
1181 for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
1182 RepeatedValue E = Tree[i];
1183 Factors.append(E.second.getZExtValue(),
1184 ValueEntry(getRank(E.first), E.first));
1185 }
1186
1187 bool FoundFactor = false;
1188 bool NeedsNegate = false;
1189 for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
1190 if (Factors[i].Op == Factor) {
1191 FoundFactor = true;
1192 Factors.erase(Factors.begin()+i);
1193 break;
1194 }
1195
1196 // If this is a negative version of this factor, remove it.
1197 if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) {
1198 if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
1199 if (FC1->getValue() == -FC2->getValue()) {
1200 FoundFactor = NeedsNegate = true;
1201 Factors.erase(Factors.begin()+i);
1202 break;
1203 }
1204 } else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) {
1205 if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Factors[i].Op)) {
// Compare against the sign-flipped FP constant rather than negating, to
// match the exact APFloat bit pattern.
1206 const APFloat &F1 = FC1->getValueAPF();
1207 APFloat F2(FC2->getValueAPF());
1208 F2.changeSign();
1209 if (F1 == F2) {
1210 FoundFactor = NeedsNegate = true;
1211 Factors.erase(Factors.begin() + i);
1212 break;
1213 }
1214 }
1215 }
1216 }
1217
1218 if (!FoundFactor) {
1219 // Make sure to restore the operands to the expression tree.
1220 RewriteExprTree(BO, Factors);
1221 return nullptr;
1222 }
1223
// Remember where to insert a trailing negate: immediately after BO.
1224 BasicBlock::iterator InsertPt = ++BO->getIterator();
1225
1226 // If this was just a single multiply, remove the multiply and return the only
1227 // remaining operand.
1228 if (Factors.size() == 1) {
1229 RedoInsts.insert(BO);
1230 V = Factors[0].Op;
1231 } else {
1232 RewriteExprTree(BO, Factors);
1233 V = BO;
1234 }
1235
1236 if (NeedsNegate)
1237 V = CreateNeg(V, "neg", &*InsertPt, BO);
1238
1239 return V;
1240}
1241
1242/// If V is a single-use multiply, recursively add its operands as factors,
1243/// otherwise add V to the list of factors.
1244///
1245/// Ops is the top-level list of add operands we're trying to factor.
// NOTE(review): the first line of the signature (presumably
// `static void FindSingleUseMultiplyFactors(Value *V,`) is missing from this
// extracted source.
1247 SmallVectorImpl<Value*> &Factors) {
1248 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1249 if (!BO) {
// Leaf: V is not a reassociable multiply, so it is itself a factor.
1250 Factors.push_back(V);
1251 return;
1252 }
1253
1254 // Otherwise, add the LHS and RHS to the list of factors.
// NOTE(review): the two recursive calls (presumably
// `FindSingleUseMultiplyFactors(BO->getOperand(0), Factors);` and the same
// for operand 1) are missing from this extracted source.
1257}
1258
1259/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
1260/// This optimizes based on identities. If it can be reduced to a single Value,
1261/// it is returned, otherwise the Ops list is mutated as necessary.
// NOTE(review): the function header (presumably
// `static Value *OptimizeAndOrXor(unsigned Opcode,
//  SmallVectorImpl<ValueEntry> &Ops) {`) is missing from this extracted
// source.
1264 // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
1265 // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
1266 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1267 // First, check for X and ~X in the operand list.
1268 assert(i < Ops.size());
1269 Value *X;
1270 if (match(Ops[i].Op, m_Not(m_Value(X)))) { // Cannot occur for ^.
1271 unsigned FoundX = FindInOperandList(Ops, i, X);
1272 if (FoundX != i) {
1273 if (Opcode == Instruction::And) // ...&X&~X = 0
1274 return Constant::getNullValue(X->getType());
1275
1276 if (Opcode == Instruction::Or) // ...|X|~X = -1
1277 return Constant::getAllOnesValue(X->getType());
1278 }
1279 }
1280
1281 // Next, check for duplicate pairs of values, which we assume are next to
1282 // each other, due to our sorting criteria.
1283 assert(i < Ops.size());
1284 if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
1285 if (Opcode == Instruction::And || Opcode == Instruction::Or) {
1286 // Drop duplicate values for And and Or.
1287 Ops.erase(Ops.begin()+i);
// Back up so the loop increment re-examines the element now at index i
// (there may be further duplicates of the same value).
1288 --i; --e;
1289 ++NumAnnihil;
1290 continue;
1291 }
1292
1293 // Drop pairs of values for Xor.
1294 assert(Opcode == Instruction::Xor);
1295 if (e == 2)
1296 return Constant::getNullValue(Ops[0].Op->getType());
1297
1298 // Y ^ X^X -> Y
1299 Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
1300 i -= 1; e -= 2;
1301 ++NumAnnihil;
1302 }
1303 }
// No full collapse possible; Ops may still have been shortened in place.
1304 return nullptr;
1305}
1306
1307/// Helper function of CombineXorOpnd(). It creates a bitwise-and
1308/// instruction with the given two operands, and return the resulting
1309/// instruction. There are two special cases: 1) if the constant operand is 0,
1310/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
1311/// be returned.
1312static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
1313 const APInt &ConstOpnd) {
1314 if (ConstOpnd.isZero())
1315 return nullptr;
1316
1317 if (ConstOpnd.isAllOnes())
1318 return Opnd;
1319
1320 Instruction *I = BinaryOperator::CreateAnd(
1321 Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra",
1322 InsertBefore);
1323 I->setDebugLoc(InsertBefore->getDebugLoc());
1324 return I;
1325}
1326
1327// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
1328// into "R ^ C", where C would be 0, and R is a symbolic value.
1329//
1330// If it was successful, true is returned, and the "R" and "C" is returned
1331// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
1332// and both "Res" and "ConstOpnd" remain unchanged.
1333bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
1334 APInt &ConstOpnd, Value *&Res) {
1335 // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
1336 // = ((x | c1) ^ c1) ^ (c1 ^ c2)
1337 // = (x & ~c1) ^ (c1 ^ c2)
1338 // It is useful only when c1 == c2.
1339 if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isZero())
1340 return false;
1341
1342 if (!Opnd1->getValue()->hasOneUse())
1343 return false;
1344
1345 const APInt &C1 = Opnd1->getConstPart();
1346 if (C1 != ConstOpnd)
1347 return false;
1348
1349 Value *X = Opnd1->getSymbolicPart();
1350 Res = createAndInstr(I, X, ~C1);
1351 // ConstOpnd was C2, now C1 ^ C2.
1352 ConstOpnd ^= C1;
1353
1354 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1355 RedoInsts.insert(T);
1356 return true;
1357}
1358
1359// Helper function of OptimizeXor(). It tries to simplify
1360// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
1361// symbolic value.
1362//
1363// If it was successful, true is returned, and the "R" and "C" is returned
1364// via "Res" and "ConstOpnd", respectively (If the entire expression is
1365// evaluated to a constant, the Res is set to NULL); otherwise, false is
1366// returned, and both "Res" and "ConstOpnd" remain unchanged.
1367bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
1368 XorOpnd *Opnd2, APInt &ConstOpnd,
1369 Value *&Res) {
// The rules below only apply when both operands share the same symbolic
// value x.
1370 Value *X = Opnd1->getSymbolicPart();
1371 if (X != Opnd2->getSymbolicPart())
1372 return false;
1373
1374 // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
1375 int DeadInstNum = 1;
1376 if (Opnd1->getValue()->hasOneUse())
1377 DeadInstNum++;
1378 if (Opnd2->getValue()->hasOneUse())
1379 DeadInstNum++;
1380
1381 // Xor-Rule 2:
1382 // (x | c1) ^ (x & c2)
1383 // = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
1384 // = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1
1385 // = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
1386 //
1387 if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
// Canonicalize so Opnd1 is the or-expression and Opnd2 the and-expression.
1388 if (Opnd2->isOrExpr())
1389 std::swap(Opnd1, Opnd2);
1390
1391 const APInt &C1 = Opnd1->getConstPart();
1392 const APInt &C2 = Opnd2->getConstPart();
1393 APInt C3((~C1) ^ C2);
1394
1395 // Do not increase code size!
// The rewrite emits one AND (plus one XOR if a constant remains); bail out
// unless at least that many original instructions die.
1396 if (!C3.isZero() && !C3.isAllOnes()) {
1397 int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1398 if (NewInstNum > DeadInstNum)
1399 return false;
1400 }
1401
1402 Res = createAndInstr(I, X, C3);
1403 ConstOpnd ^= C1;
1404 } else if (Opnd1->isOrExpr()) {
1405 // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
1406 //
1407 const APInt &C1 = Opnd1->getConstPart();
1408 const APInt &C2 = Opnd2->getConstPart();
1409 APInt C3 = C1 ^ C2;
1410
1411 // Do not increase code size
1412 if (!C3.isZero() && !C3.isAllOnes()) {
1413 int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1414 if (NewInstNum > DeadInstNum)
1415 return false;
1416 }
1417
1418 Res = createAndInstr(I, X, C3);
1419 ConstOpnd ^= C3;
1420 } else {
1421 // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
1422 //
// No constant folds out here, so no profitability check is needed: the
// result is strictly fewer operations.
1423 const APInt &C1 = Opnd1->getConstPart();
1424 const APInt &C2 = Opnd2->getConstPart();
1425 APInt C3 = C1 ^ C2;
1426 Res = createAndInstr(I, X, C3);
1427 }
1428
1429 // Put the original operands in the Redo list; hope they will be deleted
1430 // as dead code.
1431 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1432 RedoInsts.insert(T);
1433 if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
1434 RedoInsts.insert(T);
1435
1436 return true;
1437}
1438
1439/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
1440/// to a single Value, it is returned, otherwise the Ops list is mutated as
1441/// necessary.
1442Value *ReassociatePass::OptimizeXor(Instruction *I,
// NOTE(review): the second signature line (presumably
// `SmallVectorImpl<ValueEntry> &Ops) {`) is missing from this extracted
// source.
1444 if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
1445 return V;
1446
1447 if (Ops.size() == 1)
1448 return nullptr;
1449
// NOTE(review): the declaration line (presumably
// `SmallVector<XorOpnd, 8> Opnds;`) is missing from this extracted source.
1451 SmallVector<XorOpnd*, 8> OpndPtrs;
1452 Type *Ty = Ops[0].Op->getType();
1453 APInt ConstOpnd(Ty->getScalarSizeInBits(), 0);
1454
1455 // Step 1: Convert ValueEntry to XorOpnd
1456 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1457 Value *V = Ops[i].Op;
1458 const APInt *C;
1459 // TODO: Support non-splat vectors.
1460 if (match(V, m_APInt(C))) {
// Fold all constant operands into a single accumulated xor constant.
1461 ConstOpnd ^= *C;
1462 } else {
1463 XorOpnd O(V);
1464 O.setSymbolicRank(getRank(O.getSymbolicPart()));
1465 Opnds.push_back(O);
1466 }
1467 }
1468
1469 // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
1470 // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
1471 // the "OpndPtrs" as well. For the similar reason, do not fuse this loop
1472 // with the previous loop --- the iterator of the "Opnds" may be invalidated
1473 // when new elements are added to the vector.
1474 for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
1475 OpndPtrs.push_back(&Opnds[i]);
1476
1477 // Step 2: Sort the Xor-Operands in a way such that the operands containing
1478 // the same symbolic value cluster together. For instance, the input operand
1479 // sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
1480 // ("x | 123", "x & 789", "y & 456").
1481 //
1482 // The purpose is twofold:
1483 // 1) Cluster together the operands sharing the same symbolic-value.
1484 // 2) Operand having smaller symbolic-value-rank is permuted earlier, which
1485 // could potentially shorten crital path, and expose more loop-invariants.
1486 // Note that values' rank are basically defined in RPO order (FIXME).
1487 // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
1488 // than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
1489 // "z" in the order of X-Y-Z is better than any other orders.
1490 llvm::stable_sort(OpndPtrs, [](XorOpnd *LHS, XorOpnd *RHS) {
1491 return LHS->getSymbolicRank() < RHS->getSymbolicRank();
1492 });
1493
1494 // Step 3: Combine adjacent operands
1495 XorOpnd *PrevOpnd = nullptr;
1496 bool Changed = false;
1497 for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
1498 XorOpnd *CurrOpnd = OpndPtrs[i];
1499 // The combined value
1500 Value *CV;
1501
1502 // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
1503 if (!ConstOpnd.isZero() && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
1504 Changed = true;
1505 if (CV)
1506 *CurrOpnd = XorOpnd(CV);
1507 else {
// The operand folded away entirely; drop it from further consideration.
1508 CurrOpnd->Invalidate();
1509 continue;
1510 }
1511 }
1512
1513 if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
1514 PrevOpnd = CurrOpnd;
1515 continue;
1516 }
1517
1518 // step 3.2: When previous and current operands share the same symbolic
1519 // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
1520 if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
1521 // Remove previous operand
1522 PrevOpnd->Invalidate();
1523 if (CV) {
1524 *CurrOpnd = XorOpnd(CV);
1525 PrevOpnd = CurrOpnd;
1526 } else {
1527 CurrOpnd->Invalidate();
1528 PrevOpnd = nullptr;
1529 }
1530 Changed = true;
1531 }
1532 }
1533
1534 // Step 4: Reassemble the Ops
1535 if (Changed) {
1536 Ops.clear();
1537 for (const XorOpnd &O : Opnds) {
1538 if (O.isInvalid())
1539 continue;
1540 ValueEntry VE(getRank(O.getValue()), O.getValue());
1541 Ops.push_back(VE);
1542 }
// Any remaining constant becomes one more xor operand.
1543 if (!ConstOpnd.isZero()) {
1544 Value *C = ConstantInt::get(Ty, ConstOpnd);
1545 ValueEntry VE(getRank(C), C);
1546 Ops.push_back(VE);
1547 }
1548 unsigned Sz = Ops.size();
1549 if (Sz == 1)
1550 return Ops.back().Op;
// Everything cancelled: the whole expression is the zero constant.
1551 if (Sz == 0) {
1552 assert(ConstOpnd.isZero());
1553 return ConstantInt::get(Ty, ConstOpnd);
1554 }
1555 }
1556
1557 return nullptr;
1558}
1559
1560/// Optimize a series of operands to an 'add' instruction. This
1561/// optimizes based on identities. If it can be reduced to a single Value, it
1562/// is returned, otherwise the Ops list is mutated as necessary.
1563Value *ReassociatePass::OptimizeAdd(Instruction *I,
1565 // Scan the operand lists looking for X and -X pairs. If we find any, we
1566 // can simplify expressions like X+-X == 0 and X+~X ==-1. While we're at it,
1567 // scan for any
1568 // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
1569
1570 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1571 Value *TheOp = Ops[i].Op;
1572 // Check to see if we've seen this operand before. If so, we factor all
1573 // instances of the operand together. Due to our sorting criteria, we know
1574 // that these need to be next to each other in the vector.
1575 if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
1576 // Rescan the list, remove all instances of this operand from the expr.
1577 unsigned NumFound = 0;
1578 do {
1579 Ops.erase(Ops.begin()+i);
1580 ++NumFound;
1581 } while (i != Ops.size() && Ops[i].Op == TheOp);
1582
1583 LLVM_DEBUG(dbgs() << "\nFACTORING [" << NumFound << "]: " << *TheOp
1584 << '\n');
1585 ++NumFactor;
1586
1587 // Insert a new multiply.
1588 Type *Ty = TheOp->getType();
1589 Constant *C = Ty->isIntOrIntVectorTy() ?
1590 ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
1591 Instruction *Mul = CreateMul(TheOp, C, "factor", I, I);
1592
1593 // Now that we have inserted a multiply, optimize it. This allows us to
1594 // handle cases that require multiple factoring steps, such as this:
1595 // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
1596 RedoInsts.insert(Mul);
1597
1598 // If every add operand was a duplicate, return the multiply.
1599 if (Ops.empty())
1600 return Mul;
1601
1602 // Otherwise, we had some input that didn't have the dupe, such as
1603 // "A + A + B" -> "A*2 + B". Add the new multiply to the list of
1604 // things being added by this operation.
1605 Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
1606
1607 --i;
1608 e = Ops.size();
1609 continue;
1610 }
1611
1612 // Check for X and -X or X and ~X in the operand list.
1613 Value *X;
1614 if (!match(TheOp, m_Neg(m_Value(X))) && !match(TheOp, m_Not(m_Value(X))) &&
1615 !match(TheOp, m_FNeg(m_Value(X))))
1616 continue;
1617
1618 unsigned FoundX = FindInOperandList(Ops, i, X);
1619 if (FoundX == i)
1620 continue;
1621
1622 // Remove X and -X from the operand list.
1623 if (Ops.size() == 2 &&
1624 (match(TheOp, m_Neg(m_Value())) || match(TheOp, m_FNeg(m_Value()))))
1625 return Constant::getNullValue(X->getType());
1626
1627 // Remove X and ~X from the operand list.
1628 if (Ops.size() == 2 && match(TheOp, m_Not(m_Value())))
1629 return Constant::getAllOnesValue(X->getType());
1630
1631 Ops.erase(Ops.begin()+i);
1632 if (i < FoundX)
1633 --FoundX;
1634 else
1635 --i; // Need to back up an extra one.
1636 Ops.erase(Ops.begin()+FoundX);
1637 ++NumAnnihil;
1638 --i; // Revisit element.
1639 e -= 2; // Removed two elements.
1640
1641 // if X and ~X we append -1 to the operand list.
1642 if (match(TheOp, m_Not(m_Value()))) {
1643 Value *V = Constant::getAllOnesValue(X->getType());
1644 Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
1645 e += 1;
1646 }
1647 }
1648
1649 // Scan the operand list, checking to see if there are any common factors
1650 // between operands. Consider something like A*A+A*B*C+D. We would like to
1651 // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
1652 // To efficiently find this, we count the number of times a factor occurs
1653 // for any ADD operands that are MULs.
1654 DenseMap<Value*, unsigned> FactorOccurrences;
1655
1656 // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
1657 // where they are actually the same multiply.
1658 unsigned MaxOcc = 0;
1659 Value *MaxOccVal = nullptr;
1660 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1661 BinaryOperator *BOp =
1662 isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1663 if (!BOp)
1664 continue;
1665
1666 // Compute all of the factors of this added value.
1667 SmallVector<Value*, 8> Factors;
1668 FindSingleUseMultiplyFactors(BOp, Factors);
1669 assert(Factors.size() > 1 && "Bad linearize!");
1670
1671 // Add one to FactorOccurrences for each unique factor in this op.
1672 SmallPtrSet<Value*, 8> Duplicates;
1673 for (Value *Factor : Factors) {
1674 if (!Duplicates.insert(Factor).second)
1675 continue;
1676
1677 unsigned Occ = ++FactorOccurrences[Factor];
1678 if (Occ > MaxOcc) {
1679 MaxOcc = Occ;
1680 MaxOccVal = Factor;
1681 }
1682
1683 // If Factor is a negative constant, add the negated value as a factor
1684 // because we can percolate the negate out. Watch for minint, which
1685 // cannot be positivified.
1686 if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
1687 if (CI->isNegative() && !CI->isMinValue(true)) {
1688 Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
1689 if (!Duplicates.insert(Factor).second)
1690 continue;
1691 unsigned Occ = ++FactorOccurrences[Factor];
1692 if (Occ > MaxOcc) {
1693 MaxOcc = Occ;
1694 MaxOccVal = Factor;
1695 }
1696 }
1697 } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Factor)) {
1698 if (CF->isNegative()) {
1699 APFloat F(CF->getValueAPF());
1700 F.changeSign();
1701 Factor = ConstantFP::get(CF->getContext(), F);
1702 if (!Duplicates.insert(Factor).second)
1703 continue;
1704 unsigned Occ = ++FactorOccurrences[Factor];
1705 if (Occ > MaxOcc) {
1706 MaxOcc = Occ;
1707 MaxOccVal = Factor;
1708 }
1709 }
1710 }
1711 }
1712 }
1713
1714 // If any factor occurred more than one time, we can pull it out.
1715 if (MaxOcc > 1) {
1716 LLVM_DEBUG(dbgs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal
1717 << '\n');
1718 ++NumFactor;
1719
1720 // Create a new instruction that uses the MaxOccVal twice. If we don't do
1721 // this, we could otherwise run into situations where removing a factor
1722 // from an expression will drop a use of maxocc, and this can cause
1723 // RemoveFactorFromExpression on successive values to behave differently.
1724 Instruction *DummyInst =
1725 I->getType()->isIntOrIntVectorTy()
1726 ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
1727 : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
1728
1730 for (unsigned i = 0; i != Ops.size(); ++i) {
1731 // Only try to remove factors from expressions we're allowed to.
1732 BinaryOperator *BOp =
1733 isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1734 if (!BOp)
1735 continue;
1736
1737 if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
1738 // The factorized operand may occur several times. Convert them all in
1739 // one fell swoop.
1740 for (unsigned j = Ops.size(); j != i;) {
1741 --j;
1742 if (Ops[j].Op == Ops[i].Op) {
1743 NewMulOps.push_back(V);
1744 Ops.erase(Ops.begin()+j);
1745 }
1746 }
1747 --i;
1748 }
1749 }
1750
1751 // No need for extra uses anymore.
1752 DummyInst->deleteValue();
1753
1754 unsigned NumAddedValues = NewMulOps.size();
1755 Value *V = EmitAddTreeOfValues(I, NewMulOps);
1756
1757 // Now that we have inserted the add tree, optimize it. This allows us to
1758 // handle cases that require multiple factoring steps, such as this:
1759 // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
1760 assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
1761 (void)NumAddedValues;
1762 if (Instruction *VI = dyn_cast<Instruction>(V))
1763 RedoInsts.insert(VI);
1764
1765 // Create the multiply.
1766 Instruction *V2 = CreateMul(V, MaxOccVal, "reass.mul", I, I);
1767
1768 // Rerun associate on the multiply in case the inner expression turned into
1769 // a multiply. We want to make sure that we keep things in canonical form.
1770 RedoInsts.insert(V2);
1771
1772 // If every add operand included the factor (e.g. "A*B + A*C"), then the
1773 // entire result expression is just the multiply "A*(B+C)".
1774 if (Ops.empty())
1775 return V2;
1776
1777 // Otherwise, we had some input that didn't have the factor, such as
1778 // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
1779 // things being added by this operation.
1780 Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
1781 }
1782
1783 return nullptr;
1784}
1785
/// Build up a vector of value/power pairs factoring a product.
///
/// Given a series of multiplication operands, build a vector of factors and
/// the powers each is raised to when forming the final product. Sort them in
/// the order of descending power.
///
///      (x*x)          -> [(x, 2)]
///     ((x*x)*x)       -> [(x, 3)]
///   ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
///
/// \returns Whether any factors have a power greater than one.
                                   SmallVectorImpl<Factor> &Factors) {
  // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
  // Compute the sum of powers of simplifiable factors. The scan below counts
  // runs of equal operands, so it relies on identical operands being adjacent
  // in Ops (Ops is kept sorted by the caller).
  unsigned FactorPowerSum = 0;
  for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
    Value *Op = Ops[Idx-1].Op;

    // Count the number of occurrences of this value.
    unsigned Count = 1;
    for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
      ++Count;
    // Track for simplification all factors which occur 2 or more times.
    if (Count > 1)
      FactorPowerSum += Count;
  }

  // We can only simplify factors if the sum of the powers of our simplifiable
  // factors is 4 or higher. When that is the case, we will *always* have
  // a simplification. This is an important invariant to prevent cyclically
  // trying to simplify already minimal formations.
  if (FactorPowerSum < 4)
    return false;

  // Now gather the simplifiable factors, removing them from Ops.
  FactorPowerSum = 0;
  for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
    Value *Op = Ops[Idx-1].Op;

    // Count the number of occurrences of this value.
    unsigned Count = 1;
    for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
      ++Count;
    if (Count == 1)
      continue;
    // Move an even number of occurrences to Factors; if the run length was
    // odd, one leftover occurrence stays behind in Ops.
    Count &= ~1U;
    Idx -= Count;
    FactorPowerSum += Count;
    Factors.push_back(Factor(Op, Count));
    Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
  }

  // None of the adjustments above should have reduced the sum of factor powers
  // below our minimum of '4'.
  assert(FactorPowerSum >= 4);

  // Sort so that the highest powers come first, which is the order
  // buildMinimalMultiplyDAG expects its input in.
  llvm::stable_sort(Factors, [](const Factor &LHS, const Factor &RHS) {
    return LHS.Power > RHS.Power;
  });
  return true;
}
1849
/// Build a tree of multiplies, computing the product of Ops.
  if (Ops.size() == 1)
    return Ops.back();

  // Consume operands from the back, folding each one into an accumulated
  // product. Integer-typed values get 'mul'; all others get 'fmul'. The
  // result is a left-leaning chain of multiplies.
  Value *LHS = Ops.pop_back_val();
  do {
    if (LHS->getType()->isIntOrIntVectorTy())
      LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
    else
      LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
  } while (!Ops.empty());

  return LHS;
}
1866
1867/// Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
1868///
1869/// Given a vector of values raised to various powers, where no two values are
1870/// equal and the powers are sorted in decreasing order, compute the minimal
1871/// DAG of multiplies to compute the final product, and return that product
1872/// value.
1873Value *
1874ReassociatePass::buildMinimalMultiplyDAG(IRBuilderBase &Builder,
1875 SmallVectorImpl<Factor> &Factors) {
1876 assert(Factors[0].Power);
1877 SmallVector<Value *, 4> OuterProduct;
1878 for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
1879 Idx < Size && Factors[Idx].Power > 0; ++Idx) {
1880 if (Factors[Idx].Power != Factors[LastIdx].Power) {
1881 LastIdx = Idx;
1882 continue;
1883 }
1884
1885 // We want to multiply across all the factors with the same power so that
1886 // we can raise them to that power as a single entity. Build a mini tree
1887 // for that.
1888 SmallVector<Value *, 4> InnerProduct;
1889 InnerProduct.push_back(Factors[LastIdx].Base);
1890 do {
1891 InnerProduct.push_back(Factors[Idx].Base);
1892 ++Idx;
1893 } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);
1894
1895 // Reset the base value of the first factor to the new expression tree.
1896 // We'll remove all the factors with the same power in a second pass.
1897 Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
1898 if (Instruction *MI = dyn_cast<Instruction>(M))
1899 RedoInsts.insert(MI);
1900
1901 LastIdx = Idx;
1902 }
1903 // Unique factors with equal powers -- we've folded them into the first one's
1904 // base.
1905 Factors.erase(std::unique(Factors.begin(), Factors.end(),
1906 [](const Factor &LHS, const Factor &RHS) {
1907 return LHS.Power == RHS.Power;
1908 }),
1909 Factors.end());
1910
1911 // Iteratively collect the base of each factor with an add power into the
1912 // outer product, and halve each power in preparation for squaring the
1913 // expression.
1914 for (Factor &F : Factors) {
1915 if (F.Power & 1)
1916 OuterProduct.push_back(F.Base);
1917 F.Power >>= 1;
1918 }
1919 if (Factors[0].Power) {
1920 Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
1921 OuterProduct.push_back(SquareRoot);
1922 OuterProduct.push_back(SquareRoot);
1923 }
1924 if (OuterProduct.size() == 1)
1925 return OuterProduct.front();
1926
1927 Value *V = buildMultiplyTree(Builder, OuterProduct);
1928 return V;
1929}
1930
Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
  // We can only optimize the multiplies when there is a chain of more than
  // three, such that a balanced tree might require fewer total multiplies.
  if (Ops.size() < 4)
    return nullptr;

  // Try to turn linear trees of multiplies without other uses of the
  // intermediate stages into minimal multiply DAGs with perfect sub-expression
  // re-use.
  SmallVector<Factor, 4> Factors;
  if (!collectMultiplyFactors(Ops, Factors))
    return nullptr; // All distinct factors, so nothing left for us to do.

  IRBuilder<> Builder(I);
  // The reassociate transformation for FP operations is performed only
  // if unsafe algebra is permitted by FastMathFlags. Propagate those flags
  // to the newly generated operations.
  if (auto FPI = dyn_cast<FPMathOperator>(I))
    Builder.setFastMathFlags(FPI->getFastMathFlags());

  // If the DAG consumed every operand, it IS the result.
  Value *V = buildMinimalMultiplyDAG(Builder, Factors);
  if (Ops.empty())
    return V;

  // Otherwise some operands were left over; re-insert the combined product
  // into the rank-sorted operand list (lower_bound keeps it sorted) and let
  // the caller rewrite the remaining expression.
  ValueEntry NewEntry = ValueEntry(getRank(V), V);
  Ops.insert(llvm::lower_bound(Ops, NewEntry), NewEntry);
  return nullptr;
}
1960
/// Given the linearized operands of the expression rooted at I, attempt to
/// optimize the whole expression. Returns a replacement value if the entire
/// expression collapsed to a single value, or nullptr when Ops (possibly
/// mutated in place) should be rewritten back into a tree by the caller.
Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
  // Now that we have the linearized expression tree, try to optimize it.
  // Start by folding any constants that we found. Constants have rank 0 so
  // they sit at the back of the rank-sorted operand list; pop and fold them
  // pairwise until a non-constant (or unfoldable pair) is hit.
  const DataLayout &DL = I->getModule()->getDataLayout();
  Constant *Cst = nullptr;
  unsigned Opcode = I->getOpcode();
  while (!Ops.empty()) {
    if (auto *C = dyn_cast<Constant>(Ops.back().Op)) {
      if (!Cst) {
        Ops.pop_back();
        Cst = C;
        continue;
      }
      if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, C, Cst, DL)) {
        Ops.pop_back();
        Cst = Res;
        continue;
      }
    }
    break;
  }
  // If there was nothing but constants then we are done.
  if (Ops.empty())
    return Cst;

  // Put the combined constant back at the end of the operand list, except if
  // there is no point. For example, an add of 0 gets dropped here, while a
  // multiplication by zero turns the whole expression into zero.
  if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) {
    if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
      return Cst;
    Ops.push_back(ValueEntry(0, Cst));
  }

  if (Ops.size() == 1) return Ops[0].Op;

  // Handle destructive annihilation due to identities between elements in the
  // argument list here.
  unsigned NumOps = Ops.size();
  switch (Opcode) {
  default: break;
  case Instruction::And:
  case Instruction::Or:
    if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
      return Result;
    break;

  case Instruction::Xor:
    if (Value *Result = OptimizeXor(I, Ops))
      return Result;
    break;

  case Instruction::Add:
  case Instruction::FAdd:
    if (Value *Result = OptimizeAdd(I, Ops))
      return Result;
    break;

  case Instruction::Mul:
  case Instruction::FMul:
    if (Value *Result = OptimizeMul(I, Ops))
      return Result;
    break;
  }

  // If an optimization above changed the operand count, iterate to a fixed
  // point in case further simplification became possible.
  if (Ops.size() != NumOps)
    return OptimizeExpression(I, Ops);
  return nullptr;
}
2031
// Remove dead instructions and if any operands are trivially dead add them to
// Insts so they will be removed as well.
void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I,
                                                OrderedSet &Insts) {
  assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
  // Snapshot the operands first: they must be inspected after I is erased.
  SmallVector<Value *, 4> Ops(I->operands());
  // Purge I from all bookkeeping structures before deleting it so no stale
  // pointers remain.
  ValueRankMap.erase(I);
  Insts.remove(I);
  RedoInsts.remove(I);
  I->eraseFromParent();
  // Erasing I may have made some operand instructions dead in turn; queue
  // any now-unused operands so the caller erases them as well.
  for (auto *Op : Ops)
    if (Instruction *OpInst = dyn_cast<Instruction>(Op))
      if (OpInst->use_empty())
        Insts.insert(OpInst);
}
2048
/// Zap the given instruction, adding interesting operands to the work list.
void ReassociatePass::EraseInst(Instruction *I) {
  assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
  LLVM_DEBUG(dbgs() << "Erasing dead inst: "; I->dump());

  // Snapshot the operands before the instruction is destroyed.
  SmallVector<Value *, 8> Ops(I->operands());
  // Erase the dead instruction.
  ValueRankMap.erase(I);
  RedoInsts.remove(I);
  I->eraseFromParent();
  // Optimize its operands.
  SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    if (Instruction *Op = dyn_cast<Instruction>(Ops[i])) {
      // If this is a node in an expression tree, climb to the expression root
      // and add that since that's where optimization actually happens.
      unsigned Opcode = Op->getOpcode();
      while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
             Visited.insert(Op).second)
        Op = Op->user_back();

      // The instruction we're going to push may be coming from a
      // dead block, and Reassociate skips the processing of unreachable
      // blocks because it's a waste of time and also because it can
      // lead to infinite loop due to LLVM's non-standard definition
      // of dominance. Only instructions that were ranked (i.e. reachable)
      // get requeued.
      if (ValueRankMap.contains(Op))
        RedoInsts.insert(Op);
    }

  MadeChange = true;
}
2082
/// Recursively analyze an expression to build a list of instructions that have
/// negative floating-point constant operands. The caller can then transform
/// the list to create positive constants for better reassociation and CSE.
                              SmallVectorImpl<Instruction *> &Candidates) {
  // Handle only one-use instructions. Combining negations does not justify
  // replicating instructions.
  Instruction *I;
  if (!match(V, m_OneUse(m_Instruction(I))))
    return;

  // Handle expressions of multiplications and divisions.
  // TODO: This could look through floating-point casts.
  const APFloat *C;
  switch (I->getOpcode()) {
  case Instruction::FMul:
    // Not expecting non-canonical code here. Bail out and wait.
    if (match(I->getOperand(0), m_Constant()))
      break;

    // Record fmul by a negative constant; then recurse into both operands to
    // collect nested candidates.
    if (match(I->getOperand(1), m_APFloat(C)) && C->isNegative()) {
      Candidates.push_back(I);
      LLVM_DEBUG(dbgs() << "FMul with negative constant: " << *I << '\n');
    }
    getNegatibleInsts(I->getOperand(0), Candidates);
    getNegatibleInsts(I->getOperand(1), Candidates);
    break;
  case Instruction::FDiv:
    // Not expecting non-canonical code here. Bail out and wait.
    if (match(I->getOperand(0), m_Constant()) &&
        match(I->getOperand(1), m_Constant()))
      break;

    // For fdiv, a negative constant in either position makes the instruction
    // a candidate; recurse into both operands afterwards.
    if ((match(I->getOperand(0), m_APFloat(C)) && C->isNegative()) ||
        (match(I->getOperand(1), m_APFloat(C)) && C->isNegative())) {
      Candidates.push_back(I);
      LLVM_DEBUG(dbgs() << "FDiv with negative constant: " << *I << '\n');
    }
    getNegatibleInsts(I->getOperand(0), Candidates);
    getNegatibleInsts(I->getOperand(1), Candidates);
    break;
  default:
    break;
  }
}
2128
/// Given an fadd/fsub with an operand that is a one-use instruction
/// (the fadd/fsub), try to change negative floating-point constants into
/// positive constants to increase potential for reassociation and CSE.
Instruction *ReassociatePass::canonicalizeNegFPConstantsForOp(Instruction *I,
                                                              Instruction *Op,
                                                              Value *OtherOp) {
  assert((I->getOpcode() == Instruction::FAdd ||
          I->getOpcode() == Instruction::FSub) && "Expected fadd/fsub");

  // Collect instructions with negative FP constants from the subtree that ends
  // in Op.
  getNegatibleInsts(Op, Candidates);
  if (Candidates.empty())
    return nullptr;

  // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the
  // resulting subtract will be broken up later. This can get us into an
  // infinite loop during reassociation.
  bool IsFSub = I->getOpcode() == Instruction::FSub;
  bool NeedsSubtract = !IsFSub && Candidates.size() % 2 == 1;
  if (NeedsSubtract && ShouldBreakUpSubtract(I))
    return nullptr;

  // Flip the sign of the negative constant in every candidate. Each flip is
  // a negation, and pairs of negations cancel out across the expression.
  for (Instruction *Negatible : Candidates) {
    const APFloat *C;
    if (match(Negatible->getOperand(0), m_APFloat(C))) {
      assert(!match(Negatible->getOperand(1), m_Constant()) &&
             "Expecting only 1 constant operand");
      assert(C->isNegative() && "Expected negative FP constant");
      Negatible->setOperand(0, ConstantFP::get(Negatible->getType(), abs(*C)));
      MadeChange = true;
    }
    if (match(Negatible->getOperand(1), m_APFloat(C))) {
      assert(!match(Negatible->getOperand(0), m_Constant()) &&
             "Expecting only 1 constant operand");
      assert(C->isNegative() && "Expected negative FP constant");
      Negatible->setOperand(1, ConstantFP::get(Negatible->getType(), abs(*C)));
      MadeChange = true;
    }
  }
  assert(MadeChange == true && "Negative constant candidate was not changed");

  // Negations cancelled out.
  if (Candidates.size() % 2 == 0)
    return I;

  // Negate the final operand in the expression by flipping the opcode of this
  // fadd/fsub.
  assert(Candidates.size() % 2 == 1 && "Expected odd number");
  IRBuilder<> Builder(I);
  Value *NewInst = IsFSub ? Builder.CreateFAddFMF(OtherOp, Op, I)
                          : Builder.CreateFSubFMF(OtherOp, Op, I);
  I->replaceAllUsesWith(NewInst);
  RedoInsts.insert(I);
  return dyn_cast<Instruction>(NewInst);
}
2186
/// Canonicalize expressions that contain a negative floating-point constant
/// of the following form:
///   OtherOp + (subtree) -> OtherOp {+/-} (canonical subtree)
///   (subtree) + OtherOp -> OtherOp {+/-} (canonical subtree)
///   OtherOp - (subtree) -> OtherOp {+/-} (canonical subtree)
///
/// The fadd/fsub opcode may be switched to allow folding a negation into the
/// input instruction.
Instruction *ReassociatePass::canonicalizeNegFPConstants(Instruction *I) {
  LLVM_DEBUG(dbgs() << "Combine negations for: " << *I << '\n');
  Value *X;
  Instruction *Op;
  // Each successful rewrite may yield a new root instruction, which is then
  // fed into the next pattern attempt; the final root is returned.
  if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
    I = R;
  if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
    I = R;
  if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
    I = R;
  return I;
}
2210
/// Inspect and optimize the given instruction. Note that erasing
/// instructions is not allowed.
void ReassociatePass::OptimizeInst(Instruction *I) {
  // Only consider operations that we understand.
  if (!isa<UnaryOperator>(I) && !isa<BinaryOperator>(I))
    return;

  if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
    // If an operand of this shift is a reassociable multiply, or if the shift
    // is used by a reassociable multiply or add, turn into a multiply.
    if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
        (I->hasOneUse() &&
         (isReassociableOp(I->user_back(), Instruction::Mul) ||
          isReassociableOp(I->user_back(), Instruction::Add)))) {
      RedoInsts.insert(I);
      MadeChange = true;
      I = NI;
    }

  // Commute binary operators, to canonicalize the order of their operands.
  // This can potentially expose more CSE opportunities, and makes writing other
  // transformations simpler.
  if (I->isCommutative())
    canonicalizeOperands(I);

  // Canonicalize negative constants out of expressions.
  if (Instruction *Res = canonicalizeNegFPConstants(I))
    I = Res;

  // Don't optimize floating-point instructions unless they have the
  // appropriate FastMathFlags for reassociation enabled.
  if (isa<FPMathOperator>(I) && !hasFPAssociativeFlags(I))
    return;

  // Do not reassociate boolean (i1) expressions. We want to preserve the
  // original order of evaluation for short-circuited comparisons that
  // SimplifyCFG has folded to AND/OR expressions. If the expression
  // is not further optimized, it is likely to be transformed back to a
  // short-circuited form for code gen, and the source order may have been
  // optimized for the most likely conditions.
  if (I->getType()->isIntegerTy(1))
    return;

  // If this is a bitwise or instruction of operands
  // with no common bits set, convert it to X+Y.
  if (I->getOpcode() == Instruction::Or &&
      haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1),
                          SimplifyQuery(I->getModule()->getDataLayout(),
                                        /*DT=*/nullptr, /*AC=*/nullptr, I))) {
    RedoInsts.insert(I);
    MadeChange = true;
    I = NI;
  }

  // If this is a subtract instruction which is not already in negate form,
  // see if we can convert it to X+-Y.
  if (I->getOpcode() == Instruction::Sub) {
    if (ShouldBreakUpSubtract(I)) {
      Instruction *NI = BreakUpSubtract(I, RedoInsts);
      RedoInsts.insert(I);
      MadeChange = true;
      I = NI;
    } else if (match(I, m_Neg(m_Value()))) {
      // Otherwise, this is a negation. See if the operand is a multiply tree
      // and if this is not an inner node of a multiply tree.
      if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
          (!I->hasOneUse() ||
           !isReassociableOp(I->user_back(), Instruction::Mul))) {
        // If the negate was simplified, revisit the users to see if we can
        // reassociate further.
        for (User *U : NI->users()) {
          if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
            RedoInsts.insert(Tmp);
        }
        RedoInsts.insert(I);
        MadeChange = true;
        I = NI;
      }
    }
  } else if (I->getOpcode() == Instruction::FNeg ||
             I->getOpcode() == Instruction::FSub) {
    if (ShouldBreakUpSubtract(I)) {
      Instruction *NI = BreakUpSubtract(I, RedoInsts);
      RedoInsts.insert(I);
      MadeChange = true;
      I = NI;
    } else if (match(I, m_FNeg(m_Value()))) {
      // Otherwise, this is a negation. See if the operand is a multiply tree
      // and if this is not an inner node of a multiply tree.
      // An fsub-based negation reads operand 1; a unary fneg reads operand 0.
      Value *Op = isa<BinaryOperator>(I) ? I->getOperand(1) :
                                           I->getOperand(0);
      if (isReassociableOp(Op, Instruction::FMul) &&
          (!I->hasOneUse() ||
           !isReassociableOp(I->user_back(), Instruction::FMul))) {
        // If the negate was simplified, revisit the users to see if we can
        // reassociate further.
        for (User *U : NI->users()) {
          if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
            RedoInsts.insert(Tmp);
        }
        RedoInsts.insert(I);
        MadeChange = true;
        I = NI;
      }
    }
  }

  // If this instruction is an associative binary operator, process it.
  if (!I->isAssociative()) return;
  BinaryOperator *BO = cast<BinaryOperator>(I);

  // If this is an interior node of a reassociable tree, ignore it until we
  // get to the root of the tree, to avoid N^2 analysis.
  unsigned Opcode = BO->getOpcode();
  if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
    // During the initial run we will get to the root of the tree.
    // But if we get here while we are redoing instructions, there is no
    // guarantee that the root will be visited. So Redo later
    if (BO->user_back() != BO &&
        BO->getParent() == BO->user_back()->getParent())
      RedoInsts.insert(BO->user_back());
    return;
  }

  // If this is an add tree that is used by a sub instruction, ignore it
  // until we process the subtract.
  if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
      cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub)
    return;
  if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd &&
      cast<Instruction>(BO->user_back())->getOpcode() == Instruction::FSub)
    return;

  ReassociateExpression(BO);
}
2351
/// Reassociate the expression tree rooted at I: linearize it, sort the leaves
/// by rank, optimize the flat operand list, and rewrite it back into IR.
void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
  // First, walk the expression tree, linearizing the tree, collecting the
  // operand information.
  MadeChange |= LinearizeExprTree(I, Tree, RedoInsts);
  Ops.reserve(Tree.size());
  // Expand each (value, multiplicity) pair into that many ranked entries.
  for (const RepeatedValue &E : Tree)
    Ops.append(E.second.getZExtValue(), ValueEntry(getRank(E.first), E.first));

  LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');

  // Now that we have linearized the tree to a list and have gathered all of
  // the operands and their ranks, sort the operands by their rank. Use a
  // stable_sort so that values with equal ranks will have their relative
  // positions maintained (and so the compiler is deterministic). Note that
  // this sorts so that the highest ranking values end up at the beginning of
  // the vector.
  llvm::stable_sort(Ops);

  // Now that we have the expression tree in a convenient
  // sorted form, optimize it globally if possible.
  if (Value *V = OptimizeExpression(I, Ops)) {
    if (V == I)
      // Self-referential expression in unreachable code.
      return;
    // This expression tree simplified to something that isn't a tree,
    // eliminate it.
    LLVM_DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
    I->replaceAllUsesWith(V);
    if (Instruction *VI = dyn_cast<Instruction>(V))
      if (I->getDebugLoc())
        VI->setDebugLoc(I->getDebugLoc());
    RedoInsts.insert(I);
    ++NumAnnihil;
    return;
  }

  // We want to sink immediates as deeply as possible except in the case where
  // this is a multiply tree used only by an add, and the immediate is a -1.
  // In this case we reassociate to put the negation on the outside so that we
  // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
  if (I->hasOneUse()) {
    if (I->getOpcode() == Instruction::Mul &&
        cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
        isa<ConstantInt>(Ops.back().Op) &&
        cast<ConstantInt>(Ops.back().Op)->isMinusOne()) {
      // Move the -1 from the back (deepest) to the front (outermost).
      ValueEntry Tmp = Ops.pop_back_val();
      Ops.insert(Ops.begin(), Tmp);
    } else if (I->getOpcode() == Instruction::FMul &&
               cast<Instruction>(I->user_back())->getOpcode() ==
                   Instruction::FAdd &&
               isa<ConstantFP>(Ops.back().Op) &&
               cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)) {
      // Same canonicalization for the floating-point -1.0 case.
      ValueEntry Tmp = Ops.pop_back_val();
      Ops.insert(Ops.begin(), Tmp);
    }
  }

  LLVM_DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');

  if (Ops.size() == 1) {
    if (Ops[0].Op == I)
      // Self-referential expression in unreachable code.
      return;

    // This expression tree simplified to something that isn't a tree,
    // eliminate it.
    I->replaceAllUsesWith(Ops[0].Op);
    if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
      OI->setDebugLoc(I->getDebugLoc());
    RedoInsts.insert(I);
    return;
  }

  if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) {
    // Find the pair with the highest count in the pairmap and move it to the
    // back of the list so that it can later be CSE'd.
    // example:
    //   a*b*c*d*e
    // if c*e is the most "popular" pair, we can express this as
    //   (((c*e)*d)*b)*a
    unsigned Max = 1;
    unsigned BestRank = 0;
    std::pair<unsigned, unsigned> BestPair;
    unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin;
    unsigned LimitIdx = 0;
    // With the CSE-driven heuristic, we are about to slap two values at the
    // beginning of the expression whereas they could live very late in the CFG.
    // When using the CSE-local heuristic we avoid creating dependences from
    // completely unrelated part of the CFG by limiting the expression
    // reordering on the values that live in the first seen basic block.
    // The main idea is that we want to avoid forming expressions that would
    // become loop dependent.
    if (UseCSELocalOpt) {
      const BasicBlock *FirstSeenBB = nullptr;
      int StartIdx = Ops.size() - 1;
      // Skip the first value of the expression since we need at least two
      // values to materialize an expression. I.e., even if this value is
      // anchored in a different basic block, the actual first sub expression
      // will be anchored on the second value.
      for (int i = StartIdx - 1; i != -1; --i) {
        const Value *Val = Ops[i].Op;
        const auto *CurrLeafInstr = dyn_cast<Instruction>(Val);
        const BasicBlock *SeenBB = nullptr;
        if (!CurrLeafInstr) {
          // The value is free of any CFG dependencies.
          // Do as if it lives in the entry block.
          //
          // We do this to make sure all the values falling on this path are
          // seen through the same anchor point. The rationale is these values
          // can be combined together to from a sub expression free of any CFG
          // dependencies so we want them to stay together.
          // We could be cleverer and postpone the anchor down to the first
          // anchored value, but that's likely complicated to get right.
          // E.g., we wouldn't want to do that if that means being stuck in a
          // loop.
          //
          // For instance, we wouldn't want to change:
          // res = arg1 op arg2 op arg3 op ... op loop_val1 op loop_val2 ...
          // into
          // res = loop_val1 op arg1 op arg2 op arg3 op ... op loop_val2 ...
          // Because all the sub expressions with arg2..N would be stuck between
          // two loop dependent values.
          SeenBB = &I->getParent()->getParent()->getEntryBlock();
        } else {
          SeenBB = CurrLeafInstr->getParent();
        }

        if (!FirstSeenBB) {
          FirstSeenBB = SeenBB;
          continue;
        }
        if (FirstSeenBB != SeenBB) {
          // ith value is in a different basic block.
          // Rewind the index once to point to the last value on the same basic
          // block.
          LimitIdx = i + 1;
          LLVM_DEBUG(dbgs() << "CSE reordering: Consider values between ["
                            << LimitIdx << ", " << StartIdx << "]\n");
          break;
        }
      }
    }
    for (unsigned i = Ops.size() - 1; i > LimitIdx; --i) {
      // We must use int type to go below zero when LimitIdx is 0.
      for (int j = i - 1; j >= (int)LimitIdx; --j) {
        unsigned Score = 0;
        Value *Op0 = Ops[i].Op;
        Value *Op1 = Ops[j].Op;
        // Canonicalize the pair ordering to match how BuildPairMap keys it.
        if (std::less<Value *>()(Op1, Op0))
          std::swap(Op0, Op1);
        auto it = PairMap[Idx].find({Op0, Op1});
        if (it != PairMap[Idx].end()) {
          // Functions like BreakUpSubtract() can erase the Values we're using
          // as keys and create new Values after we built the PairMap. There's a
          // small chance that the new nodes can have the same address as
          // something already in the table. We shouldn't accumulate the stored
          // score in that case as it refers to the wrong Value.
          if (it->second.isValid())
            Score += it->second.Score;
        }

        unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank);

        // By construction, the operands are sorted in reverse order of their
        // topological order.
        // So we tend to form (sub) expressions with values that are close to
        // each other.
        //
        // Now to expose more CSE opportunities we want to expose the pair of
        // operands that occur the most (as statically computed in
        // BuildPairMap.) as the first sub-expression.
        //
        // If two pairs occur as many times, we pick the one with the
        // lowest rank, meaning the one with both operands appearing first in
        // the topological order.
        if (Score > Max || (Score == Max && MaxRank < BestRank)) {
          BestPair = {j, i};
          Max = Score;
          BestRank = MaxRank;
        }
      }
    }
    if (Max > 1) {
      // Move the winning pair to the back so it forms the innermost
      // (first-computed) sub-expression after rewriting.
      auto Op0 = Ops[BestPair.first];
      auto Op1 = Ops[BestPair.second];
      Ops.erase(&Ops[BestPair.second]);
      Ops.erase(&Ops[BestPair.first]);
      Ops.push_back(Op0);
      Ops.push_back(Op1);
    }
  }
  LLVM_DEBUG(dbgs() << "RAOut after CSE reorder:\t"; PrintOps(I, Ops);
             dbgs() << '\n');
  // Now that we ordered and optimized the expressions, splat them back into
  // the expression tree, removing any unneeded nodes.
  RewriteExprTree(I, Ops);
}
2551
2552void
2553ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
2554 // Make a "pairmap" of how often each operand pair occurs.
2555 for (BasicBlock *BI : RPOT) {
2556 for (Instruction &I : *BI) {
2557 if (!I.isAssociative())
2558 continue;
2559
2560 // Ignore nodes that aren't at the root of trees.
2561 if (I.hasOneUse() && I.user_back()->getOpcode() == I.getOpcode())
2562 continue;
2563
2564 // Collect all operands in a single reassociable expression.
2565 // Since Reassociate has already been run once, we can assume things
2566 // are already canonical according to Reassociation's regime.
2567 SmallVector<Value *, 8> Worklist = { I.getOperand(0), I.getOperand(1) };
2569 while (!Worklist.empty() && Ops.size() <= GlobalReassociateLimit) {
2570 Value *Op = Worklist.pop_back_val();
2571 Instruction *OpI = dyn_cast<Instruction>(Op);
2572 if (!OpI || OpI->getOpcode() != I.getOpcode() || !OpI->hasOneUse()) {
2573 Ops.push_back(Op);
2574 continue;
2575 }
2576 // Be paranoid about self-referencing expressions in unreachable code.
2577 if (OpI->getOperand(0) != OpI)
2578 Worklist.push_back(OpI->getOperand(0));
2579 if (OpI->getOperand(1) != OpI)
2580 Worklist.push_back(OpI->getOperand(1));
2581 }
2582 // Skip extremely long expressions.
2583 if (Ops.size() > GlobalReassociateLimit)
2584 continue;
2585
2586 // Add all pairwise combinations of operands to the pair map.
2587 unsigned BinaryIdx = I.getOpcode() - Instruction::BinaryOpsBegin;
2589 for (unsigned i = 0; i < Ops.size() - 1; ++i) {
2590 for (unsigned j = i + 1; j < Ops.size(); ++j) {
2591 // Canonicalize operand orderings.
2592 Value *Op0 = Ops[i];
2593 Value *Op1 = Ops[j];
2594 if (std::less<Value *>()(Op1, Op0))
2595 std::swap(Op0, Op1);
2596 if (!Visited.insert({Op0, Op1}).second)
2597 continue;
2598 auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, {Op0, Op1, 1}});
2599 if (!res.second) {
2600 // If either key value has been erased then we've got the same
2601 // address by coincidence. That can't happen here because nothing is
2602 // erasing values but it can happen by the time we're querying the
2603 // map.
2604 assert(res.first->second.isValid() && "WeakVH invalidated");
2605 ++res.first->second.Score;
2606 }
2607 }
2608 }
2609 }
2610 }
2611}
2612
2614 // Get the functions basic blocks in Reverse Post Order. This order is used by
2615 // BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
2616 // blocks (it has been seen that the analysis in this pass could hang when
2617 // analysing dead basic blocks).
2619
2620 // Calculate the rank map for F.
2621 BuildRankMap(F, RPOT);
2622
2623 // Build the pair map before running reassociate.
2624 // Technically this would be more accurate if we did it after one round
2625 // of reassociation, but in practice it doesn't seem to help much on
2626 // real-world code, so don't waste the compile time running reassociate
2627 // twice.
2628 // If a user wants, they could expicitly run reassociate twice in their
2629 // pass pipeline for further potential gains.
2630 // It might also be possible to update the pair map during runtime, but the
2631 // overhead of that may be large if there's many reassociable chains.
2632 BuildPairMap(RPOT);
2633
2634 MadeChange = false;
2635
2636 // Traverse the same blocks that were analysed by BuildRankMap.
2637 for (BasicBlock *BI : RPOT) {
2638 assert(RankMap.count(&*BI) && "BB should be ranked.");
2639 // Optimize every instruction in the basic block.
2640 for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
2641 if (isInstructionTriviallyDead(&*II)) {
2642 EraseInst(&*II++);
2643 } else {
2644 OptimizeInst(&*II);
2645 assert(II->getParent() == &*BI && "Moved to a different block!");
2646 ++II;
2647 }
2648
2649 // Make a copy of all the instructions to be redone so we can remove dead
2650 // instructions.
2651 OrderedSet ToRedo(RedoInsts);
2652 // Iterate over all instructions to be reevaluated and remove trivially dead
2653 // instructions. If any operand of the trivially dead instruction becomes
2654 // dead mark it for deletion as well. Continue this process until all
2655 // trivially dead instructions have been removed.
2656 while (!ToRedo.empty()) {
2657 Instruction *I = ToRedo.pop_back_val();
2659 RecursivelyEraseDeadInsts(I, ToRedo);
2660 MadeChange = true;
2661 }
2662 }
2663
2664 // Now that we have removed dead instructions, we can reoptimize the
2665 // remaining instructions.
2666 while (!RedoInsts.empty()) {
2667 Instruction *I = RedoInsts.front();
2668 RedoInsts.erase(RedoInsts.begin());
2670 EraseInst(I);
2671 else
2672 OptimizeInst(I);
2673 }
2674 }
2675
2676 // We are done with the rank map and pair map.
2677 RankMap.clear();
2678 ValueRankMap.clear();
2679 for (auto &Entry : PairMap)
2680 Entry.clear();
2681
2682 if (MadeChange) {
2685 return PA;
2686 }
2687
2688 return PreservedAnalyses::all();
2689}
2690
2691namespace {
2692
2693 class ReassociateLegacyPass : public FunctionPass {
2694 ReassociatePass Impl;
2695
2696 public:
2697 static char ID; // Pass identification, replacement for typeid
2698
2699 ReassociateLegacyPass() : FunctionPass(ID) {
2701 }
2702
2703 bool runOnFunction(Function &F) override {
2704 if (skipFunction(F))
2705 return false;
2706
2707 FunctionAnalysisManager DummyFAM;
2708 auto PA = Impl.run(F, DummyFAM);
2709 return !PA.areAllPreserved();
2710 }
2711
2712 void getAnalysisUsage(AnalysisUsage &AU) const override {
2713 AU.setPreservesCFG();
2717 }
2718 };
2719
2720} // end anonymous namespace
2721
2722char ReassociateLegacyPass::ID = 0;
2723
2724INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
2725 "Reassociate expressions", false, false)
2726
2727// Public interface to the Reassociate pass
2729 return new ReassociateLegacyPass();
2730}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
uint64_t Size
static bool runOnFunction(Function &F, bool PostInlining)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This is the interface for a simple mod/ref and alias analysis over globals.
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, ScalarEvolution *SE, LoopInfo *LI)
isInteresting - Test whether the given expression is "interesting" when used by the given expression,...
Definition: IVUsers.cpp:56
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
nary reassociate
#define P(N)
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static void PrintOps(Instruction *I, const SmallVectorImpl< ValueEntry > &Ops)
Print out the expression identified in the Ops list.
Definition: Reassociate.cpp:82
static bool ShouldBreakUpSubtract(Instruction *Sub)
Return true if we should break up this subtract of X-Y into (X + -Y).
static Value * buildMultiplyTree(IRBuilderBase &Builder, SmallVectorImpl< Value * > &Ops)
Build a tree of multiplies, computing the product of Ops.
static void getNegatibleInsts(Value *V, SmallVectorImpl< Instruction * > &Candidates)
Recursively analyze an expression to build a list of instructions that have negative floating-point c...
static unsigned CarmichaelShift(unsigned Bitwidth)
Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael function.
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
static BinaryOperator * BreakUpSubtract(Instruction *Sub, ReassociatePass::OrderedSet &ToRedo)
If we have (X-Y), and if either X is an add, or if this is only used by an add, transform this into (...
static void FindSingleUseMultiplyFactors(Value *V, SmallVectorImpl< Value * > &Factors)
If V is a single-use multiply, recursively add its operands as factors, otherwise add V to the list o...
static Value * OptimizeAndOrXor(unsigned Opcode, SmallVectorImpl< ValueEntry > &Ops)
Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
static BinaryOperator * convertOrWithNoCommonBitsToAdd(Instruction *Or)
If we have (X|Y), and iff X and Y have no common bits set, transform this into (X+Y) to allow arithme...
static Instruction * CreateNeg(Value *S1, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
static bool collectMultiplyFactors(SmallVectorImpl< ValueEntry > &Ops, SmallVectorImpl< Factor > &Factors)
Build up a vector of value/power pairs factoring a product.
static BinaryOperator * ConvertShiftToMul(Instruction *Shl)
If this is a shift of a reassociable multiply or is used by one, change this into a multiply by a con...
static cl::opt< bool > UseCSELocalOpt(DEBUG_TYPE "-use-cse-local", cl::desc("Only reorder expressions within a basic block " "when exposing CSE opportunities"), cl::init(true), cl::Hidden)
static unsigned FindInOperandList(const SmallVectorImpl< ValueEntry > &Ops, unsigned i, Value *X)
Scan backwards and forwards among values with the same rank as element i to see if X exists.
static BinaryOperator * LowerNegateToMultiply(Instruction *Neg)
Replace 0-X with X*-1.
static BinaryOperator * isReassociableOp(Value *V, unsigned Opcode)
Return true if V is an instruction of the specified opcode and if it only has one use.
static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode)
Add the extra weight 'RHS' to the existing weight 'LHS', reducing the combined weight using any speci...
static bool LinearizeExprTree(Instruction *I, SmallVectorImpl< RepeatedValue > &Ops, ReassociatePass::OrderedSet &ToRedo)
Given an associative binary expression, return the leaf nodes in Ops along with their weights (how ma...
std::pair< Value *, APInt > RepeatedValue
static bool hasFPAssociativeFlags(Instruction *I)
Return true if I is an instruction with the FastMathFlags that are needed for general reassociation s...
static Value * NegateValue(Value *V, Instruction *BI, ReassociatePass::OrderedSet &ToRedo)
Insert instructions before the instruction pointed to by BI, that computes the negative version of th...
static bool shouldConvertOrWithNoCommonBitsToAdd(Instruction *Or)
Return true if it may be profitable to convert this (X|Y) into (X+Y).
static Value * createAndInstr(Instruction *InsertBefore, Value *Opnd, const APInt &ConstOpnd)
Helper function of CombineXorOpnd().
#define DEBUG_TYPE
Definition: Reassociate.cpp:68
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
static bool isLoadCombineCandidate(Instruction *Or)
static Value * EmitAddTreeOfValues(Instruction *I, SmallVectorImpl< WeakTrackingVH > &Ops)
Emit a tree of add instructions, summing Ops together and returning the result.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
Value * RHS
Value * LHS
static constexpr uint32_t Opcode
Definition: aarch32.h:200
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:76
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
bool isMinValue() const
Determine if this is the smallest unsigned value.
Definition: APInt.h:395
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:449
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:649
Represent the analysis usage information of a pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:416
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:173
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", Instruction *InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
BinaryOps getOpcode() const
Definition: InstrTypes.h:391
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:133
static Constant * getBinOpAbsorber(unsigned Opcode, Type *Ty)
Return the absorbing element for the given binary operation, i.e.
Definition: Constants.cpp:2559
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2478
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2514
static Constant * getNeg(Constant *C, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2441
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:260
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:927
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:403
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:356
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
const BasicBlock & getEntryBlock() const
Definition: Function.h:778
Legacy wrapper pass to provide the GlobalsAAResult object.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateFAddFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1541
Value * CreateFSubFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1568
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:305
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1581
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1355
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2639
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:438
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:71
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
const BasicBlock * getParent() const
Definition: Instruction.h:139
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:136
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:75
bool isNilpotent() const
Return true if the instruction is nilpotent:
Definition: Instruction.h:704
const char * getOpcodeName() const
Definition: Instruction.h:241
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:239
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:435
bool isIdempotent() const
Return true if the instruction is idempotent:
Definition: Instruction.h:690
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:275
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1743
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:172
bool areAllPreserved() const
Test whether all analyses are preserved (and none are abandoned).
Definition: PassManager.h:347
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:178
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:208
Reassociate commutative expressions.
Definition: Reassociate.h:71
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:384
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:451
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void reserve(size_type N)
Definition: SmallVector.h:667
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:809
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:234
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", Instruction *InsertBefore=nullptr)
Definition: InstrTypes.h:163
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1724
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
void clearSubclassOptionalData()
Clear the optional flags contained in this value.
Definition: Value.h:544
void deleteValue()
Delete a pointer to a generic Value.
Definition: Value.cpp:110
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
self_iterator getIterator()
Definition: ilist_node.h:109
Utility class representing a non-constant Xor-operand.
Value * getSymbolicPart() const
unsigned getSymbolicRank() const
void setSymbolicRank(unsigned R)
const APInt & getConstPart() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:144
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:724
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:988
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:278
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
apfloat_match m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
Definition: PatternMatch.h:295
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
constexpr double e
Definition: MathExtras.h:31
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:237
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool haveNoCommonBitsSet(const WithCache< const Value * > &LHSCache, const WithCache< const Value * > &RHSCache, const SimplifyQuery &SQ)
Return true if LHS and RHS have no common bits set.
void stable_sort(R &&Range)
Definition: STLExtras.h:1970
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1394
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1381
FunctionPass * createReassociatePass()
void initializeReassociateLegacyPassPass(PassRegistry &)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1733
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:399
Constant * ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op, const DataLayout &DL)
Attempt to constant fold a unary operation with the specified operand.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
bool replaceDbgUsesWithUndef(Instruction *I)
Replace all the uses of an SSA value in @llvm.dbg intrinsics with undef.
Definition: Local.cpp:610
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1945
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool mayHaveNonDefUseDependency(const Instruction &I)
Returns true if the result or effects of the given instructions I depend values not reachable through...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
Utility class representing a base and exponent pair which form one factor of some product.
Definition: Reassociate.h:59