LLVM 23.0.0git
NaryReassociate.h
Go to the documentation of this file.
1//===- NaryReassociate.h - Reassociate n-ary expressions --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass reassociates n-ary add expressions and eliminates the redundancy
10// exposed by the reassociation.
11//
12// A motivating example:
13//
14// void foo(int a, int b) {
15// bar(a + b);
16// bar((a + 2) + b);
17// }
18//
19// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify
20// the above code to
21//
22// int t = a + b;
23// bar(t);
24// bar(t + 2);
25//
26// However, the Reassociate pass is unable to do that because it processes each
27// instruction individually and believes (a + 2) + b is the best form according
28// to its rank system.
29//
30// To address this limitation, NaryReassociate reassociates an expression in a
31// form that reuses existing instructions. As a result, NaryReassociate can
32// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that
33// (a + b) is computed before.
34//
35// NaryReassociate works as follows. For every instruction in the form of (a +
36// b) + c, it checks whether a + c or b + c is already computed by a dominating
37// instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b +
38// c) + a and removes the redundancy accordingly. To efficiently look up whether
39// an expression is computed before, we store each instruction seen and its SCEV
40// into an SCEV-to-instruction map.
41//
42// Although the algorithm pattern-matches only ternary additions, it
43// automatically handles many >3-ary expressions by walking through the function
44// in the depth-first order. For example, given
45//
46// (a + c) + d
47// ((a + b) + c) + d
48//
49// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites
50// ((a + c) + b) + d into ((a + c) + d) + b.
51//
52// Finally, the above dominator-based algorithm may need to be run multiple
53// iterations before emitting optimal code. One source of this need is that we
54// only split an operand when it is used only once. The above algorithm can
55// eliminate an instruction and decrease the usage count of its operands. As a
56// result, an instruction that previously had multiple uses may become a
57// single-use instruction and thus eligible for split consideration. For
58// example,
59//
60// ac = a + c
61// ab = a + b
62// abc = ab + c
63// ab2 = ab + b
64// ab2c = ab2 + c
65//
66// In the first iteration, we cannot reassociate abc to ac+b because ab is used
67// twice. However, we can reassociate ab2c to abc+b in the first iteration. As a
68// result, ab2 becomes dead and ab will be used only once in the second
69// iteration.
70//
71// Limitations and TODO items:
72//
73// 1) We only considers n-ary adds and muls for now. This should be extended
74// and generalized.
75//
76//===----------------------------------------------------------------------===//
77
78#ifndef LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H
79#define LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H
80
81#include "llvm/ADT/DenseMap.h"
84#include "llvm/IR/PassManager.h"
85#include "llvm/IR/ValueHandle.h"
86
87namespace llvm {
88
89class AssumptionCache;
90class BinaryOperator;
91class DataLayout;
92class DominatorTree;
93class Function;
95class Instruction;
98class Type;
99class Value;
100
101class NaryReassociatePass : public PassInfoMixin<NaryReassociatePass> {
102public:
104
105 // Glue for old PM.
108 TargetTransformInfo *TTI_);
109
110private:
111 // Runs only one iteration of the dominator-based algorithm. See the header
112 // comments for why we need multiple iterations.
113 bool doOneIteration(Function &F);
114
115 // Reassociates I for better CSE.
116 Instruction *tryReassociate(Instruction *I, SCEVUse &OrigSCEV);
117
118 // Reassociate GEP for better CSE.
119 Instruction *tryReassociateGEP(GetElementPtrInst *GEP);
120
121 // Try splitting GEP at the I-th index and see whether either part can be
122 // CSE'ed. This is a helper function for tryReassociateGEP.
123 //
124 // \p IndexedType The element type indexed by GEP's I-th index. This is
125 // equivalent to
126 // GEP->getIndexedType(GEP->getPointerOperand(), 0-th index,
127 // ..., i-th index).
128 GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
129 unsigned I, Type *IndexedType);
130
131 // Given GEP's I-th index = LHS + RHS, see whether &Base[..][LHS][..] or
132 // &Base[..][RHS][..] can be CSE'ed and rewrite GEP accordingly.
133 GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
134 unsigned I, Value *LHS,
135 Value *RHS, Type *IndexedType);
136
137 // Reassociate binary operators for better CSE.
138 Instruction *tryReassociateBinaryOp(BinaryOperator *I);
139
140 // A helper function for tryReassociateBinaryOp. LHS and RHS are explicitly
141 // passed.
142 Instruction *tryReassociateBinaryOp(Value *LHS, Value *RHS,
144 // Rewrites I to (LHS op RHS) if LHS is computed already.
145 Instruction *tryReassociatedBinaryOp(SCEVUse LHS, Value *RHS,
147
148 // Tries to match Op1 and Op2 by using V.
149 bool matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1, Value *&Op2);
150
151 // Gets SCEV for (LHS op RHS).
152 SCEVUse getBinarySCEV(BinaryOperator *I, SCEVUse LHS, SCEVUse RHS);
153
154 // Returns the closest dominator of \c Dominatee that computes
155 // \c CandidateExpr. Returns null if not found.
156 Instruction *findClosestMatchingDominator(SCEVUse CandidateExpr,
157 Instruction *Dominatee);
158
159 // Try to match \p I as signed/unsigned Min/Max and reassociate it. \p
160 // OrigSCEV is set if \I matches Min/Max regardless whether resassociation is
161 // done or not. If reassociation was successful newly generated instruction is
162 // returned, otherwise nullptr.
163 template <typename PredT>
164 Instruction *matchAndReassociateMinOrMax(Instruction *I, SCEVUse &OrigSCEV);
165
166 // Reassociate Min/Max.
167 template <typename MaxMinT>
168 Value *tryReassociateMinOrMax(Instruction *I, MaxMinT MaxMinMatch, Value *LHS,
169 Value *RHS);
170
171 // GetElementPtrInst implicitly sign-extends an index if the index is shorter
172 // than the pointer size. This function returns whether Index is shorter than
173 // GEP's pointer size, i.e., whether Index needs to be sign-extended in order
174 // to be an index of GEP.
175 bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP);
176
177 AssumptionCache *AC;
178 const DataLayout *DL;
179 DominatorTree *DT;
180 ScalarEvolution *SE;
183
184 // A lookup table quickly telling which instructions compute the given SCEV.
185 // Note that there can be multiple instructions at different locations
186 // computing to the same SCEV, so we map a SCEV to an instruction list. For
187 // example,
188 //
189 // if (p1)
190 // foo(a + b);
191 // if (p2)
192 // bar(a + b);
194};
195
196} // end namespace llvm
197
198#endif // LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H
This file defines the DenseMap class.
Hexagon Common GEP
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file defines the SmallVector class.
Value * RHS
Value * LHS
A cache of @llvm.assume calls within a function.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool runImpl(Function &F, AssumptionCache *AC_, DominatorTree *DT_, ScalarEvolution *SE_, TargetLibraryInfo *TLI_, TargetTransformInfo *TTI_)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
The main scalar evolution driver.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM Value Representation.
Definition Value.h:75
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70