//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Loop unrolling may create many similar GEPs for array accesses.
// e.g., a 2-level loop
//
// float a[32][32]; // global variable
//
// for (int i = 0; i < 2; ++i) {
//   for (int j = 0; j < 2; ++j) {
//     ...
//     ... = a[x + i][y + j];
//     ...
//   }
// }
//
// will probably be unrolled to:
//
//   gep %a, 0, %x, %y; load
//   gep %a, 0, %x, %y + 1; load
//   gep %a, 0, %x + 1, %y; load
//   gep %a, 0, %x + 1, %y + 1; load
//
// LLVM's GVN does not use partial redundancy elimination yet, and is thus
// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs
// a significant slowdown on targets with limited addressing modes. For
// instance, because the PTX target does not support the reg+reg addressing
// mode, the NVPTX backend emits PTX code that literally computes the pointer
// address of each GEP, wasting tons of registers. It emits the following PTX
// for the first load and similar PTX for the other loads.
//
//   mov.u32        %r1, %x;
//   mov.u32        %r2, %y;
//   mul.wide.u32   %rl2, %r1, 128;
//   mov.u64        %rl3, a;
//   add.s64        %rl4, %rl3, %rl2;
//   mul.wide.u32   %rl5, %r2, 4;
//   add.s64        %rl6, %rl4, %rl5;
//   ld.global.f32  %f1, [%rl6];
//
// To reduce register pressure, the optimization implemented in this file
// merges the common part of a group of GEPs, so we can compute each pointer
// address by adding a simple offset to the common part, saving many registers.
//
// It works by splitting each GEP into a variadic base and a constant offset.
// The variadic base can be computed once and reused by multiple GEPs, and the
// constant offsets can be nicely folded into the reg+immediate addressing mode
// (supported by most targets) without using any extra register.
//
// For instance, we transform the four GEPs and four loads in the above example
// into:
//
//   base = gep a, 0, x, y
//   load base
//   load base + 1  * sizeof(float)
//   load base + 32 * sizeof(float)
//   load base + 33 * sizeof(float)
//
// Given the transformed IR, a backend that supports the reg+immediate
// addressing mode can easily fold the pointer arithmetics into the loads. For
// example, the NVPTX backend can easily fold the pointer arithmetics into the
// ld.global.f32 instructions, and the resultant PTX uses far fewer registers.
//
//   mov.u32        %r1, %tid.x;
//   mov.u32        %r2, %tid.y;
//   mul.wide.u32   %rl2, %r1, 128;
//   mov.u64        %rl3, a;
//   add.s64        %rl4, %rl3, %rl2;
//   mul.wide.u32   %rl5, %r2, 4;
//   add.s64        %rl6, %rl4, %rl5;
//   ld.global.f32  %f1, [%rl6];      // so far the same as unoptimized PTX
//   ld.global.f32  %f2, [%rl6+4];    // much better
//   ld.global.f32  %f3, [%rl6+128];  // much better
//   ld.global.f32  %f4, [%rl6+132];  // much better
//
// Another improvement enabled by the LowerGEP flag is to lower a GEP with
// multiple indices to either multiple GEPs with a single index or arithmetic
// operations (depending on whether the target uses alias analysis in codegen).
// Such transformation can have the following benefits:
// (1) It can always extract constants in the indices of structure types.
// (2) After such lowering, there are more optimization opportunities such as
//     CSE, LICM and CGP.
//
// E.g. the following GEPs have multiple indices:
//  BB1:
//    %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
//    load %p
//    ...
//  BB2:
//    %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j2, i32 2
//    load %p2
//    ...
//
// We cannot CSE the common part related to index "i64 %i" while the GEPs keep
// this multiple-index form; lowering the GEPs makes such reuse possible.
// If the target does not use alias analysis in codegen, this pass will
// lower a GEP with multiple indices into arithmetic operations:
//  BB1:
//    %1 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %3 = add i64 %1, %2                          ; CSE opportunity
//    %4 = mul i64 %j1, length_of_struct
//    %5 = add i64 %3, %4
//    %6 = add i64 %5, struct_field_3              ; Constant offset
//    %p = inttoptr i64 %6 to i32*
//    load %p
//    ...
//  BB2:
//    %7 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %9 = add i64 %7, %8                          ; CSE opportunity
//    %10 = mul i64 %j2, length_of_struct
//    %11 = add i64 %9, %10
//    %12 = add i64 %11, struct_field_2            ; Constant offset
//    %p2 = inttoptr i64 %12 to i32*
//    load %p2
//    ...
//
// If the target uses alias analysis in codegen, this pass will lower a GEP
// with multiple indices into multiple GEPs with a single index:
//  BB1:
//    %1 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %3 = getelementptr i8* %1, i64 %2            ; CSE opportunity
//    %4 = mul i64 %j1, length_of_struct
//    %5 = getelementptr i8* %3, i64 %4
//    %6 = getelementptr i8* %5, struct_field_3    ; Constant offset
//    %p = bitcast i8* %6 to i32*
//    load %p
//    ...
//  BB2:
//    %7 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %9 = getelementptr i8* %7, i64 %8            ; CSE opportunity
//    %10 = mul i64 %j2, length_of_struct
//    %11 = getelementptr i8* %9, i64 %10
//    %12 = getelementptr i8* %11, struct_field_2  ; Constant offset
//    %p2 = bitcast i8* %12 to i32*
//    load %p2
//    ...
//
// Lowering GEPs can also benefit other passes such as LICM and CGP.
// LICM (Loop Invariant Code Motion) cannot hoist/sink a GEP with multiple
// indices if one of the indices is variant. If we lower such a GEP into
// invariant parts and variant parts, LICM can hoist/sink those invariant
// parts; an example is sketched below.
// CGP (CodeGen Prepare) tries to sink address calculations that match the
// target's addressing modes. A GEP with multiple indices may not match and
// will not be sunk. If we lower such a GEP into smaller parts, CGP may sink
// some of them, so we end up with a better addressing mode.
//
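// For example (illustrative IR, assuming %inv is loop invariant and %iv is
// not), lowering
//
//    %p = getelementptr [10 x %struct]* %ptr, i64 %inv, i64 %iv
//
// separates the "%inv * length_of_10xstruct" computation into an instruction
// of its own, which LICM can hoist out of the loop, leaving only the
// %iv-dependent part inside.
//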
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <string>

using namespace llvm;
using namespace llvm::PatternMatch;

static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
    "disable-separate-const-offset-from-gep", cl::init(false),
    cl::desc("Do not separate the constant offset from a GEP instruction"),
    cl::Hidden);

// Setting this flag may emit false positives when the input module already
// contains dead instructions. Therefore, we set it only in unit tests that are
// free of dead code.
static cl::opt<bool>
    VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false),
                     cl::desc("Verify this pass produces no dead code"),
                     cl::Hidden);
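
// For example, a lit test can exercise this dead-code check with a RUN line
// like the following (illustrative):
//
//   opt < %s -separate-const-offset-from-gep \
//       -reassociate-geps-verify-no-dead-code -S
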
namespace {

/// A helper class for separating a constant offset from a GEP index.
///
/// In real programs, a GEP index may be more complicated than a simple addition
/// of something and a constant integer which can be trivially split. For
/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
/// constant offset, so that we can separate the index to (a << 3) + b and 5.
///
/// Therefore, this class looks into the expression that computes a given GEP
/// index, and tries to find a constant integer that can be hoisted to the
/// outermost level of the expression as an addition. Not every constant in an
/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to
/// (b * a + 5); nor can we transform (3 * (a + 5)) to (3 * a + 5); however,
/// in the latter case, -instcombine probably already optimized (3 * (a + 5))
/// to (3 * a + 15).
class ConstantOffsetExtractor {
public:
  /// Extracts a constant offset from the given GEP index. It returns the
  /// new index representing the remainder (equal to the original index minus
  /// the constant offset), or nullptr if we cannot extract a constant offset.
  /// \p Idx           The given GEP index
  /// \p GEP           The given GEP
  /// \p UserChainTail Outputs the tail of UserChain so that we can
  ///                  garbage-collect unused instructions in UserChain.
  static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
                        User *&UserChainTail, const DominatorTree *DT);

  /// Looks for a constant offset from the given GEP index without extracting
  /// it. It returns the numeric value of the extracted constant offset (0 if
  /// failed). The meanings of the arguments are the same as in Extract.
  static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
                      const DominatorTree *DT);

private:
  ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
      : IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) {
  }

  /// Searches the expression that computes V for a non-zero constant C s.t.
  /// V can be reassociated into the form V' + C. If the searching is
  /// successful, returns C and updates UserChain as a def-use chain from C to
  /// V; otherwise, UserChain is empty.
  ///
  /// \p V            The given expression
  /// \p SignExtended Whether V will be sign-extended in the computation of the
  ///                 GEP index
  /// \p ZeroExtended Whether V will be zero-extended in the computation of the
  ///                 GEP index
  /// \p NonNegative  Whether V is guaranteed to be non-negative. For example,
  ///                 an index of an inbounds GEP is guaranteed to be
  ///                 non-negative. Leveraging this, we can better split
  ///                 inbounds GEPs.
  APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);

  /// A helper function to look into both operands of a binary operator.
  APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
                            bool ZeroExtended);

  /// After finding the constant offset C from the GEP index I, we build a new
  /// index I' s.t. I' + C = I. This function builds and returns the new
  /// index I' according to UserChain produced by function "find".
  ///
  /// The building conceptually takes two steps:
  /// 1) iteratively distribute s/zext towards the leaves of the expression
  ///    tree that computes I
  /// 2) reassociate the expression tree to the form I' + C.
  ///
  /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
  /// sext to a, b and 5 so that we have
  ///   sext(a) + (sext(b) + 5).
  /// Then, we reassociate it to
  ///   (sext(a) + sext(b)) + 5.
  /// Given this form, we know I' is sext(a) + sext(b).
  Value *rebuildWithoutConstOffset();

  /// After the first step of rebuilding the GEP index without the constant
  /// offset, distribute s/zext to the operands of all operators in UserChain.
  /// e.g., zext(sext(a + (b + 5))) (assuming no overflow) =>
  /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
  ///
  /// The function also updates UserChain to point to new subexpressions after
  /// distributing s/zext. e.g., the old UserChain of the above example is
  /// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
  /// and the new UserChain is
  /// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
  ///   zext(sext(a)) + (zext(sext(b)) + zext(sext(5)))
  ///
  /// \p ChainIndex The index to UserChain. ChainIndex is initially
  ///               UserChain.size() - 1, and is decremented during
  ///               the recursion.
  Value *distributeExtsAndCloneChain(unsigned ChainIndex);

  /// Reassociates the GEP index to the form I' + C and returns I'.
  Value *removeConstOffset(unsigned ChainIndex);

  /// A helper function to apply ExtInsts, a list of s/zext, to value V.
  /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
  /// returns "sext i32 (zext i16 V to i32) to i64".
  Value *applyExts(Value *V);

  /// A helper function that returns whether we can trace into the operands
  /// of binary operator BO for a constant offset.
  ///
  /// \p SignExtended Whether BO is surrounded by sext
  /// \p ZeroExtended Whether BO is surrounded by zext
  /// \p NonNegative  Whether BO is known to be non-negative, e.g., an in-bound
  ///                 array index.
  bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
                    bool NonNegative);

  /// The path from the constant offset to the old GEP index. e.g., if the GEP
  /// index is "a * b + (c + 5)". After running function find, UserChain[0] will
  /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
  /// UserChain[2] will be the entire expression "a * b + (c + 5)".
  ///
  /// This path helps to rebuild the new GEP index.
  SmallVector<User *, 8> UserChain;

  /// A data structure used in rebuildWithoutConstOffset. Contains all
  /// sext/zext instructions along UserChain.
  SmallVector<CastInst *, 16> ExtInsts;

  /// Insertion position of cloned instructions.
  Instruction *IP;

  const DataLayout &DL;
  const DominatorTree *DT;
};

/// A pass that tries to split every GEP in the function into a variadic
/// base and a constant offset. It is a FunctionPass because searching for the
/// constant offset may inspect other basic blocks.
class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass {
public:
  static char ID;

  SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
      : FunctionPass(ID), LowerGEP(LowerGEP) {
    initializeSeparateConstOffsetFromGEPLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.setPreservesCFG();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }

  bool runOnFunction(Function &F) override;

private:
  bool LowerGEP;
};

/// The implementation shared by the legacy and new pass managers. It tries to
/// split every GEP in the function into a variadic base and a constant offset.
/// It works on whole functions because searching for the constant offset may
/// inspect other basic blocks.
class SeparateConstOffsetFromGEP {
public:
  SeparateConstOffsetFromGEP(
      DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI,
      TargetLibraryInfo *TLI,
      function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
      : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}

  bool run(Function &F);

private:
  /// Tries to split the given GEP into a variadic base and a constant offset,
  /// and returns true if the splitting succeeds.
  bool splitGEP(GetElementPtrInst *GEP);

  /// Lower a GEP with multiple indices into multiple GEPs with a single index.
  /// Function splitGEP already split the original GEP into a variadic part and
  /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
  /// variadic part into a set of GEPs with a single index and applies
  /// AccumulativeByteOffset to it.
  /// \p Variadic               The variadic part of the original GEP.
  /// \p AccumulativeByteOffset The constant offset.
  void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
                              int64_t AccumulativeByteOffset);

  /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr
  /// form. Function splitGEP already split the original GEP into a variadic
  /// part and a constant offset (i.e., AccumulativeByteOffset). This function
  /// lowers the variadic part into a set of arithmetic operations and applies
  /// AccumulativeByteOffset to it.
  /// \p Variadic               The variadic part of the original GEP.
  /// \p AccumulativeByteOffset The constant offset.
  void lowerToArithmetics(GetElementPtrInst *Variadic,
                          int64_t AccumulativeByteOffset);

  /// Finds the constant offset within each index and accumulates them. If
  /// LowerGEP is true, it looks into indices of both sequential and structure
  /// types; otherwise it only looks into sequential indices. The output
  /// NeedsExtraction indicates whether we successfully found a non-zero
  /// constant offset.
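  ///
  /// For example (illustrative, assuming LowerGEP is true and field 1 of
  /// %struct.S starts at byte offset 4), given
  ///   getelementptr %struct.S, %struct.S* %s, i64 %i, i32 1   ; %i = %a + 3
  /// this accumulates 3 * the alloc size of %struct.S plus 4, and sets
  /// NeedsExtraction to true.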
  int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);

  /// Canonicalize array indices to pointer-size integers. This helps to
  /// simplify the logic of splitting a GEP. For example, if a + b is a
  /// pointer-size integer, we have
  ///   gep base, a + b = gep (gep base, a), b
  /// However, this equality may not hold if the size of a + b is smaller than
  /// the pointer size, because LLVM conceptually sign-extends GEP indices to
  /// pointer size before computing the address
  /// (http://llvm.org/docs/LangRef.html#id181).
  ///
  /// This canonicalization is very likely already done in clang and
  /// instcombine. Therefore, the program will probably remain the same.
  ///
  /// Returns true if the module changes.
  ///
  /// Verified in @i32_add in split-gep.ll
  bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);

  /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
  /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
  /// the constant offset. After extraction, it becomes desirable to reunite
  /// the distributed sexts. For example,
  ///
  ///                              &a[sext(i +nsw (j +nsw 5))]
  ///   => distribute              &a[sext(i) +nsw (sext(j) +nsw 5)]
  ///   => constant extraction     &a[sext(i) + sext(j)] + 5
  ///   => reunion                 &a[sext(i +nsw j)] + 5
  bool reuniteExts(Function &F);

  /// A helper that reunites sexts in an instruction.
  bool reuniteExts(Instruction *I);

  /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
  Instruction *findClosestMatchingDominator(
      const SCEV *Key, Instruction *Dominatee,
      DenseMap<const SCEV *, SmallVector<Instruction *, 2>> &DominatingExprs);

  /// Verify F is free of dead code.
  void verifyNoDeadCode(Function &F);

  bool hasMoreThanOneUseInLoop(Value *V, Loop *L);

  // Swaps the index operands of the two given GEPs.
  void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);

  // Checks whether it is safe to swap the operands of the two given GEPs.
  bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
                            Loop *CurLoop);

  const DataLayout *DL = nullptr;
  DominatorTree *DT = nullptr;
  ScalarEvolution *SE;
  LoopInfo *LI;
  TargetLibraryInfo *TLI;
  // Retrieved lazily since not always used.
  function_ref<TargetTransformInfo &(Function &)> GetTTI;

  /// Whether to lower a GEP with multiple indices into arithmetic operations
  /// or multiple GEPs with a single index.
  bool LowerGEP;

  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingAdds;
  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingSubs;
};

} // end anonymous namespace

char SeparateConstOffsetFromGEPLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(
    SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
    "Split GEPs to a variadic base and a constant offset for better CSE", false,
    false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
    "Split GEPs to a variadic base and a constant offset for better CSE", false,
    false)

FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) {
  return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP);
}

bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
                                           bool ZeroExtended,
                                           BinaryOperator *BO,
                                           bool NonNegative) {
  // We only consider ADD, SUB and OR, because a non-zero constant found in
  // expressions composed of these operations can be easily hoisted as a
  // constant offset by reassociation.
  if (BO->getOpcode() != Instruction::Add &&
      BO->getOpcode() != Instruction::Sub &&
      BO->getOpcode() != Instruction::Or) {
    return false;
  }

  Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
  // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
  // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
  // FIXME: this does not appear to be covered by any tests
  //        (with x86/aarch64 backends at least)
  if (BO->getOpcode() == Instruction::Or &&
      !haveNoCommonBitsSet(LHS, RHS, DL, nullptr, BO, DT))
    return false;

  // In addition, tracing into BO requires that its surrounding s/zext (if
  // any) is distributable to both operands.
  //
  // Suppose BO = A op B.
  //  SignExtended | ZeroExtended | Distributable?
  // --------------+--------------+----------------------------------
  //       0       |      0       | true because no s/zext exists
  //       0       |      1       | zext(BO) == zext(A) op zext(B)
  //       1       |      0       | sext(BO) == sext(A) op sext(B)
  //       1       |      1       | zext(sext(BO)) ==
  //               |              |   zext(sext(A)) op zext(sext(B))
  if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
    // If a + b >= 0 and (a >= 0 or b >= 0), then
    //   sext(a + b) = sext(a) + sext(b)
    // even if the addition is not marked nsw.
    //
    // Leveraging this invariant, we can trace into an sext'ed inbound GEP
    // index if the constant offset is non-negative.
    //
    // Verified in @sext_add in split-gep.ll.
    if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {
      if (!ConstLHS->isNegative())
        return true;
    }
    if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
      if (!ConstRHS->isNegative())
        return true;
    }
  }

  // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
  // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
  if (BO->getOpcode() == Instruction::Add ||
      BO->getOpcode() == Instruction::Sub) {
    if (SignExtended && !BO->hasNoSignedWrap())
      return false;
    if (ZeroExtended && !BO->hasNoUnsignedWrap())
      return false;
  }

  return true;
}

APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
                                                   bool SignExtended,
                                                   bool ZeroExtended) {
  // Save off the current height of the chain, in case we need to restore it.
  size_t ChainLength = UserChain.size();

  // BO being non-negative does not shed light on whether its operands are
  // non-negative. Clear the NonNegative flag here.
  APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
                              /* NonNegative */ false);
  // If we found a constant offset in the left operand, stop and return that.
  // This shortcut might cause us to miss opportunities of combining the
  // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
  // However, such cases are probably already handled by -instcombine,
  // given this pass runs after the standard optimizations.
  if (ConstantOffset != 0) return ConstantOffset;

  // Reset the chain back to where it was when we started exploring this node,
  // since visiting the LHS didn't pan out.
  UserChain.resize(ChainLength);

  ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
                        /* NonNegative */ false);
  // If U is a sub operator, negate the constant offset found in the right
  // operand.
  if (BO->getOpcode() == Instruction::Sub)
    ConstantOffset = -ConstantOffset;

  // If RHS wasn't a suitable candidate either, reset the chain again.
  if (ConstantOffset == 0)
    UserChain.resize(ChainLength);

  return ConstantOffset;
}

APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
                                    bool ZeroExtended, bool NonNegative) {
  // TODO(jingyue): We could trace into integer/pointer casts, such as
  // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
  // integers because it gives good enough results for our benchmarks.
  unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();

  // We cannot do much with Values that are not a User, such as an Argument.
  User *U = dyn_cast<User>(V);
  if (U == nullptr) return APInt(BitWidth, 0);

  APInt ConstantOffset(BitWidth, 0);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
    // Hooray, we found it!
    ConstantOffset = CI->getValue();
  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
    // Trace into subexpressions for more hoisting opportunities.
    if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
      ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
  } else if (isa<TruncInst>(V)) {
    ConstantOffset =
        find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
            .trunc(BitWidth);
  } else if (isa<SExtInst>(V)) {
    ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
                          ZeroExtended, NonNegative).sext(BitWidth);
  } else if (isa<ZExtInst>(V)) {
    // As an optimization, we can clear the SignExtended flag because
    // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
    //
    // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
    ConstantOffset =
        find(U->getOperand(0), /* SignExtended */ false,
             /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
  }

  // If we found a non-zero constant offset, add it to the path for
  // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
  // help this optimization.
  if (ConstantOffset != 0)
    UserChain.push_back(U);
  return ConstantOffset;
}

Value *ConstantOffsetExtractor::applyExts(Value *V) {
  Value *Current = V;
  // ExtInsts is built in the use-def order. Therefore, we apply them to V
  // in the reversed order.
  for (auto I = ExtInsts.rbegin(), E = ExtInsts.rend(); I != E; ++I) {
    if (Constant *C = dyn_cast<Constant>(Current)) {
      // If Current is a constant, apply s/zext using ConstantExpr::getCast.
      // ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt.
      Current = ConstantExpr::getCast((*I)->getOpcode(), C, (*I)->getType());
    } else {
      Instruction *Ext = (*I)->clone();
      Ext->setOperand(0, Current);
      Ext->insertBefore(IP);
      Current = Ext;
    }
  }
  return Current;
}

Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
  distributeExtsAndCloneChain(UserChain.size() - 1);
  // Remove all nullptrs (used to be s/zext) from UserChain.
  unsigned NewSize = 0;
  for (User *I : UserChain) {
    if (I != nullptr) {
      UserChain[NewSize] = I;
      NewSize++;
    }
  }
  UserChain.resize(NewSize);
  return removeConstOffset(UserChain.size() - 1);
}

Value *
ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
  User *U = UserChain[ChainIndex];
  if (ChainIndex == 0) {
    assert(isa<ConstantInt>(U));
    // If U is a ConstantInt, applyExts will return a ConstantInt as well.
    return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
  }

  if (CastInst *Cast = dyn_cast<CastInst>(U)) {
    assert(
        (isa<SExtInst>(Cast) || isa<ZExtInst>(Cast) || isa<TruncInst>(Cast)) &&
        "Only following instructions can be traced: sext, zext & trunc");
    ExtInsts.push_back(Cast);
    UserChain[ChainIndex] = nullptr;
    return distributeExtsAndCloneChain(ChainIndex - 1);
  }

  // Function find only traces into BinaryOperator and CastInst.
  BinaryOperator *BO = cast<BinaryOperator>(U);
  // OpNo = which operand of BO is UserChain[ChainIndex - 1]
  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
  Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);

  BinaryOperator *NewBO = nullptr;
  if (OpNo == 0) {
    NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
                                   BO->getName(), IP);
  } else {
    NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
                                   BO->getName(), IP);
  }
  return UserChain[ChainIndex] = NewBO;
}

Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
  if (ChainIndex == 0) {
    assert(isa<ConstantInt>(UserChain[ChainIndex]));
    return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
  }

  BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
  assert((BO->use_empty() || BO->hasOneUse()) &&
         "distributeExtsAndCloneChain clones each BinaryOperator in "
         "UserChain, so no one should be used more than "
         "once");

  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
  Value *NextInChain = removeConstOffset(ChainIndex - 1);
  Value *TheOther = BO->getOperand(1 - OpNo);

  // If NextInChain is 0 and not the LHS of a sub, we can simplify the
  // sub-expression to be just TheOther.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
    if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
      return TheOther;
  }

  BinaryOperator::BinaryOps NewOp = BO->getOpcode();
  if (BO->getOpcode() == Instruction::Or) {
    // Rebuild "or" as "add", because "or" may be invalid for the new
    // expression.
    //
    // For instance, given
    //   a | (b + 5) where a and b + 5 have no common bits,
    // we can extract 5 as the constant offset.
    //
    // However, reusing the "or" in the new index would give us
    //   (a | b) + 5
    // which does not equal a | (b + 5).
    //
    // Replacing the "or" with "add" is fine, because
    //   a | (b + 5) = a + (b + 5) = (a + b) + 5
    NewOp = Instruction::Add;
  }

  BinaryOperator *NewBO;
  if (OpNo == 0) {
    NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP);
  } else {
    NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP);
  }
  NewBO->takeName(BO);
  return NewBO;
}

Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
                                        User *&UserChainTail,
                                        const DominatorTree *DT) {
  ConstantOffsetExtractor Extractor(GEP, DT);
  // Find a non-zero constant offset first.
  APInt ConstantOffset =
      Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
                     GEP->isInBounds());
  if (ConstantOffset == 0) {
    UserChainTail = nullptr;
    return nullptr;
  }
  // Separates the constant offset from the GEP index.
  Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
  UserChainTail = Extractor.UserChain.back();
  return IdxWithoutConstOffset;
}

int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
                                      const DominatorTree *DT) {
  // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
  return ConstantOffsetExtractor(GEP, DT)
      .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
            GEP->isInBounds())
      .getSExtValue();
}

bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
    GetElementPtrInst *GEP) {
  bool Changed = false;
  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
       I != E; ++I, ++GTI) {
    // Skip struct member indices which must be i32.
    if (GTI.isSequential()) {
      if ((*I)->getType() != IntPtrTy) {
        *I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP);
        Changed = true;
      }
    }
  }
  return Changed;
}

int64_t
SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
                                                 bool &NeedsExtraction) {
  NeedsExtraction = false;
  int64_t AccumulativeByteOffset = 0;
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      // Tries to extract a constant offset from this GEP index.
      int64_t ConstantOffset =
          ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
      if (ConstantOffset != 0) {
        NeedsExtraction = true;
        // A GEP may have multiple indices. We accumulate the extracted
        // constant offset to a byte offset, and later offset the remainder of
        // the original GEP with this byte offset.
        AccumulativeByteOffset +=
            ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
      }
    } else if (LowerGEP) {
      StructType *StTy = GTI.getStructType();
      uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
      // Skip field 0 as the offset is always 0.
      if (Field != 0) {
        NeedsExtraction = true;
        AccumulativeByteOffset +=
            DL->getStructLayout(StTy)->getElementOffset(Field);
      }
    }
  }
  return AccumulativeByteOffset;
}

void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
    GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
  IRBuilder<> Builder(Variadic);
  Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());

  Type *I8PtrTy =
      Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace());
  Value *ResultPtr = Variadic->getOperand(0);
  Loop *L = LI->getLoopFor(Variadic->getParent());
  // The base is a swap candidate only if it is loop invariant and not used
  // more than once inside the loop.
  bool isSwapCandidate =
      L && L->isLoopInvariant(ResultPtr) &&
      !hasMoreThanOneUseInLoop(ResultPtr, L);
  Value *FirstResult = nullptr;

  if (ResultPtr->getType() != I8PtrTy)
    ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);

  gep_type_iterator GTI = gep_type_begin(*Variadic);
  // Create an ugly GEP for each sequential index. We don't create GEPs for
  // structure indices, as they are accumulated in the constant offset index.
  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      Value *Idx = Variadic->getOperand(I);
      // Skip zero indices.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
        if (CI->isZero())
          continue;

      APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
                                DL->getTypeAllocSize(GTI.getIndexedType()));
      // Scale the index by element size.
      if (ElementSize != 1) {
        if (ElementSize.isPowerOf2()) {
          Idx = Builder.CreateShl(
              Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
        } else {
          Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
        }
      }
      // Create an ugly GEP with a single index for each index.
      ResultPtr =
          Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
      if (FirstResult == nullptr)
        FirstResult = ResultPtr;
    }
  }

  // Create a GEP with the constant offset index.
  if (AccumulativeByteOffset != 0) {
    Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
    ResultPtr =
        Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
  } else
    isSwapCandidate = false;

  // If we created a GEP with constant index, and the base is loop invariant,
  // then we swap the first one with it, so LICM can move the constant GEP out
  // later.
  auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
  auto *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
  if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
    swapGEPOperand(FirstGEP, SecondGEP);

  if (ResultPtr->getType() != Variadic->getType())
    ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());

  Variadic->replaceAllUsesWith(ResultPtr);
  Variadic->eraseFromParent();
}

void
SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
                                               int64_t AccumulativeByteOffset) {
  IRBuilder<> Builder(Variadic);
  Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());

  Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
  gep_type_iterator GTI = gep_type_begin(*Variadic);
  // Create ADD/SHL/MUL arithmetic operations for each sequential index. We
  // don't create arithmetics for structure indices, as they are accumulated
  // in the constant offset index.
  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      Value *Idx = Variadic->getOperand(I);
      // Skip zero indices.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
        if (CI->isZero())
          continue;

      APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
                                DL->getTypeAllocSize(GTI.getIndexedType()));
      // Scale the index by element size.
      if (ElementSize != 1) {
        if (ElementSize.isPowerOf2()) {
          Idx = Builder.CreateShl(
              Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
        } else {
          Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
        }
      }
      // Create an ADD for each index.
      ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
    }
  }

  // Create an ADD for the constant offset index.
  if (AccumulativeByteOffset != 0) {
    ResultPtr = Builder.CreateAdd(
        ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
  }

  ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
  Variadic->replaceAllUsesWith(ResultPtr);
  Variadic->eraseFromParent();
}

bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
  // Skip vector GEPs.
  if (GEP->getType()->isVectorTy())
    return false;

  // The backend can already nicely handle the case where all indices are
  // constant.
  if (GEP->hasAllConstantIndices())
    return false;

  bool Changed = canonicalizeArrayIndicesToPointerSize(GEP);

  bool NeedsExtraction;
  int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);

  if (!NeedsExtraction)
    return Changed;

  TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());

  // If LowerGEP is disabled, before really splitting the GEP, check whether
  // the backend supports the addressing mode we are about to produce. If not,
  // this splitting probably won't be beneficial.
  // If LowerGEP is enabled, even if the extracted constant offset cannot match
  // the addressing mode, we can still do optimizations on the other lowered
  // parts of the variable indices. Therefore, we don't check for addressing
  // modes in that case.
  if (!LowerGEP) {
    unsigned AddrSpace = GEP->getPointerAddressSpace();
    if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
                                   /*BaseGV=*/nullptr, AccumulativeByteOffset,
                                   /*HasBaseReg=*/true, /*Scale=*/0,
                                   AddrSpace)) {
      return Changed;
    }
  }

  // Remove the constant offset in each sequential index. The resultant GEP
  // computes the variadic base.
  // Notice that we don't remove struct field indices here. If LowerGEP is
  // disabled, a structure index is not accumulated and we still use the old
  // one. If LowerGEP is enabled, a structure index is accumulated in the
  // constant offset. lowerToSingleIndexGEPs or lowerToArithmetics will later
  // handle the constant offset and won't need a new structure index.
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      // Splits this GEP index into a variadic part and a constant offset, and
      // uses the variadic part as the new index.
      Value *OldIdx = GEP->getOperand(I);
      User *UserChainTail;
      Value *NewIdx =
          ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail, DT);
      if (NewIdx != nullptr) {
        // Switches to the index with the constant offset removed.
        GEP->setOperand(I, NewIdx);
        // After switching to the new index, we can garbage-collect UserChain
        // and the old index if they are not used.
        RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
        RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
      }
    }
  }

  // Clear the inbounds attribute because the new index may be off-bound.
  // e.g.,
  //
  //   b    = add i64 a, 5
  //   addr = gep inbounds float, float* p, i64 b
  //
  // is transformed to:
  //
  //   addr2 = gep float, float* p, i64 a ; inbounds removed
  //   addr  = gep inbounds float, float* addr2, i64 5
  //
  // If a is -4, although the old index b is in bounds, the new index a is
  // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
  // inbounds keyword is not present, the offsets are added to the base
  // address with silently-wrapping two's complement arithmetic".
  // Therefore, the final code will be semantically equivalent.
  //
  // TODO(jingyue): do some range analysis to keep as many inbounds as
  // possible. GEPs with inbounds are more friendly to alias analysis.
  bool GEPWasInBounds = GEP->isInBounds();
  GEP->setIsInBounds(false);

  // Lowers a GEP to either GEPs with a single index or arithmetic operations.
  if (LowerGEP) {
    // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
    // arithmetic operations if the target uses alias analysis in codegen.
    if (TTI.useAA())
      lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
    else
      lowerToArithmetics(GEP, AccumulativeByteOffset);
    return true;
  }

  // No need to create another GEP if the accumulative byte offset is 0.
  if (AccumulativeByteOffset == 0)
    return true;

  // Offsets the base with the accumulative byte offset.
  //
  //   %gep                        ; the base
  //   ... %gep ...
  //
  // => add the offset
  //
  //   %gep2                       ; clone of %gep
  //   %new.gep = gep %gep2, <offset / sizeof(*%gep)>
  //   %gep                        ; will be removed
  //   ... %gep ...
  //
  // => replace all uses of %gep with %new.gep and remove %gep
  //
  //   %gep2                       ; clone of %gep
  //   %new.gep = gep %gep2, <offset / sizeof(*%gep)>
  //   ... %new.gep ...
  //
  // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
  // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
  // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
  // type of %gep.
  //
  //   %gep2                       ; clone of %gep
  //   %0       = bitcast %gep2 to i8*
  //   %uglygep = gep %0, <offset>
  //   %new.gep = bitcast %uglygep to <type of %gep>
  //   ... %new.gep ...
  Instruction *NewGEP = GEP->clone();
  NewGEP->insertBefore(GEP);

  // Per the C standard, signed / unsigned = unsigned and signed % unsigned =
  // unsigned. Therefore, we cast ElementTypeSizeOfGEP to signed so that the
  // division and modulo below, which involve the signed
  // AccumulativeByteOffset, are computed in signed arithmetic.
  int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
      DL->getTypeAllocSize(GEP->getResultElementType()));
  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
  if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
    // Very likely. As long as %gep is naturally aligned, the byte offset we
    // extracted should be a multiple of sizeof(*%gep).
    int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
    NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
                                       ConstantInt::get(IntPtrTy, Index, true),
                                       GEP->getName(), GEP);
    NewGEP->copyMetadata(*GEP);
    // Inherit the inbounds attribute of the original GEP.
    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
  } else {
    // Unlikely but possible. For example,
    //   #pragma pack(1)
    //   struct S {
    //     int a[3];
    //     int64 b[8];
    //   };
    //   #pragma pack()
    //
    // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
    // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
    // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
    // sizeof(int64).
    //
    // Emit an uglygep in this case.
    Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(),
                                       GEP->getPointerAddressSpace());
    NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
    NewGEP = GetElementPtrInst::Create(
        Type::getInt8Ty(GEP->getContext()), NewGEP,
        ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
        GEP);
    NewGEP->copyMetadata(*GEP);
    // Inherit the inbounds attribute of the original GEP.
    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
    if (GEP->getType() != I8PtrTy)
      NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
  }

  GEP->replaceAllUsesWith(NewGEP);
  GEP->eraseFromParent();

  return true;
}

bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;
  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
    return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  };
  SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
  return Impl.run(F);
}

bool SeparateConstOffsetFromGEP::run(Function &F) {
  if (DisableSeparateConstOffsetFromGEP)
    return false;

  DL = &F.getParent()->getDataLayout();
  bool Changed = false;
  for (BasicBlock &B : F) {
    if (!DT->isReachableFromEntry(&B))
      continue;

    for (BasicBlock::iterator I = B.begin(), IE = B.end(); I != IE;)
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++))
        Changed |= splitGEP(GEP);
    // No need to split GEP ConstantExprs because all their indices are
    // constant already.
  }

  Changed |= reuniteExts(F);

  if (VerifyNoDeadCode)
    verifyNoDeadCode(F);

  return Changed;
}

Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
    const SCEV *Key, Instruction *Dominatee,
    DenseMap<const SCEV *, SmallVector<Instruction *, 2>> &DominatingExprs) {
  auto Pos = DominatingExprs.find(Key);
  if (Pos == DominatingExprs.end())
    return nullptr;

  auto &Candidates = Pos->second;
  // Because we process the basic blocks in pre-order of the dominator tree, a
  // candidate that doesn't dominate the current instruction won't dominate any
  // future instruction either. Therefore, we pop it out of the stack. This
  // optimization makes the algorithm O(n).
  while (!Candidates.empty()) {
    Instruction *Candidate = Candidates.back();
    if (DT->dominates(Candidate, Dominatee))
      return Candidate;
    Candidates.pop_back();
  }
  return nullptr;
}

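// For example (illustrative IR), given a dominating "%d = add nsw i32 %a, %b"
// whose poison would make the program undefined, reuniteExts rewrites
//   %s1 = sext i32 %a to i64
//   %s2 = sext i32 %b to i64
//   %r  = add i64 %s1, %s2
// into "%r = sext i32 %d to i64".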
bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
  if (!SE->isSCEVable(I->getType()))
    return false;

  // Dom: LHS+RHS
  // I: sext(LHS)+sext(RHS)
  // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
  // TODO: handle zext
  Value *LHS = nullptr, *RHS = nullptr;
  if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
    if (LHS->getType() == RHS->getType()) {
      const SCEV *Key =
          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
      if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingAdds)) {
        Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
        NewSExt->takeName(I);
        I->replaceAllUsesWith(NewSExt);
        RecursivelyDeleteTriviallyDeadInstructions(I);
        return true;
      }
    }
  } else if (match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
    if (LHS->getType() == RHS->getType()) {
      const SCEV *Key =
          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
      if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingSubs)) {
        Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
        NewSExt->takeName(I);
        I->replaceAllUsesWith(NewSExt);
        RecursivelyDeleteTriviallyDeadInstructions(I);
        return true;
      }
    }
  }

  // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
  if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS)))) {
    if (programUndefinedIfPoison(I)) {
      const SCEV *Key =
          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
      DominatingAdds[Key].push_back(I);
    }
  } else if (match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
    if (programUndefinedIfPoison(I)) {
      const SCEV *Key =
          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
      DominatingSubs[Key].push_back(I);
    }
  }
  return false;
}

bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
  bool Changed = false;
  DominatingAdds.clear();
  DominatingSubs.clear();
  for (const auto Node : depth_first(DT)) {
    BasicBlock *BB = Node->getBlock();
    for (auto &I : *BB)
      Changed |= reuniteExts(&I);
  }
  return Changed;
}

void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
  for (BasicBlock &B : F) {
    for (Instruction &I : B) {
      if (isInstructionTriviallyDead(&I)) {
        std::string ErrMessage;
        raw_string_ostream RSO(ErrMessage);
        RSO << "Dead instruction detected!\n" << I << "\n";
        llvm_unreachable(RSO.str().c_str());
      }
    }
  }
}

bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
    GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
  if (!FirstGEP || !FirstGEP->hasOneUse())
    return false;

  if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
    return false;

  if (FirstGEP == SecondGEP)
    return false;

  unsigned FirstNum = FirstGEP->getNumOperands();
  unsigned SecondNum = SecondGEP->getNumOperands();
  // Give up if the number of operands is not 2.
  if (FirstNum != SecondNum || FirstNum != 2)
    return false;

  Value *FirstBase = FirstGEP->getOperand(0);
  Value *SecondBase = SecondGEP->getOperand(0);
  Value *FirstOffset = FirstGEP->getOperand(1);
  // Give up if the index of the first GEP is loop invariant.
  if (CurLoop->isLoopInvariant(FirstOffset))
    return false;

  // Give up if the bases don't have the same type.
  if (FirstBase->getType() != SecondBase->getType())
    return false;

  Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);

  // Check if the second operand of the first GEP has a constant coefficient.
  // For example, for the following code, we won't gain anything by hoisting
  // the second GEP out because the second GEP can be folded away.
  //   %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
  //   %67 = shl i64 %scevgep.sum.ur159, 2
  //   %uglygep160 = getelementptr i8* %65, i64 %67
  //   %uglygep161 = getelementptr i8* %uglygep160, i64 -1024

  // Skip a constant shift instruction which may be generated by splitting
  // GEPs.
  if (FirstOffsetDef && FirstOffsetDef->isShift() &&
      isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
    FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));

  // Give up if FirstOffsetDef is an Add or Sub with a constant, because it
  // may not be profitable at all due to constant folding.
  if (FirstOffsetDef)
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
      unsigned opc = BO->getOpcode();
      if ((opc == Instruction::Add || opc == Instruction::Sub) &&
          (isa<ConstantInt>(BO->getOperand(0)) ||
           isa<ConstantInt>(BO->getOperand(1))))
        return false;
    }
  return true;
}

bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
  int UsesInLoop = 0;
  for (User *U : V->users()) {
    if (Instruction *User = dyn_cast<Instruction>(U))
      if (L->contains(User))
        if (++UsesInLoop > 1)
          return true;
  }
  return false;
}

void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
                                                GetElementPtrInst *Second) {
  Value *Offset1 = First->getOperand(1);
  Value *Offset2 = Second->getOperand(1);
  First->setOperand(1, Offset2);
  Second->setOperand(1, Offset1);

  // We changed p+o+c to p+c+o; p+c may not be inbounds anymore.
  const DataLayout &DAL = First->getModule()->getDataLayout();
  APInt Offset(DAL.getIndexSizeInBits(
                   cast<PointerType>(First->getType())->getAddressSpace()),
               0);
  Value *NewBase =
      First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);
  uint64_t ObjectSize;
  if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
      Offset.ugt(ObjectSize)) {
    First->setIsInBounds(false);
    Second->setIsInBounds(false);
  } else
    First->setIsInBounds(true);
}

PreservedAnalyses
SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) {
  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
  auto *LI = &AM.getResult<LoopAnalysis>(F);
  auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
  auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
    return AM.getResult<TargetIRAnalysis>(F);
  };
  SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
  if (!Impl.run(F))
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}
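
// A minimal usage sketch (illustrative, not part of this file): with the new
// pass manager, the pass can be scheduled as
//
//   FunctionPassManager FPM;
//   FPM.addPass(SeparateConstOffsetFromGEPPass(/*LowerGEP=*/true));
//
// and with the legacy pass manager via createSeparateConstOffsetFromGEPPass().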
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:511
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2331
llvm::GetElementPtrInst::setIsInBounds
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
Definition: Instructions.cpp:1810
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2036
constant
we should consider alternate ways to model stack dependencies Lots of things could be done in WebAssemblyTargetTransformInfo cpp there are numerous optimization related hooks that can be overridden in WebAssemblyTargetLowering Instead of the OptimizeReturned which should consider preserving the returned attribute through to MachineInstrs and extending the MemIntrinsicResults pass to do this optimization on calls too That would also let the WebAssemblyPeephole pass clean up dead defs for such as it does for stores Consider implementing and or getMachineCombinerPatterns Find a clean way to fix the problem which leads to the Shrink Wrapping pass being run after the WebAssembly PEI pass When setting multiple variables to the same constant
Definition: README.txt:91
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::haveNoCommonBitsSet
bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if LHS and RHS have no common bits set.
Definition: ValueTracking.cpp:258
llvm::createSeparateConstOffsetFromGEPPass
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
Definition: SeparateConstOffsetFromGEP.cpp:499
llvm::DataLayout
A parsed version of the target data layout string and methods for querying it.
Definition: DataLayout.h:112
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:435
llvm::generic_gep_type_iterator
Definition: GetElementPtrTypeIterator.h:31
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:255
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:779
Scalar.h
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
Definition: LoopInfo.h:122
llvm::APInt::isPowerOf2
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:429
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:625
llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition: Instructions.h:5200
GetElementPtrTypeIterator.h
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::PatternMatch::m_Add
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1008
ErrorHandling.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::IRBuilder<>
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:461
llvm::Instruction::hasNoUnsignedWrap
bool hasNoUnsignedWrap() const
Determine whether the no unsigned wrap flag is set.
Definition: Instruction.cpp:136
ValueTracking.h
Local.h
llvm::Instruction::insertBefore
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:84
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
APInt.h
llvm::programUndefinedIfPoison
bool programUndefinedIfPoison(const Instruction *Inst)
Definition: ValueTracking.cpp:5549
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
DenseMap.h
Module.h
MemoryBuiltins.h
llvm::Instruction::isShift
bool isShift() const
Definition: Instruction.h:167
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1268
llvm::Instruction::copyMetadata
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Definition: Instruction.cpp:829
llvm::CastInst::CreateIntegerCast
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
Definition: Instructions.cpp:3242
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:139
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:201
DepthFirstIterator.h
llvm::SeparateConstOffsetFromGEPPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
Definition: SeparateConstOffsetFromGEP.cpp:1374
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:115
llvm::DataLayout::getIndexSizeInBits
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:418
Instruction.h
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1335
TargetMachine.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::User
Definition: User.h:44
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
TargetLibraryInfo.h
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:393
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:900
llvm::PatternMatch::m_NSWSub
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1194
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2066
PatternMatch.h
llvm::getObjectSize
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
Definition: MemoryBuiltins.cpp:514
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:345
Type.h
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::PatternMatch::m_NSWAdd
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1186
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
BasicBlock.h
llvm::cl::opt< bool >
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:463
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1571
llvm::generic_gep_type_iterator::getStructType
StructType * getStructType() const
Definition: GetElementPtrTypeIterator.h:121
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::APInt::logBase2
unsigned logBase2() const
Definition: APInt.h:1636
llvm::DenseMap
Definition: DenseMap.h:714
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:928
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:576
IRBuilder.h
SeparateConstOffsetFromGEP.h
llvm::PatternMatch::m_Sub
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1020
llvm::ConstantExpr::getCast
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:1963
llvm::generic_gep_type_iterator::getIndexedType
Type * getIndexedType() const
Definition: GetElementPtrTypeIterator.h:72
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:328
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
llvm::GetElementPtrInst::Create
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:954
llvm::PatternMatch::m_SExt
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:1633
llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:398
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::BinaryOperator
Definition: InstrTypes.h:189
DataLayout.h
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass iff it does not:
Definition: Pass.cpp:253
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:116
llvm::generic_gep_type_iterator::isSequential
bool isSequential() const
Definition: GetElementPtrTypeIterator.h:119
DisableSeparateConstOffsetFromGEP
static cl::opt< bool > DisableSeparateConstOffsetFromGEP("disable-separate-const-offset-from-gep", cl::init(false), cl::desc("Do not separate the constant offset from a GEP instruction"), cl::Hidden)
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:430
llvm::initializeSeparateConstOffsetFromGEPLegacyPassPass
void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &)
llvm::SExtInst
This class represents a sign extension of integer types.
Definition: Instructions.h:4833
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::MCID::Variadic
@ Variadic
Definition: MCInstrDesc.h:148
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:229
Constant.h
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:348
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
Casting.h
Function.h
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:785
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
Instructions.h
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:191
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
SmallVector.h
User.h
llvm::Instruction::hasNoSignedWrap
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
Definition: Instruction.cpp:140
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
TargetTransformInfo.h
llvm::PatternMatch
Definition: PatternMatch.h:47
DerivedTypes.h
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::OptimizedStructLayoutField
A field in a structure.
Definition: OptimizedStructLayout.h:45
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:382
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep", "Split GEPs to a variadic base and a constant offset for better CSE", false, false) INITIALIZE_PASS_END(SeparateConstOffsetFromGEPLegacyPass
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:414
raw_ostream.h
llvm::BinaryOperator::Create
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Definition: Instructions.cpp:2674
Value.h
llvm::TargetTransformInfo::useAA
bool useAA() const
Definition: TargetTransformInfo.cpp:452
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:438
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:422
VerifyNoDeadCode
static cl::opt< bool > VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false), cl::desc("Verify this pass produces no dead code"), cl::Hidden)
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1243
llvm::TargetTransformInfo::isLegalAddressingMode
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Definition: TargetTransformInfo.cpp:339
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37