//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Loop unrolling may create many similar GEPs for array accesses.
// e.g., a 2-level loop
//
// float a[32][32]; // global variable
//
// for (int i = 0; i < 2; ++i) {
//   for (int j = 0; j < 2; ++j) {
//     ...
//     ... = a[x + i][y + j];
//     ...
//   }
// }
//
// will probably be unrolled to:
//
//   gep %a, 0, %x, %y; load
//   gep %a, 0, %x, %y + 1; load
//   gep %a, 0, %x + 1, %y; load
//   gep %a, 0, %x + 1, %y + 1; load
//
// LLVM's GVN does not use partial redundancy elimination yet, and is thus
// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization
// incurs a significant slowdown on targets with limited addressing modes. For
// instance, because the PTX target does not support the reg+reg addressing
// mode, the NVPTX backend emits PTX code that literally computes the pointer
// address of each GEP, wasting tons of registers. It emits the following PTX
// for the first load and similar PTX for the other loads.
//
//   mov.u32         %r1, %x;
//   mov.u32         %r2, %y;
//   mul.wide.u32    %rl2, %r1, 128;
//   mov.u64         %rl3, a;
//   add.s64         %rl4, %rl3, %rl2;
//   mul.wide.u32    %rl5, %r2, 4;
//   add.s64         %rl6, %rl4, %rl5;
//   ld.global.f32   %f1, [%rl6];
//
// To reduce register pressure, the optimization implemented in this file
// merges the common part of a group of GEPs, so we can compute each pointer
// address by adding a simple offset to the common part, saving many registers.
//
// It works by splitting each GEP into a variadic base and a constant offset.
// The variadic base can be computed once and reused by multiple GEPs, and the
// constant offsets can be nicely folded into the reg+immediate addressing mode
// (supported by most targets) without using any extra register.
//
// For instance, we transform the four GEPs and four loads in the above example
// into:
//
//   base = gep a, 0, x, y
//   load base
//   load base + 1  * sizeof(float)
//   load base + 32 * sizeof(float)
//   load base + 33 * sizeof(float)
//
// Given the transformed IR, a backend that supports the reg+immediate
// addressing mode can easily fold the pointer arithmetics into the loads. For
// example, the NVPTX backend can easily fold the pointer arithmetics into the
// ld.global.f32 instructions, and the resultant PTX uses many fewer registers.
//
//   mov.u32         %r1, %tid.x;
//   mov.u32         %r2, %tid.y;
//   mul.wide.u32    %rl2, %r1, 128;
//   mov.u64         %rl3, a;
//   add.s64         %rl4, %rl3, %rl2;
//   mul.wide.u32    %rl5, %r2, 4;
//   add.s64         %rl6, %rl4, %rl5;
//   ld.global.f32   %f1, [%rl6];     // so far the same as unoptimized PTX
//   ld.global.f32   %f2, [%rl6+4];   // much better
//   ld.global.f32   %f3, [%rl6+128]; // much better
//   ld.global.f32   %f4, [%rl6+132]; // much better
//
// Another improvement enabled by the LowerGEP flag is to lower a GEP with
// multiple indices to either multiple GEPs with a single index or arithmetic
// operations (depending on whether the target uses alias analysis in codegen).
// Such a transformation can have the following benefits:
// (1) It can always extract constants from indices of structure type.
// (2) After such lowering, there are more optimization opportunities such as
//     CSE, LICM and CGP.
//
// E.g. The following GEPs have multiple indices:
//  BB1:
//    %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
//    load %p
//    ...
//  BB2:
//    %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j2, i32 2
//    load %p2
//    ...
//
// We cannot do CSE on the common part related to index "i64 %i" in this form.
// Lowering the GEPs makes that possible.
// If the target does not use alias analysis in codegen, this pass will
// lower a GEP with multiple indices into arithmetic operations:
//  BB1:
//    %1 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %3 = add i64 %1, %2                          ; CSE opportunity
//    %4 = mul i64 %j1, length_of_struct
//    %5 = add i64 %3, %4
//    %6 = add i64 %5, struct_field_3              ; Constant offset
//    %p = inttoptr i64 %6 to i32*
//    load %p
//    ...
//  BB2:
//    %7 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %9 = add i64 %7, %8                          ; CSE opportunity
//    %10 = mul i64 %j2, length_of_struct
//    %11 = add i64 %9, %10
//    %12 = add i64 %11, struct_field_2            ; Constant offset
//    %p2 = inttoptr i64 %12 to i32*
//    load %p2
//    ...
//
// If the target uses alias analysis in codegen, this pass will lower a GEP
// with multiple indices into multiple GEPs with a single index:
//  BB1:
//    %1 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %3 = getelementptr i8* %1, i64 %2            ; CSE opportunity
//    %4 = mul i64 %j1, length_of_struct
//    %5 = getelementptr i8* %3, i64 %4
//    %6 = getelementptr i8* %5, struct_field_3    ; Constant offset
//    %p = bitcast i8* %6 to i32*
//    load %p
//    ...
//  BB2:
//    %7 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %9 = getelementptr i8* %7, i64 %8            ; CSE opportunity
//    %10 = mul i64 %j2, length_of_struct
//    %11 = getelementptr i8* %9, i64 %10
//    %12 = getelementptr i8* %11, struct_field_2  ; Constant offset
//    %p2 = bitcast i8* %12 to i32*
//    load %p2
//    ...
//
//
// Lowering GEPs can also benefit other passes such as LICM and CGP.
// LICM (Loop Invariant Code Motion) cannot hoist/sink a GEP with multiple
// indices if one of the indices is variant. If we lower such a GEP into
// invariant parts and variant parts, LICM can hoist/sink those invariant
// parts.
// CGP (CodeGen Prepare) tries to sink address calculations that match the
// target's addressing modes. A GEP with multiple indices may not match and
// will not be sunk. If we lower such a GEP into smaller parts, CGP may sink
// some of them. So we end up with a better addressing mode.
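//
// For example (an illustrative sketch; %inv is loop-invariant and %iv is the
// induction variable):
//
//   loop:
//     %p = gep [32 x [32 x float]]* %a, 0, %inv, %iv
//
// After lowering, the part that advances the pointer by
// "%inv * 32 * sizeof(float)" bytes is loop-invariant and can be hoisted by
// LICM; only the "%iv * sizeof(float)" part remains inside the loop.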
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <string>

using namespace llvm;
using namespace llvm::PatternMatch;

static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
    "disable-separate-const-offset-from-gep", cl::init(false),
    cl::desc("Do not separate the constant offset from a GEP instruction"),
    cl::Hidden);

// Setting this flag may emit false positives when the input module already
// contains dead instructions. Therefore, we set it only in unit tests that are
// free of dead code.
static cl::opt<bool>
    VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false),
                     cl::desc("Verify this pass produces no dead code"),
                     cl::Hidden);

namespace {

/// A helper class for separating a constant offset from a GEP index.
///
/// In real programs, a GEP index may be more complicated than a simple addition
/// of something and a constant integer which can be trivially split. For
/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
/// constant offset, so that we can separate the index into (a << 3) + b and 5.
///
/// Therefore, this class looks into the expression that computes a given GEP
/// index, and tries to find a constant integer that can be hoisted to the
/// outermost level of the expression as an addition. Not every constant in an
/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to
/// (b * a + 5); nor can we transform (3 * (a + 5)) to (3 * a + 5); however, in
/// that case, -instcombine probably already optimized (3 * (a + 5)) to
/// (3 * a + 15).
class ConstantOffsetExtractor {
public:
  /// Extracts a constant offset from the given GEP index. It returns the
  /// new index representing the remainder (equal to the original index minus
  /// the constant offset), or nullptr if we cannot extract a constant offset.
  /// \p Idx            The given GEP index
  /// \p GEP            The given GEP
  /// \p UserChainTail  Outputs the tail of UserChain so that we can
  ///                   garbage-collect unused instructions in UserChain.
  static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
                        User *&UserChainTail, const DominatorTree *DT);

  /// Looks for a constant offset from the given GEP index without extracting
  /// it. It returns the numeric value of the extracted constant offset (0 if
  /// failed). The meanings of the arguments are the same as in Extract.
  static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
                      const DominatorTree *DT);

private:
  ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
      : IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) {
  }

  /// Searches the expression that computes V for a non-zero constant C s.t.
  /// V can be reassociated into the form V' + C. If the search succeeds,
  /// returns C and updates UserChain as a def-use chain from C to V;
  /// otherwise, UserChain is empty.
  ///
  /// \p V            The given expression
  /// \p SignExtended Whether V will be sign-extended in the computation of the
  ///                 GEP index
  /// \p ZeroExtended Whether V will be zero-extended in the computation of the
  ///                 GEP index
  /// \p NonNegative  Whether V is guaranteed to be non-negative. For example,
  ///                 an index of an inbounds GEP is guaranteed to be
  ///                 non-negative. Leveraging this, we can better split
  ///                 inbounds GEPs.
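  ///
  /// For illustration (a hypothetical index expression): given
  /// V = (a + 4) + b with no surrounding s/zext, find returns 4 and leaves
  /// UserChain = [4, a + 4, (a + 4) + b], the def-use path from the constant
  /// to V.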
  APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);

  /// A helper function to look into both operands of a binary operator.
  APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
                            bool ZeroExtended);

  /// After finding the constant offset C from the GEP index I, we build a new
  /// index I' s.t. I' + C = I. This function builds and returns the new
  /// index I' according to UserChain produced by function "find".
  ///
  /// The building conceptually takes two steps:
  /// 1) iteratively distribute s/zext towards the leaves of the expression tree
  ///    that computes I
  /// 2) reassociate the expression tree to the form I' + C.
  ///
  /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
  /// sext to a, b and 5 so that we have
  ///   sext(a) + (sext(b) + 5).
  /// Then, we reassociate it to
  ///   (sext(a) + sext(b)) + 5.
  /// Given this form, we know I' is sext(a) + sext(b).
  Value *rebuildWithoutConstOffset();

  /// After the first step of rebuilding the GEP index without the constant
  /// offset, distribute s/zext to the operands of all operators in UserChain.
  /// e.g., zext(sext(a + (b + 5))) (assuming no overflow) =>
  ///   zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
  ///
  /// The function also updates UserChain to point to new subexpressions after
  /// distributing s/zext. e.g., the old UserChain of the above example is
  ///   5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
  /// and the new UserChain is
  ///   zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
  ///     zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
  ///
  /// \p ChainIndex The index to UserChain. ChainIndex is initially
  ///               UserChain.size() - 1, and is decremented during
  ///               the recursion.
  Value *distributeExtsAndCloneChain(unsigned ChainIndex);

  /// Reassociates the GEP index to the form I' + C and returns I'.
  Value *removeConstOffset(unsigned ChainIndex);

  /// A helper function to apply ExtInsts, a list of s/zext, to value V.
  /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
  /// returns "sext i32 (zext i16 V to i32) to i64".
  Value *applyExts(Value *V);

  /// A helper function that returns whether we can trace into the operands
  /// of binary operator BO for a constant offset.
  ///
  /// \p SignExtended Whether BO is surrounded by sext
  /// \p ZeroExtended Whether BO is surrounded by zext
  /// \p NonNegative  Whether BO is known to be non-negative, e.g., an in-bound
  ///                 array index.
  bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
                    bool NonNegative);

  /// The path from the constant offset to the old GEP index. e.g., if the GEP
  /// index is "a * b + (c + 5)", after running function find, UserChain[0] will
  /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
  /// UserChain[2] will be the entire expression "a * b + (c + 5)".
  ///
  /// This path helps to rebuild the new GEP index.
  SmallVector<User *, 8> UserChain;

  /// A data structure used in rebuildWithoutConstOffset. Contains all
  /// sext/zext instructions along UserChain.
  SmallVector<CastInst *, 16> ExtInsts;

  /// Insertion position of cloned instructions.
  Instruction *IP;

  const DataLayout &DL;
  const DominatorTree *DT;
};

/// A pass that tries to split every GEP in the function into a variadic
/// base and a constant offset. It is a FunctionPass because searching for the
/// constant offset may inspect other basic blocks.
class SeparateConstOffsetFromGEP : public FunctionPass {
public:
  static char ID;

  SeparateConstOffsetFromGEP(bool LowerGEP = false)
      : FunctionPass(ID), LowerGEP(LowerGEP) {
    initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.setPreservesCFG();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }

  bool doInitialization(Module &M) override {
    DL = &M.getDataLayout();
    return false;
  }

  bool runOnFunction(Function &F) override;

private:
  /// Tries to split the given GEP into a variadic base and a constant offset,
  /// and returns true if the splitting succeeds.
  bool splitGEP(GetElementPtrInst *GEP);

  /// Lower a GEP with multiple indices into multiple GEPs with a single index.
  /// Function splitGEP already split the original GEP into a variadic part and
  /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
  /// variadic part into a set of GEPs with a single index and applies
  /// AccumulativeByteOffset to it.
  /// \p Variadic                  The variadic part of the original GEP.
  /// \p AccumulativeByteOffset    The constant offset.
  void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
                              int64_t AccumulativeByteOffset);

  /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form.
  /// Function splitGEP already split the original GEP into a variadic part and
  /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
  /// variadic part into a set of arithmetic operations and applies
  /// AccumulativeByteOffset to it.
  /// \p Variadic                  The variadic part of the original GEP.
  /// \p AccumulativeByteOffset    The constant offset.
  void lowerToArithmetics(GetElementPtrInst *Variadic,
                          int64_t AccumulativeByteOffset);

  /// Finds the constant offset within each index and accumulates them. If
  /// LowerGEP is true, it searches indices of both sequential and struct
  /// types; otherwise, it searches only sequential indices. The output
  /// NeedsExtraction indicates whether we successfully found a non-zero
  /// constant offset.
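  ///
  /// For illustration (a hypothetical GEP): given
  ///   gep inbounds %struct.S, %struct.S* %p, i64 %sum, i32 2
  /// where %sum = add nsw i64 %i, 1, and with LowerGEP enabled, the result is
  ///   1 * sizeof(%struct.S) + offsetof(%struct.S, <field 2>).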
  int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);

  /// Canonicalize array indices to pointer-size integers. This helps to
  /// simplify the logic of splitting a GEP. For example, if a + b is a
  /// pointer-size integer, we have
  ///   gep base, a + b = gep (gep base, a), b
  /// However, this equality may not hold if the size of a + b is smaller than
  /// the pointer size, because LLVM conceptually sign-extends GEP indices to
  /// pointer size before computing the address
  /// (http://llvm.org/docs/LangRef.html#id181).
  ///
  /// This canonicalization is very likely already done in clang and
  /// instcombine. Therefore, the program will probably remain the same.
  ///
  /// Returns true if the module changes.
  ///
  /// Verified in @i32_add in split-gep.ll
  bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);

  /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
  /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
  /// the constant offset. After extraction, it becomes desirable to reunite the
  /// distributed sexts. For example,
  ///
  ///                              &a[sext(i +nsw (j +nsw 5))]
  ///   => distribute              &a[sext(i) +nsw (sext(j) +nsw 5)]
  ///   => constant extraction     &a[sext(i) + sext(j)] + 5
  ///   => reunion                 &a[sext(i +nsw j)] + 5
  bool reuniteExts(Function &F);

  /// A helper that reunites sexts in an instruction.
  bool reuniteExts(Instruction *I);

  /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
  Instruction *findClosestMatchingDominator(const SCEV *Key,
                                            Instruction *Dominatee);
  /// Verify F is free of dead code.
  void verifyNoDeadCode(Function &F);

  bool hasMoreThanOneUseInLoop(Value *v, Loop *L);

  // Swap the index operands of the two GEPs.
  void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);

  // Check if it is safe to swap the operands of the two GEPs.
  bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
                            Loop *CurLoop);

  const DataLayout *DL = nullptr;
  DominatorTree *DT = nullptr;
  ScalarEvolution *SE;

  LoopInfo *LI;
  TargetLibraryInfo *TLI;

  /// Whether to lower a GEP with multiple indices into arithmetic operations
  /// or multiple GEPs with a single index.
  bool LowerGEP;

  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingExprs;
};

} // end anonymous namespace

char SeparateConstOffsetFromGEP::ID = 0;

INITIALIZE_PASS_BEGIN(
    SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
    "Split GEPs to a variadic base and a constant offset for better CSE", false,
    false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
    "Split GEPs to a variadic base and a constant offset for better CSE", false,
    false)

FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) {
  return new SeparateConstOffsetFromGEP(LowerGEP);
}

bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
                                           bool ZeroExtended,
                                           BinaryOperator *BO,
                                           bool NonNegative) {
  // We only consider ADD, SUB and OR, because a non-zero constant found in
  // expressions composed of these operations can be easily hoisted as a
  // constant offset by reassociation.
  if (BO->getOpcode() != Instruction::Add &&
      BO->getOpcode() != Instruction::Sub &&
      BO->getOpcode() != Instruction::Or) {
    return false;
  }

  Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
  // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
  // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
  // FIXME: this does not appear to be covered by any tests
  // (with x86/aarch64 backends at least)
  if (BO->getOpcode() == Instruction::Or &&
      !haveNoCommonBitsSet(LHS, RHS, DL, nullptr, BO, DT))
    return false;

  // In addition, tracing into BO requires that its surrounding s/zext (if
  // any) is distributable to both operands.
  //
  // Suppose BO = A op B.
  //  SignExtended | ZeroExtended | Distributable?
  // --------------+--------------+----------------------------------
  //       0       |      0       | true because no s/zext exists
  //       0       |      1       | zext(BO) == zext(A) op zext(B)
  //       1       |      0       | sext(BO) == sext(A) op sext(B)
  //       1       |      1       | zext(sext(BO)) ==
  //               |              |   zext(sext(A)) op zext(sext(B))
  if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
    // If a + b >= 0 and (a >= 0 or b >= 0), then
    //   sext(a + b) = sext(a) + sext(b)
    // even if the addition is not marked nsw.
    //
    // Leveraging this invariant, we can trace into an sext'ed inbound GEP
    // index if the constant offset is non-negative.
    //
    // Verified in @sext_add in split-gep.ll.
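    //
    // A concrete instance of the invariant: with i8 values a = -1 and b = 5,
    // a + b = 4 >= 0 and b >= 0, and indeed
    //   sext i8 (a + b) to i16 = 4
    //   sext(a) + sext(b)      = -1 + 5 = 4,
    // even though the i8 addition carries no nsw flag.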
    if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {
      if (!ConstLHS->isNegative())
        return true;
    }
    if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
      if (!ConstRHS->isNegative())
        return true;
    }
  }

  // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
  // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
  if (BO->getOpcode() == Instruction::Add ||
      BO->getOpcode() == Instruction::Sub) {
    if (SignExtended && !BO->hasNoSignedWrap())
      return false;
    if (ZeroExtended && !BO->hasNoUnsignedWrap())
      return false;
  }

  return true;
}

APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
                                                   bool SignExtended,
                                                   bool ZeroExtended) {
  // BO being non-negative does not shed light on whether its operands are
  // non-negative. Clear the NonNegative flag here.
  APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
                              /* NonNegative */ false);
  // If we found a constant offset in the left operand, stop and return that.
  // This shortcut might cause us to miss opportunities of combining the
  // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
  // However, such cases are probably already handled by -instcombine,
  // given this pass runs after the standard optimizations.
  if (ConstantOffset != 0) return ConstantOffset;
  ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
                        /* NonNegative */ false);
  // If BO is a sub operator, negate the constant offset found in the right
  // operand.
  if (BO->getOpcode() == Instruction::Sub)
    ConstantOffset = -ConstantOffset;
  return ConstantOffset;
}

APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
                                    bool ZeroExtended, bool NonNegative) {
  // TODO(jingyue): We could trace into integer/pointer casts, such as
  // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
  // integers because it gives good enough results for our benchmarks.
  unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();

  // We cannot do much with Values that are not a User, such as an Argument.
  User *U = dyn_cast<User>(V);
  if (U == nullptr) return APInt(BitWidth, 0);

  APInt ConstantOffset(BitWidth, 0);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
    // Hooray, we found it!
    ConstantOffset = CI->getValue();
  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
    // Trace into subexpressions for more hoisting opportunities.
    if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
      ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
  } else if (isa<TruncInst>(V)) {
    ConstantOffset =
        find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
            .trunc(BitWidth);
  } else if (isa<SExtInst>(V)) {
    ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
                          ZeroExtended, NonNegative).sext(BitWidth);
  } else if (isa<ZExtInst>(V)) {
    // As an optimization, we can clear the SignExtended flag because
    // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
    //
    // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
    ConstantOffset =
        find(U->getOperand(0), /* SignExtended */ false,
             /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
  }

  // If we found a non-zero constant offset, add it to the path for
  // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
  // help this optimization.
  if (ConstantOffset != 0)
    UserChain.push_back(U);
  return ConstantOffset;
}

Value *ConstantOffsetExtractor::applyExts(Value *V) {
  Value *Current = V;
  // ExtInsts is built in the use-def order. Therefore, we apply them to V
  // in the reversed order.
  for (auto I = ExtInsts.rbegin(), E = ExtInsts.rend(); I != E; ++I) {
    if (Constant *C = dyn_cast<Constant>(Current)) {
      // If Current is a constant, apply s/zext using ConstantExpr::getCast.
      // ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt.
      Current = ConstantExpr::getCast((*I)->getOpcode(), C, (*I)->getType());
    } else {
      Instruction *Ext = (*I)->clone();
      Ext->setOperand(0, Current);
      Ext->insertBefore(IP);
      Current = Ext;
    }
  }
  return Current;
}

Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
  distributeExtsAndCloneChain(UserChain.size() - 1);
  // Remove all nullptrs (used to be s/zext) from UserChain.
  unsigned NewSize = 0;
  for (User *I : UserChain) {
    if (I != nullptr) {
      UserChain[NewSize] = I;
      NewSize++;
    }
  }
  UserChain.resize(NewSize);
  return removeConstOffset(UserChain.size() - 1);
}

Value *
ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
  User *U = UserChain[ChainIndex];
  if (ChainIndex == 0) {
    assert(isa<ConstantInt>(U));
    // If U is a ConstantInt, applyExts will return a ConstantInt as well.
    return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
  }

  if (CastInst *Cast = dyn_cast<CastInst>(U)) {
    assert(
        (isa<SExtInst>(Cast) || isa<ZExtInst>(Cast) || isa<TruncInst>(Cast)) &&
        "Only following instructions can be traced: sext, zext & trunc");
    ExtInsts.push_back(Cast);
    UserChain[ChainIndex] = nullptr;
    return distributeExtsAndCloneChain(ChainIndex - 1);
  }

  // Function find only traces into BinaryOperator and CastInst.
  BinaryOperator *BO = cast<BinaryOperator>(U);
  // OpNo = which operand of BO is UserChain[ChainIndex - 1]
  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
  Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);

  BinaryOperator *NewBO = nullptr;
  if (OpNo == 0) {
    NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
                                   BO->getName(), IP);
  } else {
    NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
                                   BO->getName(), IP);
  }
  return UserChain[ChainIndex] = NewBO;
}

Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
  if (ChainIndex == 0) {
    assert(isa<ConstantInt>(UserChain[ChainIndex]));
    return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
  }

  BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
  assert(BO->getNumUses() <= 1 &&
         "distributeExtsAndCloneChain clones each BinaryOperator in "
         "UserChain, so no one should be used more than "
         "once");

  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
  Value *NextInChain = removeConstOffset(ChainIndex - 1);
  Value *TheOther = BO->getOperand(1 - OpNo);

  // If NextInChain is 0 and not the LHS of a sub, we can simplify the
  // sub-expression to be just TheOther.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
    if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
      return TheOther;
  }

  BinaryOperator::BinaryOps NewOp = BO->getOpcode();
  if (BO->getOpcode() == Instruction::Or) {
    // Rebuild "or" as "add", because "or" may be invalid for the new
    // expression.
    //
    // For instance, given
    //   a | (b + 5) where a and b + 5 have no common bits,
    // we can extract 5 as the constant offset.
    //
    // However, reusing the "or" in the new index would give us
    //   (a | b) + 5
    // which does not equal a | (b + 5).
    //
    // Replacing the "or" with "add" is fine, because
    //   a | (b + 5) = a + (b + 5) = (a + b) + 5
    NewOp = Instruction::Add;
  }

  BinaryOperator *NewBO;
  if (OpNo == 0) {
    NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP);
  } else {
    NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP);
  }
  NewBO->takeName(BO);
  return NewBO;
}

Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
                                        User *&UserChainTail,
                                        const DominatorTree *DT) {
  ConstantOffsetExtractor Extractor(GEP, DT);
  // Find a non-zero constant offset first.
  APInt ConstantOffset =
      Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
                     GEP->isInBounds());
  if (ConstantOffset == 0) {
    UserChainTail = nullptr;
    return nullptr;
  }
  // Separates the constant offset from the GEP index.
  Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
  UserChainTail = Extractor.UserChain.back();
  return IdxWithoutConstOffset;
}

int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
                                      const DominatorTree *DT) {
  // If Idx is an index of an inbounds GEP, Idx is guaranteed to be
  // non-negative.
  return ConstantOffsetExtractor(GEP, DT)
      .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
            GEP->isInBounds())
      .getSExtValue();
}

bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
    GetElementPtrInst *GEP) {
  bool Changed = false;
  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
       I != E; ++I, ++GTI) {
    // Skip struct member indices which must be i32.
    if (GTI.isSequential()) {
      if ((*I)->getType() != IntPtrTy) {
        *I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP);
        Changed = true;
      }
    }
  }
  return Changed;
}

int64_t
SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
                                                 bool &NeedsExtraction) {
  NeedsExtraction = false;
  int64_t AccumulativeByteOffset = 0;
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      // Tries to extract a constant offset from this GEP index.
      int64_t ConstantOffset =
          ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
      if (ConstantOffset != 0) {
        NeedsExtraction = true;
        // A GEP may have multiple indices. We accumulate the extracted
        // constant offset to a byte offset, and later offset the remainder of
        // the original GEP with this byte offset.
        AccumulativeByteOffset +=
            ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
      }
    } else if (LowerGEP) {
      StructType *StTy = GTI.getStructType();
      uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
      // Skip field 0 as the offset is always 0.
      if (Field != 0) {
        NeedsExtraction = true;
        AccumulativeByteOffset +=
            DL->getStructLayout(StTy)->getElementOffset(Field);
      }
    }
  }
  return AccumulativeByteOffset;
}

void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
    GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
  IRBuilder<> Builder(Variadic);
  Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());

  Type *I8PtrTy =
      Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace());
  Value *ResultPtr = Variadic->getOperand(0);
  Loop *L = LI->getLoopFor(Variadic->getParent());
  // The base is a swap candidate only if it is loop invariant and used no
  // more than once in the loop.
  bool isSwapCandidate =
      L && L->isLoopInvariant(ResultPtr) &&
      !hasMoreThanOneUseInLoop(ResultPtr, L);
  Value *FirstResult = nullptr;

  if (ResultPtr->getType() != I8PtrTy)
    ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);

  gep_type_iterator GTI = gep_type_begin(*Variadic);
  // Create an ugly GEP for each sequential index. We don't create GEPs for
  // structure indices, as they are accumulated in the constant offset index.
  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      Value *Idx = Variadic->getOperand(I);
      // Skip zero indices.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
        if (CI->isZero())
          continue;

      APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
                                DL->getTypeAllocSize(GTI.getIndexedType()));
      // Scale the index by element size.
      if (ElementSize != 1) {
        if (ElementSize.isPowerOf2()) {
          Idx = Builder.CreateShl(
              Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
        } else {
          Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
        }
      }
      // Create an ugly GEP with a single index for each index.
      ResultPtr =
          Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
      if (FirstResult == nullptr)
        FirstResult = ResultPtr;
    }
  }

  // Create a GEP with the constant offset index.
  if (AccumulativeByteOffset != 0) {
    Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
    ResultPtr =
        Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
  } else
    isSwapCandidate = false;

  // If we created a GEP with a constant index, and the base is loop invariant,
  // then we swap the first one with it, so LICM can move the constant GEP out
  // later.
  GetElementPtrInst *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
  GetElementPtrInst *SecondGEP = dyn_cast_or_null<GetElementPtrInst>(ResultPtr);
  if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
    swapGEPOperand(FirstGEP, SecondGEP);

  if (ResultPtr->getType() != Variadic->getType())
    ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());

  Variadic->replaceAllUsesWith(ResultPtr);
  Variadic->eraseFromParent();
}

void
SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
                                               int64_t AccumulativeByteOffset) {
  IRBuilder<> Builder(Variadic);
  Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());

  Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
  gep_type_iterator GTI = gep_type_begin(*Variadic);
  // Create ADD/SHL/MUL arithmetic operations for each sequential index. We
  // don't create arithmetics for structure indices, as they are accumulated
  // in the constant offset index.
  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      Value *Idx = Variadic->getOperand(I);
      // Skip zero indices.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
        if (CI->isZero())
          continue;

      APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
                                DL->getTypeAllocSize(GTI.getIndexedType()));
      // Scale the index by element size.
      if (ElementSize != 1) {
        if (ElementSize.isPowerOf2()) {
          Idx = Builder.CreateShl(
              Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
        } else {
          Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
        }
      }
      // Create an ADD for each index.
      ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
    }
  }

  // Create an ADD for the constant offset index.
  if (AccumulativeByteOffset != 0) {
    ResultPtr = Builder.CreateAdd(
        ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
  }

  ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
  Variadic->replaceAllUsesWith(ResultPtr);
  Variadic->eraseFromParent();
}

bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
  // Skip vector GEPs.
  if (GEP->getType()->isVectorTy())
    return false;

  // The backend can already nicely handle the case where all indices are
  // constant.
  if (GEP->hasAllConstantIndices())
    return false;

  bool Changed = canonicalizeArrayIndicesToPointerSize(GEP);

  bool NeedsExtraction;
  int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);

  if (!NeedsExtraction)
    return Changed;

  TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*GEP->getFunction());

  // If LowerGEP is disabled, before really splitting the GEP, check whether the
  // backend supports the addressing mode we are about to produce. If not, this
  // splitting probably won't be beneficial.
  // If LowerGEP is enabled, even if the extracted constant offset cannot match
  // the addressing mode, we can still do optimizations on the other lowered
  // parts of the variable indices. Therefore, we don't check for addressing
  // modes in that case.
  if (!LowerGEP) {
    unsigned AddrSpace = GEP->getPointerAddressSpace();
    if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
                                   /*BaseGV=*/nullptr, AccumulativeByteOffset,
                                   /*HasBaseReg=*/true, /*Scale=*/0,
                                   AddrSpace)) {
      return Changed;
    }
  }

  // Remove the constant offset in each sequential index. The resultant GEP
  // computes the variadic base.
  // Notice that we don't remove struct field indices here. If LowerGEP is
  // disabled, a structure index is not accumulated and we still use the old
  // one. If LowerGEP is enabled, a structure index is accumulated in the
  // constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later
  // handle the constant offset and won't need a new structure index.
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      // Splits this GEP index into a variadic part and a constant offset, and
      // uses the variadic part as the new index.
      Value *OldIdx = GEP->getOperand(I);
      User *UserChainTail;
      Value *NewIdx =
          ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail, DT);
      if (NewIdx != nullptr) {
        // Switches to the index with the constant offset removed.
        GEP->setOperand(I, NewIdx);
        // After switching to the new index, we can garbage-collect UserChain
        // and the old index if they are not used.
        RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
        RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
      }
    }
  }

  // Clear the inbounds attribute because the new index may be off-bound.
  // e.g.,
  //
  //   b     = add i64 a, 5
  //   addr  = gep inbounds float, float* p, i64 b
  //
  // is transformed to:
  //
  //   addr2 = gep float, float* p, i64 a ; inbounds removed
  //   addr  = gep inbounds float, float* addr2, i64 5
  //
  // If a is -4, although the old index b is in bounds, the new index a is
  // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
  // inbounds keyword is not present, the offsets are added to the base
  // address with silently-wrapping two's complement arithmetic".
  // Therefore, the final code will be semantically equivalent.
  //
  // TODO(jingyue): do some range analysis to keep as many inbounds as
  // possible. GEPs with inbounds are more friendly to alias analysis.
  bool GEPWasInBounds = GEP->isInBounds();
  GEP->setIsInBounds(false);

  // Lowers a GEP to either GEPs with a single index or arithmetic operations.
  if (LowerGEP) {
    // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
    // arithmetic operations if the target uses alias analysis in codegen.
    if (TTI.useAA())
      lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
    else
      lowerToArithmetics(GEP, AccumulativeByteOffset);
    return true;
  }

  // No need to create another GEP if the accumulative byte offset is 0.
  if (AccumulativeByteOffset == 0)
    return true;

  // Offsets the base with the accumulative byte offset.
  //
  //   %gep                        ; the base
  //   ... %gep ...
  //
  // => add the offset
  //
  //   %gep2                       ; clone of %gep
  //   %new.gep = gep %gep2, <offset / sizeof(*%gep)>
  //   %gep                        ; will be removed
  //   ... %gep ...
  //
  // => replace all uses of %gep with %new.gep and remove %gep
  //
  //   %gep2                       ; clone of %gep
  //   %new.gep = gep %gep2, <offset / sizeof(*%gep)>
  //   ... %new.gep ...
  //
  // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
  // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
  // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
  // type of %gep.
  //
  //   %gep2                       ; clone of %gep
  //   %0       = bitcast %gep2 to i8*
  //   %uglygep = gep %0, <offset>
  //   %new.gep = bitcast %uglygep to <type of %gep>
  //   ... %new.gep ...
  Instruction *NewGEP = GEP->clone();
  NewGEP->insertBefore(GEP);

  // Per the ANSI C standard, signed / unsigned = unsigned and
  // signed % unsigned = unsigned. Therefore, we cast ElementTypeSizeOfGEP to
  // signed so that the division and modulo below, which involve the signed
  // AccumulativeByteOffset, stay signed.
  int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
      DL->getTypeAllocSize(GEP->getResultElementType()));
  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
  if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
    // Very likely. As long as %gep is naturally aligned, the byte offset we
    // extracted should be a multiple of sizeof(*%gep).
    int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
    NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
                                       ConstantInt::get(IntPtrTy, Index, true),
                                       GEP->getName(), GEP);
    NewGEP->copyMetadata(*GEP);
    // Inherit the inbounds attribute of the original GEP.
    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
  } else {
    // Unlikely but possible. For example,
    //   #pragma pack(1)
    //   struct S {
    //     int a[3];
    //     int64 b[8];
    //   };
    //   #pragma pack()
    //
    // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
    // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
    // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
    // sizeof(int64).
    //
    // Emit an uglygep in this case.
    Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(),
                                       GEP->getPointerAddressSpace());
    NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
    NewGEP = GetElementPtrInst::Create(
        Type::getInt8Ty(GEP->getContext()), NewGEP,
        ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
        GEP);
    NewGEP->copyMetadata(*GEP);
    // Inherit the inbounds attribute of the original GEP.
    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
    if (GEP->getType() != I8PtrTy)
      NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
  }

  GEP->replaceAllUsesWith(NewGEP);
  GEP->eraseFromParent();

  return true;
}

bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  if (DisableSeparateConstOffsetFromGEP)
    return false;

  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  bool Changed = false;
  for (BasicBlock &B : F) {
    for (BasicBlock::iterator I = B.begin(), IE = B.end(); I != IE;)
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++))
        Changed |= splitGEP(GEP);
    // No need to split GEP ConstantExprs because all their indices are
    // constant already.
  }

  Changed |= reuniteExts(F);

  if (VerifyNoDeadCode)
    verifyNoDeadCode(F);

  return Changed;
}

Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
    const SCEV *Key, Instruction *Dominatee) {
  auto Pos = DominatingExprs.find(Key);
  if (Pos == DominatingExprs.end())
    return nullptr;

  auto &Candidates = Pos->second;
  // Because we process the basic blocks in pre-order of the dominator tree, a
  // candidate that doesn't dominate the current instruction won't dominate any
  // future instruction either. Therefore, we pop it out of the stack. This
  // optimization makes the algorithm O(n).
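  //
  // In other words: candidates are pushed in the order their blocks are
  // visited, so the stack top is the most recently visited candidate. If it
  // does not dominate Dominatee, the pre-order walk has already left that
  // candidate's dominator-tree subtree; a pre-order traversal never re-enters
  // a subtree, so the candidate cannot dominate anything visited later.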
  while (!Candidates.empty()) {
    Instruction *Candidate = Candidates.back();
    if (DT->dominates(Candidate, Dominatee))
      return Candidate;
    Candidates.pop_back();
  }
  return nullptr;
}

bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
  if (!SE->isSCEVable(I->getType()))
    return false;

  //   Dom: LHS+RHS
  //   I:   sext(LHS)+sext(RHS)
  // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
  // TODO: handle zext
  Value *LHS = nullptr, *RHS = nullptr;
  if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS)))) ||
      match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
    if (LHS->getType() == RHS->getType()) {
      const SCEV *Key =
          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
      if (auto *Dom = findClosestMatchingDominator(Key, I)) {
        Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
        NewSExt->takeName(I);
        I->replaceAllUsesWith(NewSExt);
        RecursivelyDeleteTriviallyDeadInstructions(I);
        return true;
      }
    }
  }

  // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
  if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||
      match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
    if (programUndefinedIfFullPoison(I)) {
      const SCEV *Key =
          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
      DominatingExprs[Key].push_back(I);
    }
  }
  return false;
}

bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
  bool Changed = false;
  DominatingExprs.clear();
  for (const auto Node : depth_first(DT)) {
    BasicBlock *BB = Node->getBlock();
    for (auto I = BB->begin(); I != BB->end(); ) {
      Instruction *Cur = &*I++;
      Changed |= reuniteExts(Cur);
    }
  }
  return Changed;
}

void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
  for (BasicBlock &B : F) {
    for (Instruction &I : B) {
      if (isInstructionTriviallyDead(&I)) {
        std::string ErrMessage;
        raw_string_ostream RSO(ErrMessage);
        RSO << "Dead instruction detected!\n" << I << "\n";
        llvm_unreachable(RSO.str().c_str());
      }
    }
  }
}

bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
    GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
  if (!FirstGEP || !FirstGEP->hasOneUse())
    return false;

  if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
    return false;

  if (FirstGEP == SecondGEP)
    return false;

  unsigned FirstNum = FirstGEP->getNumOperands();
  unsigned SecondNum = SecondGEP->getNumOperands();
  // Give up if the number of operands is not 2.
  if (FirstNum != SecondNum || FirstNum != 2)
    return false;

  Value *FirstBase = FirstGEP->getOperand(0);
  Value *SecondBase = SecondGEP->getOperand(0);
  Value *FirstOffset = FirstGEP->getOperand(1);
  // Give up if the index of the first GEP is loop invariant.
  if (CurLoop->isLoopInvariant(FirstOffset))
    return false;

  // Give up if the bases don't have the same type.
  if (FirstBase->getType() != SecondBase->getType())
    return false;

  Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);

  // Check if the second operand of the first GEP has a constant coefficient.
  // For example, for the following code, we won't gain anything by hoisting
  // the second GEP out because the second GEP can be folded away.
  //   %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
  //   %67 = shl i64 %scevgep.sum.ur159, 2
  //   %uglygep160 = getelementptr i8* %65, i64 %67
  //   %uglygep161 = getelementptr i8* %uglygep160, i64 -1024

  // Skip a constant shift instruction which may be generated by splitting GEPs.
  if (FirstOffsetDef && FirstOffsetDef->isShift() &&
      isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
    FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));

  // Give up if FirstOffsetDef is an Add or Sub with a constant, because it
  // may not be profitable at all due to constant folding.
  if (FirstOffsetDef)
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
      unsigned opc = BO->getOpcode();
      if ((opc == Instruction::Add || opc == Instruction::Sub) &&
          (isa<ConstantInt>(BO->getOperand(0)) ||
           isa<ConstantInt>(BO->getOperand(1))))
        return false;
    }
  return true;
}

bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
  int UsesInLoop = 0;
  for (User *U : V->users()) {
    if (Instruction *User = dyn_cast<Instruction>(U))
      if (L->contains(User))
        if (++UsesInLoop > 1)
          return true;
  }
  return false;
}

void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
                                                GetElementPtrInst *Second) {
  Value *Offset1 = First->getOperand(1);
  Value *Offset2 = Second->getOperand(1);
  First->setOperand(1, Offset2);
  Second->setOperand(1, Offset1);

  // We changed p+o+c to p+c+o, and p+c may not be inbounds anymore.
  const DataLayout &DAL = First->getModule()->getDataLayout();
  APInt Offset(DAL.getIndexSizeInBits(
                   cast<PointerType>(First->getType())->getAddressSpace()),
               0);
  Value *NewBase =
      First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);
  uint64_t ObjectSize;
  if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
      Offset.ugt(ObjectSize)) {
    First->setIsInBounds(false);
    Second->setIsInBounds(false);
  } else
    First->setIsInBounds(true);
}
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:945
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:70
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:402
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:771
This class represents lattice values for constants.
Definition: AllocatorList.h:23
BinaryOps getOpcode() const
Definition: InstrTypes.h:402
INITIALIZE_PASS_BEGIN(SeparateConstOffsetFromGEP, "separate-const-offset-from-gep", "Split GEPs to a variadic base and a constant offset for better CSE", false, false) INITIALIZE_PASS_END(SeparateConstOffsetFromGEP
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:66
The main scalar evolution driver.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value *> IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:907
static cl::opt< bool > VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false), cl::desc("Verify this pass produces no dead code"), cl::Hidden)
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:743
F(f)
This class represents a sign extension of integer types.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:635
Hexagon Common GEP
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
op_iterator op_begin()
Definition: User.h:229
static Constant * getNullValue(Type *Ty)
Constructor to create a &#39;0&#39; constant of arbitrary type.
Definition: Constants.cpp:289
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:273
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:439
Class to represent struct types.
Definition: DerivedTypes.h:238
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
separate const offset from Split GEPs to a variadic base and a constant offset for better CSE
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1118
This file implements a class to represent arbitrary precision integral constant values and operations...
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:759
static const T * Find(StringRef S, ArrayRef< T > A)
Find KV in array using binary search.
bool programUndefinedIfFullPoison(const Instruction *PoisonI)
Return true if this function can prove that if PoisonI is executed and yields a full-poison value (al...
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1958
Instruction * clone() const
Create a copy of &#39;this&#39; instruction that is identical in all ways except the following: ...
Key
PAL metadata keys.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1963
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
This class represents a no-op cast from one type to another.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to false.
Definition: Value.h:602
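A sketch of typical usage; baseAndByteOffset is a hypothetical helper.
  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/Value.h"
  using namespace llvm;

  // Peel constant in-bounds GEPs off Ptr, accumulating their combined byte
  // offset, and return the underlying base pointer.
  static const Value *baseAndByteOffset(const Value *Ptr, const DataLayout &DL,
                                        APInt &Offset) {
    Offset = APInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
    return Ptr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
  }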
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:291
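These two calls usually travel together when an instruction is rewritten; a sketch with illustrative names.
  #include "llvm/IR/Instruction.h"
  using namespace llvm;

  // Assumes NewI has already been inserted next to OldI: keep the old name
  // for readable IR, redirect all users, and leave OldI dead for cleanup.
  static void replaceInst(Instruction *OldI, Instruction *NewI) {
    NewI->takeName(OldI);
    OldI->replaceAllUsesWith(NewI);
  }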
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
void initializeSeparateConstOffsetFromGEPPass(PassRegistry &)
Value * getOperand(unsigned i) const
Definition: User.h:169
An instruction for type-safe pointer arithmetic to access elements of arrays and structs.
Definition: Instructions.h:881
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
Wrapper pass for TargetTransformInfo.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction...
Definition: Instruction.cpp:73
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important base class in LLVM.
Definition: Constant.h:41
This file contains the declarations for the subclasses of Constant, which represent the different fla...
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Represent the analysis usage information of a pass.
op_iterator op_end()
Definition: User.h:231
static cl::opt< bool > DisableSeparateConstOffsetFromGEP("disable-separate-const-offset-from-gep", cl::init(false), cl::desc("Do not separate the constant offset from a GEP instruction"), cl::Hidden)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:59
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:440
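A sketch of the cleanup step after rewriting; cleanupDead is a hypothetical wrapper.
  #include "llvm/Transforms/Utils/Local.h"
  using namespace llvm;

  // After all users of an old GEP have been redirected, the GEP and any
  // arithmetic feeding only it become trivially dead; this deletes them
  // recursively. The call is a no-op (returns false) if V is still used.
  static bool cleanupDead(Value *V) {
    return RecursivelyDeleteTriviallyDeadInstructions(V);
  }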
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1186
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:224
PointerType * getInt8PtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer to an 8-bit integer value.
Definition: IRBuilder.h:421
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:519
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1152
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1676
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
Definition: LoopInfo.h:115
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
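A sketch of normalizing an index type before offset arithmetic; castIndex is a hypothetical helper.
  #include "llvm/IR/InstrTypes.h"
  using namespace llvm;

  // Widen or truncate Idx to IdxTy, inserting the cast before InsertPt;
  // isSigned selects sext rather than zext when widening.
  static Value *castIndex(Value *Idx, Type *IdxTy, Instruction *InsertPt) {
    if (Idx->getType() == IdxTy)
      return Idx;
    return CastInst::CreateIntegerCast(Idx, IdxTy, /*isSigned=*/true,
                                       Idx->getName(), InsertPt);
  }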
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:191
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:275
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
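A sketch of the dominance check guarding reuse of a common value; redirectDominatedUses is an illustrative helper, not this pass's code.
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/IR/Dominators.h"
  using namespace llvm;

  // Redirect only those uses of V that the new definition dominates; other
  // uses must keep the original value. early-inc iteration is used because
  // U.set() mutates the use list being walked.
  static void redirectDominatedUses(DominatorTree &DT, Instruction *NewDef,
                                    Value *V) {
    for (Use &U : llvm::make_early_inc_range(V->uses()))
      if (DT.dominates(NewDef, U))
        U.set(NewDef);
  }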
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:653
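A sketch; offsetConstant is a hypothetical helper.
  #include "llvm/IR/Constants.h"
  using namespace llvm;

  // Materialize an accumulated constant offset; if IdxTy is a vector type,
  // ConstantInt::get returns a splat constant of the value.
  static Constant *offsetConstant(Type *IdxTy, int64_t Offset) {
    return ConstantInt::get(IdxTy, Offset, /*isSigned=*/true);
  }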
void setPreservesCFG()
This function should be called by the pass iff it does not add or remove basic blocks from the function or modify terminator instructions in any way.
Definition: Pass.cpp:301
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:937
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
unsigned logBase2() const
Definition: APInt.h:1756
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a module.
Definition: Instruction.cpp:55
Class for arbitrary precision integers.
Definition: APInt.h:69
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:463
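Together with logBase2 above, this supports the usual multiply-to-shift strength reduction; a sketch with an illustrative helper name.
  #include "llvm/ADT/APInt.h"
  using namespace llvm;

  // If Factor is a power of two, report the equivalent shift amount so a
  // multiply by Factor can be emitted as a left shift instead.
  static bool asShiftAmount(const APInt &Factor, unsigned &ShAmt) {
    if (!Factor.isPowerOf2())
      return false;
    ShAmt = Factor.logBase2();
    return true;
  }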
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:420
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:373
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:1561
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1207
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1254
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:160
This class represents an analyzed expression in the program.
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:102
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if LHS and RHS have no common bits set.
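This is the classic justification for treating "a | b" and "a + b" interchangeably; a sketch, with a hypothetical helper name.
  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/DataLayout.h"
  using namespace llvm;

  // When LHS and RHS share no set bits, "LHS + RHS" and "LHS | RHS" compute
  // the same value, so an 'or' can participate in offset splitting as if it
  // were an 'add'.
  static bool orActsLikeAdd(const Value *LHS, const Value *RHS,
                            const DataLayout &DL) {
    return haveNoCommonBitsSet(LHS, RHS, DL);
  }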
Type * getResultElementType() const
Definition: Instructions.h:983
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
bool hasNoUnsignedWrap() const
Determine whether the no unsigned wrap flag is set.
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1953
iterator_range< df_iterator< T > > depth_first(const T &G)
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:503
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side effects.
Definition: Local.cpp:359
LLVM Value Representation.
Definition: Value.h:74
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:1208
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:433
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
This pass exposes codegen information to IR-level passes.
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:178
bool hasAllConstantIndices() const
Return true if all of the indices of this GEP are constant integers.
const BasicBlock * getParent() const
Definition: Instruction.h:66
gep_type_iterator gep_type_begin(const User *GEP)