SeparateConstOffsetFromGEP.cpp
1 //===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Loop unrolling may create many similar GEPs for array accesses.
10 // e.g., a 2-level loop
11 //
12 // float a[32][32]; // global variable
13 //
14 //   for (int i = 0; i < 2; ++i) {
15 //     for (int j = 0; j < 2; ++j) {
16 //       ...
17 //       ... = a[x + i][y + j];
18 //       ...
19 //     }
20 //   }
21 //
22 // will probably be unrolled to:
23 //
24 // gep %a, 0, %x, %y; load
25 // gep %a, 0, %x, %y + 1; load
26 // gep %a, 0, %x + 1, %y; load
27 // gep %a, 0, %x + 1, %y + 1; load
28 //
29 // LLVM's GVN does not use partial redundancy elimination yet, and is thus
30 // unable to reuse (gep %a, 0, %x, %y). As a result, this missed optimization
31 // incurs a significant slowdown on targets with limited addressing modes. For instance,
32 // because the PTX target does not support the reg+reg addressing mode, the
33 // NVPTX backend emits PTX code that literally computes the pointer address of
34 // each GEP, wasting tons of registers. It emits the following PTX for the
35 // first load and similar PTX for other loads.
36 //
37 // mov.u32 %r1, %x;
38 // mov.u32 %r2, %y;
39 // mul.wide.u32 %rl2, %r1, 128;
40 // mov.u64 %rl3, a;
41 // add.s64 %rl4, %rl3, %rl2;
42 // mul.wide.u32 %rl5, %r2, 4;
43 // add.s64 %rl6, %rl4, %rl5;
44 // ld.global.f32 %f1, [%rl6];
45 //
46 // To reduce the register pressure, the optimization implemented in this file
47 // merges the common part of a group of GEPs, so we can compute each pointer
48 // address by adding a simple offset to the common part, saving many registers.
49 //
50 // It works by splitting each GEP into a variadic base and a constant offset.
51 // The variadic base can be computed once and reused by multiple GEPs, and the
52 // constant offsets can be nicely folded into the reg+immediate addressing mode
53 // (supported by most targets) without using any extra register.
54 //
55 // For instance, we transform the four GEPs and four loads in the above example
56 // into:
57 //
58 // base = gep a, 0, x, y
59 // load base
60 // load base + 1 * sizeof(float)
61 // load base + 32 * sizeof(float)
62 // load base + 33 * sizeof(float)
63 //
64 // Given the transformed IR, a backend that supports the reg+immediate
65 // addressing mode can easily fold the pointer arithmetics into the loads. For
66 // example, the NVPTX backend can easily fold the pointer arithmetics into the
67 // ld.global.f32 instructions, and the resultant PTX uses much fewer registers.
68 //
69 // mov.u32 %r1, %tid.x;
70 // mov.u32 %r2, %tid.y;
71 // mul.wide.u32 %rl2, %r1, 128;
72 // mov.u64 %rl3, a;
73 // add.s64 %rl4, %rl3, %rl2;
74 // mul.wide.u32 %rl5, %r2, 4;
75 // add.s64 %rl6, %rl4, %rl5;
76 // ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX
77 // ld.global.f32 %f2, [%rl6+4]; // much better
78 // ld.global.f32 %f3, [%rl6+128]; // much better
79 // ld.global.f32 %f4, [%rl6+132]; // much better
80 //
81 // Another improvement enabled by the LowerGEP flag is to lower a GEP with
82 // multiple indices to either multiple GEPs with a single index or arithmetic
83 // operations (depending on whether the target uses alias analysis in codegen).
84 // Such a transformation can have the following benefits:
85 // (1) It can always extract constants in the indices of structure type.
86 // (2) After such lowering, there are more optimization opportunities, such as
87 //     CSE, LICM and CGP.
88 //
89 // E.g. The following GEPs have multiple indices:
90 // BB1:
91 // %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
92 // load %p
93 // ...
94 // BB2:
95 // %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j2, i32 2
96 // load %p2
97 // ...
98 //
99 // We cannot apply CSE to the common part related to the index "i64 %i" while
100 // the GEPs keep their multiple indices. Lowering the GEPs makes such CSE possible.
101 // If the target does not use alias analysis in codegen, this pass will
102 // lower a GEP with multiple indices into arithmetic operations:
103 // BB1:
104 // %1 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
105 // %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
106 // %3 = add i64 %1, %2 ; CSE opportunity
107 // %4 = mul i64 %j1, length_of_struct
108 // %5 = add i64 %3, %4
109 // %6 = add i64 %3, struct_field_3 ; Constant offset
110 // %p = inttoptr i64 %6 to i32*
111 // load %p
112 // ...
113 // BB2:
114 // %7 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
115 // %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
116 // %9 = add i64 %7, %8 ; CSE opportunity
117 // %10 = mul i64 %j2, length_of_struct
118 // %11 = add i64 %9, %10
119 // %12 = add i64 %11, struct_field_2 ; Constant offset
120 // %p = inttoptr i64 %12 to i32*
121 // load %p2
122 // ...
123 //
124 // If the target uses alias analysis in codegen, this pass will lower a GEP
125 // with multiple indices into multiple GEPs with a single index:
126 // BB1:
127 // %1 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
128 // %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
129 // %3 = getelementptr i8* %1, i64 %2 ; CSE opportunity
130 // %4 = mul i64 %j1, length_of_struct
131 // %5 = getelementptr i8* %3, i64 %4
132 // %6 = getelementptr i8* %5, struct_field_3 ; Constant offset
133 // %p = bitcast i8* %6 to i32*
134 // load %p
135 // ...
136 // BB2:
137 // %7 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
138 // %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
139 // %9 = getelementptr i8* %7, i64 %8 ; CSE opportunity
140 // %10 = mul i64 %j2, length_of_struct
141 // %11 = getelementptr i8* %9, i64 %10
142 // %12 = getelementptr i8* %11, struct_field_2 ; Constant offset
143 // %p2 = bitcast i8* %12 to i32*
144 // load %p2
145 // ...
146 //
147 // Lowering GEPs can also benefit other passes such as LICM and CGP.
148 // LICM (Loop Invariant Code Motion) cannot hoist/sink a GEP with multiple
149 // indices if one of the indices is variant. If we lower such a GEP into
150 // invariant parts and variant parts, LICM can hoist/sink the invariant parts.
151 // CGP (CodeGen Prepare) tries to sink address calculations that match the
152 // target's addressing modes. A GEP with multiple indices may not match and will
153 // not be sunk. If we lower such a GEP into smaller parts, CGP may sink some of
154 // them, so we end up with a better addressing mode.
155 //
156 //===----------------------------------------------------------------------===//
157 
158 #include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
159 #include "llvm/ADT/APInt.h"
160 #include "llvm/ADT/DenseMap.h"
161 #include "llvm/ADT/DepthFirstIterator.h"
162 #include "llvm/ADT/SmallVector.h"
163 #include "llvm/Analysis/LoopInfo.h"
164 #include "llvm/Analysis/MemoryBuiltins.h"
165 #include "llvm/Analysis/ScalarEvolution.h"
166 #include "llvm/Analysis/TargetLibraryInfo.h"
167 #include "llvm/Analysis/TargetTransformInfo.h"
168 #include "llvm/Analysis/ValueTracking.h"
169 #include "llvm/IR/BasicBlock.h"
170 #include "llvm/IR/Constant.h"
171 #include "llvm/IR/Constants.h"
172 #include "llvm/IR/DataLayout.h"
173 #include "llvm/IR/DerivedTypes.h"
174 #include "llvm/IR/Dominators.h"
175 #include "llvm/IR/Function.h"
176 #include "llvm/IR/GetElementPtrTypeIterator.h"
177 #include "llvm/IR/IRBuilder.h"
178 #include "llvm/IR/Instruction.h"
179 #include "llvm/IR/Instructions.h"
180 #include "llvm/IR/Module.h"
181 #include "llvm/IR/PassManager.h"
182 #include "llvm/IR/PatternMatch.h"
183 #include "llvm/IR/Type.h"
184 #include "llvm/IR/User.h"
185 #include "llvm/IR/Value.h"
186 #include "llvm/InitializePasses.h"
187 #include "llvm/Pass.h"
188 #include "llvm/Support/Casting.h"
189 #include "llvm/Support/CommandLine.h"
190 #include "llvm/Support/ErrorHandling.h"
191 #include "llvm/Support/raw_ostream.h"
192 #include "llvm/Transforms/Scalar.h"
193 #include "llvm/Transforms/Utils/Local.h"
194 #include <cassert>
195 #include <cstdint>
196 #include <string>
197 
198 using namespace llvm;
199 using namespace llvm::PatternMatch;
200 
202  "disable-separate-const-offset-from-gep", cl::init(false),
203  cl::desc("Do not separate the constant offset from a GEP instruction"),
204  cl::Hidden);
205 
206 // Setting this flag may emit false positives when the input module already
207 // contains dead instructions. Therefore, we set it only in unit tests that are
208 // free of dead code.
209 static cl::opt<bool>
210  VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false),
211  cl::desc("Verify this pass produces no dead code"),
212  cl::Hidden);
213 
214 namespace {
215 
216 /// A helper class for separating a constant offset from a GEP index.
217 ///
218 /// In real programs, a GEP index may be more complicated than a simple addition
219 /// of something and a constant integer that can be trivially split. For
220 /// example, to split ((a << 3) | 5) + b, we need to search deeper for the
221 /// constant offset, so that we can separate the index into (a << 3) + b and 5.
222 ///
223 /// Therefore, this class looks into the expression that computes a given GEP
224 /// index, and tries to find a constant integer that can be hoisted to the
225 /// outermost level of the expression as an addition. Not every constant in an
226 /// expression can be hoisted out this way. E.g., we cannot transform (b * (a + 5))
227 /// to (b * a + 5); nor can we transform (3 * (a + 5)) to (3 * a + 5). However, in
228 /// that case, -instcombine has probably already optimized (3 * (a + 5)) to (3 * a + 15).
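///
/// As a rough illustration (the names %a, %b and %base are made up for this
/// comment, not taken from an LLVM test), extracting the constant from the
/// index ((a << 3) | 5) + b of a GEP conceptually rewrites
///   %s   = shl i64 %a, 3
///   %o   = or i64 %s, 5            ; no common bits with %s, so acts like add
///   %idx = add i64 %o, %b
///   %p   = getelementptr float, float* %base, i64 %idx
/// into
///   %idx.new = add i64 %s, %b      ; the "or" is rebuilt as an "add"
///   %p.base  = getelementptr float, float* %base, i64 %idx.new
///   %p       = getelementptr float, float* %p.base, i64 5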
229 class ConstantOffsetExtractor {
230 public:
231  /// Extracts a constant offset from the given GEP index. It returns the
232  /// new index representing the remainder (equal to the original index minus
233  /// the constant offset), or nullptr if we cannot extract a constant offset.
234  /// \p Idx The given GEP index
235  /// \p GEP The given GEP
236  /// \p UserChainTail Outputs the tail of UserChain so that we can
237  /// garbage-collect unused instructions in UserChain.
238  static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
239  User *&UserChainTail, const DominatorTree *DT);
240 
241  /// Looks for a constant offset from the given GEP index without extracting
242  /// it. It returns the numeric value of the extracted constant offset (0 if
243  /// failed). The meanings of the arguments are the same as in Extract.
244  static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
245  const DominatorTree *DT);
246 
247 private:
248  ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
249  : IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) {
250  }
251 
252  /// Searches the expression that computes V for a non-zero constant C s.t.
253  /// V can be reassociated into the form V' + C. If the search is
254  /// successful, returns C and updates UserChain as a def-use chain from C to V;
255  /// otherwise, UserChain is empty.
256  ///
257  /// \p V The given expression
258  /// \p SignExtended Whether V will be sign-extended in the computation of the
259  /// GEP index
260  /// \p ZeroExtended Whether V will be zero-extended in the computation of the
261  /// GEP index
262  /// \p NonNegative Whether V is guaranteed to be non-negative. For example,
263  /// an index of an inbounds GEP is guaranteed to be
264  /// non-negative. Leveraging this, we can better split
265  /// inbounds GEPs.
266  APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);
267 
268  /// A helper function to look into both operands of a binary operator.
269  APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
270  bool ZeroExtended);
271 
272  /// After finding the constant offset C from the GEP index I, we build a new
273  /// index I' s.t. I' + C = I. This function builds and returns the new
274  /// index I' according to UserChain produced by function "find".
275  ///
276  /// The building conceptually takes two steps:
277  /// 1) iteratively distribute s/zext towards the leaves of the expression tree
278  /// that computes I
279  /// 2) reassociate the expression tree to the form I' + C.
280  ///
281  /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
282  /// sext to a, b and 5 so that we have
283  /// sext(a) + (sext(b) + 5).
284  /// Then, we reassociate it to
285  /// (sext(a) + sext(b)) + 5.
286  /// Given this form, we know I' is sext(a) + sext(b).
287  Value *rebuildWithoutConstOffset();
288 
289  /// After the first step of rebuilding the GEP index without the constant
290  /// offset, distribute s/zext to the operands of all operators in UserChain.
291  /// e.g., zext(sext(a + (b + 5))) (assuming no overflow) =>
292  /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
293  ///
294  /// The function also updates UserChain to point to new subexpressions after
295  /// distributing s/zext. e.g., the old UserChain of the above example is
296  /// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
297  /// and the new UserChain is
298  /// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
299  /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5)))
300  ///
301  /// \p ChainIndex The index to UserChain. ChainIndex is initially
302  /// UserChain.size() - 1, and is decremented during
303  /// the recursion.
304  Value *distributeExtsAndCloneChain(unsigned ChainIndex);
305 
306  /// Reassociates the GEP index to the form I' + C and returns I'.
307  Value *removeConstOffset(unsigned ChainIndex);
308 
309  /// A helper function to apply ExtInsts, a list of s/zext, to value V.
310  /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
311  /// returns "sext i32 (zext i16 V to i32) to i64".
312  Value *applyExts(Value *V);
313 
314  /// A helper function that returns whether we can trace into the operands
315  /// of binary operator BO for a constant offset.
316  ///
317  /// \p SignExtended Whether BO is surrounded by sext
318  /// \p ZeroExtended Whether BO is surrounded by zext
319  /// \p NonNegative Whether BO is known to be non-negative, e.g., an in-bound
320  /// array index.
321  bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
322  bool NonNegative);
323 
324  /// The path from the constant offset to the old GEP index. e.g., if the GEP
325  /// index is "a * b + (c + 5)". After running function find, UserChain[0] will
326  /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
327  /// UserChain[2] will be the entire expression "a * b + (c + 5)".
328  ///
329  /// This path helps to rebuild the new GEP index.
330  SmallVector<User *, 8> UserChain;
331 
332  /// A data structure used in rebuildWithoutConstOffset. Contains all
333  /// sext/zext instructions along UserChain.
334  SmallVector<CastInst *, 16> ExtInsts;
335 
336  /// Insertion position of cloned instructions.
337  Instruction *IP;
338 
339  const DataLayout &DL;
340  const DominatorTree *DT;
341 };
342 
343 /// A pass that tries to split every GEP in the function into a variadic
344 /// base and a constant offset. It is a FunctionPass because searching for the
345 /// constant offset may inspect other basic blocks.
346 class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass {
347 public:
348  static char ID;
349 
350  SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
351  : FunctionPass(ID), LowerGEP(LowerGEP) {
352  initializeSeparateConstOffsetFromGEPLegacyPassPass(
353  *PassRegistry::getPassRegistry());
354  }
355 
356  void getAnalysisUsage(AnalysisUsage &AU) const override {
357  AU.addRequired<DominatorTreeWrapperPass>();
358  AU.addRequired<ScalarEvolutionWrapperPass>();
359  AU.addRequired<TargetTransformInfoWrapperPass>();
360  AU.addRequired<LoopInfoWrapperPass>();
361  AU.setPreservesCFG();
362  AU.addRequired<TargetLibraryInfoWrapperPass>();
363  }
364 
365  bool runOnFunction(Function &F) override;
366 
367 private:
368  bool LowerGEP;
369 };
370 
371 /// A pass that tries to split every GEP in the function into a variadic
372 /// base and a constant offset. It is a FunctionPass because searching for the
373 /// constant offset may inspect other basic blocks.
374 class SeparateConstOffsetFromGEP {
375 public:
376  SeparateConstOffsetFromGEP(
377  DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI,
378  TargetLibraryInfo *TLI,
379  function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
380  : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}
381 
382  bool run(Function &F);
383 
384 private:
385  /// Tries to split the given GEP into a variadic base and a constant offset,
386  /// and returns true if the splitting succeeds.
387  bool splitGEP(GetElementPtrInst *GEP);
388 
389  /// Lower a GEP with multiple indices into multiple GEPs with a single index.
390  /// Function splitGEP already split the original GEP into a variadic part and
391  /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
392  /// variadic part into a set of GEPs with a single index and applies
393  /// AccumulativeByteOffset to it.
394  /// \p Variadic The variadic part of the original GEP.
395  /// \p AccumulativeByteOffset The constant offset.
396  void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
397  int64_t AccumulativeByteOffset);
398 
399  /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form.
400  /// Function splitGEP already split the original GEP into a variadic part and
401  /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
402  /// variadic part into a set of arithmetic operations and applies
403  /// AccumulativeByteOffset to it.
404  /// \p Variadic The variadic part of the original GEP.
405  /// \p AccumulativeByteOffset The constant offset.
406  void lowerToArithmetics(GetElementPtrInst *Variadic,
407  int64_t AccumulativeByteOffset);
408 
409  /// Finds the constant offset within each index and accumulates them. If
410  /// LowerGEP is true, it searches the indices of both sequential and structure
411  /// types; otherwise, it only searches sequential indices. The output
412  /// NeedsExtraction indicates whether we successfully found a non-zero constant
413  /// offset.
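  ///
  /// A small sketch (the struct %struct.S and the values are hypothetical, not
  /// taken from a test): for
  ///   getelementptr %struct.S, %struct.S* %p, i64 (%i + 1), i32 2
  /// the sequential index contributes 1 * sizeof(%struct.S), and, when LowerGEP
  /// is enabled, the structure index contributes the byte offset of field 2, so
  /// AccumulativeByteOffset = sizeof(%struct.S) + offsetof(%struct.S, field 2).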
414  int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);
415 
416  /// Canonicalize array indices to pointer-size integers. This helps to
417  /// simplify the logic of splitting a GEP. For example, if a + b is a
418  /// pointer-size integer, we have
419  /// gep base, a + b = gep (gep base, a), b
420  /// However, this equality may not hold if the size of a + b is smaller than
421  /// the pointer size, because LLVM conceptually sign-extends GEP indices to
422  /// pointer size before computing the address
423  /// (http://llvm.org/docs/LangRef.html#id181).
424  ///
425  /// This canonicalization is very likely already done in clang and
426  /// instcombine. Therefore, the program will probably remain the same.
427  ///
428  /// Returns true if the module changes.
429  ///
430  /// Verified in @i32_add in split-gep.ll
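  ///
  /// A hedged illustration of the caveat (values are made up): with i32 indices
  /// on a 64-bit target, a = 0x7fffffff and b = 1 give a + b = INT_MIN after
  /// wrapping, so
  ///   gep base, (a + b)
  /// addresses a large negative offset, whereas
  ///   gep (gep base, a), b
  /// with each i32 index sign-extended separately addresses base + 2^31
  /// elements. Promoting the indices to the pointer size first makes the two
  /// forms agree.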
431  bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
432 
433  /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
434  /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
435  /// the constant offset. After extraction, it becomes desirable to reunite the
436  /// distributed sexts. For example,
437  ///
438  /// &a[sext(i +nsw (j +nsw 5))]
439  /// => distribute &a[sext(i) +nsw (sext(j) +nsw 5)]
440  /// => constant extraction &a[sext(i) + sext(j)] + 5
441  /// => reunion &a[sext(i +nsw j)] + 5
442  bool reuniteExts(Function &F);
443 
444  /// A helper that reunites sexts in an instruction.
445  bool reuniteExts(Instruction *I);
446 
447  /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
448  Instruction *findClosestMatchingDominator(
449  const SCEV *Key, Instruction *Dominatee,
450  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> &DominatingExprs);
451 
452  /// Verify F is free of dead code.
453  void verifyNoDeadCode(Function &F);
454 
455  bool hasMoreThanOneUseInLoop(Value *v, Loop *L);
456 
457  // Swap the index operands of two GEPs.
458  void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);
459 
460  // Check if it is safe to swap the index operands of two GEPs.
461  bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
462  Loop *CurLoop);
463 
464  const DataLayout *DL = nullptr;
465  DominatorTree *DT = nullptr;
466  ScalarEvolution *SE;
467  LoopInfo *LI;
468  TargetLibraryInfo *TLI;
469  // Retrieved lazily since not always used.
470  function_ref<TargetTransformInfo &(Function &)> GetTTI;
471 
472  /// Whether to lower a GEP with multiple indices into arithmetic operations or
473  /// multiple GEPs with a single index.
474  bool LowerGEP;
475 
475 
476  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingAdds;
477  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingSubs;
478 };
479 
480 } // end anonymous namespace
481 
482 char SeparateConstOffsetFromGEPLegacyPass::ID = 0;
483 
484 INITIALIZE_PASS_BEGIN(
485  SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
486  "Split GEPs to a variadic base and a constant offset for better CSE", false,
487  false)
488 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
489 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
490 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
491 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
492 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
493 INITIALIZE_PASS_END(
494  SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
495  "Split GEPs to a variadic base and a constant offset for better CSE", false,
496  false)
497 
498 FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) {
499  return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP);
500 }
501 
502 bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
503  bool ZeroExtended,
504  BinaryOperator *BO,
505  bool NonNegative) {
506  // We only consider ADD, SUB and OR, because a non-zero constant found in
507  // expressions composed of these operations can be easily hoisted as a
508  // constant offset by reassociation.
509  if (BO->getOpcode() != Instruction::Add &&
510  BO->getOpcode() != Instruction::Sub &&
511  BO->getOpcode() != Instruction::Or) {
512  return false;
513  }
514 
515  Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
516  // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
517  // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
518  // FIXME: this does not appear to be covered by any tests
519  // (with x86/aarch64 backends at least)
520  if (BO->getOpcode() == Instruction::Or &&
521  !haveNoCommonBitsSet(LHS, RHS, DL, nullptr, BO, DT))
522  return false;
523 
524  // In addition, tracing into BO requires that its surrounding s/zext (if
525  // any) is distributable to both operands.
526  //
527  // Suppose BO = A op B.
528  // SignExtended | ZeroExtended | Distributable?
529  // --------------+--------------+----------------------------------
530  // 0 | 0 | true because no s/zext exists
531  // 0 | 1 | zext(BO) == zext(A) op zext(B)
532  // 1 | 0 | sext(BO) == sext(A) op sext(B)
533  // 1 | 1 | zext(sext(BO)) ==
534  // | | zext(sext(A)) op zext(sext(B))
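  //
  // For instance (a sketch, not from a regression test): given i32 values
  //   %s = add nuw i32 %a, %b
  //   %z = zext i32 %s to i64
  // the nuw flag guarantees zext(%s) == zext(%a) + zext(%b), so we may trace
  // into %s; without nuw, a wrapping add would make the two sides differ.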
535  if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
536  // If a + b >= 0 and (a >= 0 or b >= 0), then
537  // sext(a + b) = sext(a) + sext(b)
538  // even if the addition is not marked nsw.
539  //
540  // Leveraging this invariant, we can trace into an sext'ed inbound GEP
541  // index if the constant offset is non-negative.
542  //
543  // Verified in @sext_add in split-gep.ll.
544  if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {
545  if (!ConstLHS->isNegative())
546  return true;
547  }
548  if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
549  if (!ConstRHS->isNegative())
550  return true;
551  }
552  }
553 
554  // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
555  // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
556  if (BO->getOpcode() == Instruction::Add ||
557  BO->getOpcode() == Instruction::Sub) {
558  if (SignExtended && !BO->hasNoSignedWrap())
559  return false;
560  if (ZeroExtended && !BO->hasNoUnsignedWrap())
561  return false;
562  }
563 
564  return true;
565 }
566 
567 APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
568  bool SignExtended,
569  bool ZeroExtended) {
570  // Save off the current height of the chain, in case we need to restore it.
571  size_t ChainLength = UserChain.size();
572 
573  // BO being non-negative does not shed light on whether its operands are
574  // non-negative. Clear the NonNegative flag here.
575  APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
576  /* NonNegative */ false);
577  // If we found a constant offset in the left operand, stop and return that.
578  // This shortcut might cause us to miss opportunities to combine the
579  // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
580  // However, such cases are probably already handled by -instcombine,
581  // given this pass runs after the standard optimizations.
582  if (ConstantOffset != 0) return ConstantOffset;
583 
584  // Reset the chain back to where it was when we started exploring this node,
585  // since visiting the LHS didn't pan out.
586  UserChain.resize(ChainLength);
587 
588  ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
589  /* NonNegative */ false);
590  // If BO is a sub operator, negate the constant offset found in the right
591  // operand.
592  if (BO->getOpcode() == Instruction::Sub)
593  ConstantOffset = -ConstantOffset;
594 
595  // If RHS wasn't a suitable candidate either, reset the chain again.
596  if (ConstantOffset == 0)
597  UserChain.resize(ChainLength);
598 
599  return ConstantOffset;
600 }
601 
602 APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
603  bool ZeroExtended, bool NonNegative) {
604  // TODO(jingyue): We could trace into integer/pointer casts, such as
605  // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
606  // integers because it gives good enough results for our benchmarks.
607  unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
608 
609  // We cannot do much with Values that are not a User, such as an Argument.
610  User *U = dyn_cast<User>(V);
611  if (U == nullptr) return APInt(BitWidth, 0);
612 
613  APInt ConstantOffset(BitWidth, 0);
614  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
615  // Hooray, we found it!
616  ConstantOffset = CI->getValue();
617  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
618  // Trace into subexpressions for more hoisting opportunities.
619  if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
620  ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
621  } else if (isa<TruncInst>(V)) {
622  ConstantOffset =
623  find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
624  .trunc(BitWidth);
625  } else if (isa<SExtInst>(V)) {
626  ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
627  ZeroExtended, NonNegative).sext(BitWidth);
628  } else if (isa<ZExtInst>(V)) {
629  // As an optimization, we can clear the SignExtended flag because
630  // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
631  //
632  // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
633  ConstantOffset =
634  find(U->getOperand(0), /* SignExtended */ false,
635  /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
636  }
637 
638  // If we found a non-zero constant offset, add it to the path for
639  // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
640  // help this optimization.
641  if (ConstantOffset != 0)
642  UserChain.push_back(U);
643  return ConstantOffset;
644 }
645 
646 Value *ConstantOffsetExtractor::applyExts(Value *V) {
647  Value *Current = V;
648  // ExtInsts is built in the use-def order. Therefore, we apply them to V
649  // in reverse order.
650  for (CastInst *I : llvm::reverse(ExtInsts)) {
651  if (Constant *C = dyn_cast<Constant>(Current)) {
652  // If Current is a constant, apply s/zext using ConstantExpr::getCast.
653  // ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt.
654  Current = ConstantExpr::getCast(I->getOpcode(), C, I->getType());
655  } else {
656  Instruction *Ext = I->clone();
657  Ext->setOperand(0, Current);
658  Ext->insertBefore(IP);
659  Current = Ext;
660  }
661  }
662  return Current;
663 }
664 
665 Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
666  distributeExtsAndCloneChain(UserChain.size() - 1);
667  // Remove all nullptrs (used to be s/zext) from UserChain.
668  unsigned NewSize = 0;
669  for (User *I : UserChain) {
670  if (I != nullptr) {
671  UserChain[NewSize] = I;
672  NewSize++;
673  }
674  }
675  UserChain.resize(NewSize);
676  return removeConstOffset(UserChain.size() - 1);
677 }
678 
679 Value *
680 ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
681  User *U = UserChain[ChainIndex];
682  if (ChainIndex == 0) {
683  assert(isa<ConstantInt>(U));
684  // If U is a ConstantInt, applyExts will return a ConstantInt as well.
685  return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
686  }
687 
688  if (CastInst *Cast = dyn_cast<CastInst>(U)) {
689  assert(
690  (isa<SExtInst>(Cast) || isa<ZExtInst>(Cast) || isa<TruncInst>(Cast)) &&
691  "Only following instructions can be traced: sext, zext & trunc");
692  ExtInsts.push_back(Cast);
693  UserChain[ChainIndex] = nullptr;
694  return distributeExtsAndCloneChain(ChainIndex - 1);
695  }
696 
697  // Function find only traces into BinaryOperator and CastInst.
698  BinaryOperator *BO = cast<BinaryOperator>(U);
699  // OpNo = which operand of BO is UserChain[ChainIndex - 1]
700  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
701  Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
702  Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);
703 
704  BinaryOperator *NewBO = nullptr;
705  if (OpNo == 0) {
706  NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
707  BO->getName(), IP);
708  } else {
709  NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
710  BO->getName(), IP);
711  }
712  return UserChain[ChainIndex] = NewBO;
713 }
714 
715 Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
716  if (ChainIndex == 0) {
717  assert(isa<ConstantInt>(UserChain[ChainIndex]));
718  return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
719  }
720 
721  BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
722  assert((BO->use_empty() || BO->hasOneUse()) &&
723  "distributeExtsAndCloneChain clones each BinaryOperator in "
724  "UserChain, so no one should be used more than "
725  "once");
726 
727  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
728  assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
729  Value *NextInChain = removeConstOffset(ChainIndex - 1);
730  Value *TheOther = BO->getOperand(1 - OpNo);
731 
732  // If NextInChain is 0 and not the LHS of a sub, we can simplify the
733  // sub-expression to be just TheOther.
734  if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
735  if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
736  return TheOther;
737  }
738 
739  BinaryOperator::BinaryOps NewOp = BO->getOpcode();
740  if (BO->getOpcode() == Instruction::Or) {
741  // Rebuild "or" as "add", because "or" may be invalid for the new
742  // expression.
743  //
744  // For instance, given
745  // a | (b + 5) where a and b + 5 have no common bits,
746  // we can extract 5 as the constant offset.
747  //
748  // However, reusing the "or" in the new index would give us
749  // (a | b) + 5
750  // which does not equal a | (b + 5).
751  //
752  // Replacing the "or" with "add" is fine, because
753  // a | (b + 5) = a + (b + 5) = (a + b) + 5
754  NewOp = Instruction::Add;
755  }
756 
757  BinaryOperator *NewBO;
758  if (OpNo == 0) {
759  NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP);
760  } else {
761  NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP);
762  }
763  NewBO->takeName(BO);
764  return NewBO;
765 }
766 
767 Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
768  User *&UserChainTail,
769  const DominatorTree *DT) {
770  ConstantOffsetExtractor Extractor(GEP, DT);
771  // Find a non-zero constant offset first.
772  APInt ConstantOffset =
773  Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
774  GEP->isInBounds());
775  if (ConstantOffset == 0) {
776  UserChainTail = nullptr;
777  return nullptr;
778  }
779  // Separates the constant offset from the GEP index.
780  Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
781  UserChainTail = Extractor.UserChain.back();
782  return IdxWithoutConstOffset;
783 }
784 
785 int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
786  const DominatorTree *DT) {
787  // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
788  return ConstantOffsetExtractor(GEP, DT)
789  .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
790  GEP->isInBounds())
791  .getSExtValue();
792 }
793 
794 bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
795  GetElementPtrInst *GEP) {
796  bool Changed = false;
797  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
798  gep_type_iterator GTI = gep_type_begin(*GEP);
799  for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
800  I != E; ++I, ++GTI) {
801  // Skip struct member indices which must be i32.
802  if (GTI.isSequential()) {
803  if ((*I)->getType() != IntPtrTy) {
804  *I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP);
805  Changed = true;
806  }
807  }
808  }
809  return Changed;
810 }
811 
812 int64_t
813 SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
814  bool &NeedsExtraction) {
815  NeedsExtraction = false;
816  int64_t AccumulativeByteOffset = 0;
817  gep_type_iterator GTI = gep_type_begin(*GEP);
818  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
819  if (GTI.isSequential()) {
820  // Tries to extract a constant offset from this GEP index.
821  int64_t ConstantOffset =
822  ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
823  if (ConstantOffset != 0) {
824  NeedsExtraction = true;
825  // A GEP may have multiple indices. We accumulate the extracted
826  // constant offset to a byte offset, and later offset the remainder of
827  // the original GEP with this byte offset.
828  AccumulativeByteOffset +=
829  ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
830  }
831  } else if (LowerGEP) {
832  StructType *StTy = GTI.getStructType();
833  uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
834  // Skip field 0 as the offset is always 0.
835  if (Field != 0) {
836  NeedsExtraction = true;
837  AccumulativeByteOffset +=
838  DL->getStructLayout(StTy)->getElementOffset(Field);
839  }
840  }
841  }
842  return AccumulativeByteOffset;
843 }
844 
845 void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
846  GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
847  IRBuilder<> Builder(Variadic);
848  Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
849 
850  Type *I8PtrTy =
851  Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace());
852  Value *ResultPtr = Variadic->getOperand(0);
853  Loop *L = LI->getLoopFor(Variadic->getParent());
854  // The base is only a candidate for the swap below if it is loop invariant and not used more than once in the loop.
855  bool isSwapCandidate =
856  L && L->isLoopInvariant(ResultPtr) &&
857  !hasMoreThanOneUseInLoop(ResultPtr, L);
858  Value *FirstResult = nullptr;
859 
860  if (ResultPtr->getType() != I8PtrTy)
861  ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
862 
863  gep_type_iterator GTI = gep_type_begin(*Variadic);
864  // Create an ugly GEP for each sequential index. We don't create GEPs for
865  // structure indices, as they are accumulated in the constant offset index.
866  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
867  if (GTI.isSequential()) {
868  Value *Idx = Variadic->getOperand(I);
869  // Skip zero indices.
870  if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
871  if (CI->isZero())
872  continue;
873 
874  APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
875  DL->getTypeAllocSize(GTI.getIndexedType()));
876  // Scale the index by element size.
877  if (ElementSize != 1) {
878  if (ElementSize.isPowerOf2()) {
879  Idx = Builder.CreateShl(
880  Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
881  } else {
882  Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
883  }
884  }
885  // Create an ugly GEP with a single index for each index.
886  ResultPtr =
887  Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
888  if (FirstResult == nullptr)
889  FirstResult = ResultPtr;
890  }
891  }
892 
893  // Create a GEP with the constant offset index.
894  if (AccumulativeByteOffset != 0) {
895  Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
896  ResultPtr =
897  Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
898  } else
899  isSwapCandidate = false;
900 
901  // If we created a GEP with constant index, and the base is loop invariant,
902  // then we swap the first one with it, so LICM can move constant GEP out
903  // later.
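  //
  // Roughly (made-up names, not from a test): inside a loop where %base is
  // loop invariant,
  //   %g1 = getelementptr i8, i8* %base, i64 %i
  //   %g2 = getelementptr i8, i8* %g1, i64 1024
  // becomes, after the index operands are swapped,
  //   %g1 = getelementptr i8, i8* %base, i64 1024  ; now hoistable by LICM
  //   %g2 = getelementptr i8, i8* %g1, i64 %i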
904  auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
905  auto *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
906  if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
907  swapGEPOperand(FirstGEP, SecondGEP);
908 
909  if (ResultPtr->getType() != Variadic->getType())
910  ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());
911 
912  Variadic->replaceAllUsesWith(ResultPtr);
913  Variadic->eraseFromParent();
914 }
915 
916 void
917 SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
918  int64_t AccumulativeByteOffset) {
919  IRBuilder<> Builder(Variadic);
920  Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
921 
922  Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
923  gep_type_iterator GTI = gep_type_begin(*Variadic);
924  // Create ADD/SHL/MUL arithmetic operations for each sequential index. We
925  // don't create arithmetic for structure indices, as they are accumulated
926  // in the constant offset index.
927  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
928  if (GTI.isSequential()) {
929  Value *Idx = Variadic->getOperand(I);
930  // Skip zero indices.
931  if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
932  if (CI->isZero())
933  continue;
934 
935  APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
936  DL->getTypeAllocSize(GTI.getIndexedType()));
937  // Scale the index by element size.
938  if (ElementSize != 1) {
939  if (ElementSize.isPowerOf2()) {
940  Idx = Builder.CreateShl(
941  Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
942  } else {
943  Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
944  }
945  }
946  // Create an ADD for each index.
947  ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
948  }
949  }
950 
951  // Create an ADD for the constant offset index.
952  if (AccumulativeByteOffset != 0) {
953  ResultPtr = Builder.CreateAdd(
954  ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
955  }
956 
957  ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
958  Variadic->replaceAllUsesWith(ResultPtr);
959  Variadic->eraseFromParent();
960 }
961 
962 bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
963  // Skip vector GEPs.
964  if (GEP->getType()->isVectorTy())
965  return false;
966 
967  // The backend can already nicely handle the case where all indices are
968  // constant.
969  if (GEP->hasAllConstantIndices())
970  return false;
971 
972  bool Changed = canonicalizeArrayIndicesToPointerSize(GEP);
973 
974  bool NeedsExtraction;
975  int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
976 
977  if (!NeedsExtraction)
978  return Changed;
979 
980  TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());
981 
982  // If LowerGEP is disabled, before really splitting the GEP, check whether the
983  // backend supports the addressing mode we are about to produce. If no, this
984  // splitting probably won't be beneficial.
985  // If LowerGEP is enabled, even if the extracted constant offset cannot match
986  // the addressing mode, we can still optimize the other lowered parts of the
987  // variable indices. Therefore, we don't check for addressing modes in that
988  // case.
989  if (!LowerGEP) {
990  unsigned AddrSpace = GEP->getPointerAddressSpace();
991  if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
992  /*BaseGV=*/nullptr, AccumulativeByteOffset,
993  /*HasBaseReg=*/true, /*Scale=*/0,
994  AddrSpace)) {
995  return Changed;
996  }
997  }
998 
999  // Remove the constant offset in each sequential index. The resultant GEP
1000  // computes the variadic base.
1001  // Notice that we don't remove struct field indices here. If LowerGEP is
1002  // disabled, a structure index is not accumulated and we still use the old
1003  // one. If LowerGEP is enabled, a structure index is accumulated in the
1004  // constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later
1005  // handle the constant offset and won't need a new structure index.
1006  gep_type_iterator GTI = gep_type_begin(*GEP);
1007  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
1008  if (GTI.isSequential()) {
1009  // Splits this GEP index into a variadic part and a constant offset, and
1010  // uses the variadic part as the new index.
1011  Value *OldIdx = GEP->getOperand(I);
1012  User *UserChainTail;
1013  Value *NewIdx =
1014  ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail, DT);
1015  if (NewIdx != nullptr) {
1016  // Switches to the index with the constant offset removed.
1017  GEP->setOperand(I, NewIdx);
1018  // After switching to the new index, we can garbage-collect UserChain
1019  // and the old index if they are not used.
1020  RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
1021  RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
1022  }
1023  }
1024  }
1025 
1026  // Clear the inbounds attribute because the new index may be off-bound.
1027  // e.g.,
1028  //
1029  // b = add i64 a, 5
1030  // addr = gep inbounds float, float* p, i64 b
1031  //
1032  // is transformed to:
1033  //
1034  // addr2 = gep float, float* p, i64 a ; inbounds removed
1035  // addr = gep inbounds float, float* addr2, i64 5
1036  //
1037  // If a is -4, although the old index b is in bounds, the new index a is
1038  // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
1039  // inbounds keyword is not present, the offsets are added to the base
1040  // address with silently-wrapping two's complement arithmetic".
1041  // Therefore, the final code will be semantically equivalent.
1042  //
1043  // TODO(jingyue): do some range analysis to keep as many inbounds as
1044  // possible. GEPs with inbounds are more friendly to alias analysis.
1045  bool GEPWasInBounds = GEP->isInBounds();
1046  GEP->setIsInBounds(false);
1047 
1048  // Lowers a GEP to either GEPs with a single index or arithmetic operations.
1049  if (LowerGEP) {
1050  // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
1051  // arithmetic operations if the target uses alias analysis in codegen.
1052  if (TTI.useAA())
1053  lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
1054  else
1055  lowerToArithmetics(GEP, AccumulativeByteOffset);
1056  return true;
1057  }
1058 
1059  // No need to create another GEP if the accumulative byte offset is 0.
1060  if (AccumulativeByteOffset == 0)
1061  return true;
1062 
1063  // Offsets the base with the accumulative byte offset.
1064  //
1065  // %gep ; the base
1066  // ... %gep ...
1067  //
1068  // => add the offset
1069  //
1070  // %gep2 ; clone of %gep
1071  // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
1072  // %gep ; will be removed
1073  // ... %gep ...
1074  //
1075  // => replace all uses of %gep with %new.gep and remove %gep
1076  //
1077  // %gep2 ; clone of %gep
1078  // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
1079  // ... %new.gep ...
1080  //
1081  // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
1082  // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
1083  // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
1084  // type of %gep.
1085  //
1086  // %gep2 ; clone of %gep
1087  // %0 = bitcast %gep2 to i8*
1088  // %uglygep = gep %0, <offset>
1089  // %new.gep = bitcast %uglygep to <type of %gep>
1090  // ... %new.gep ...
1091  Instruction *NewGEP = GEP->clone();
1092  NewGEP->insertBefore(GEP);
1093 
1094  // Per the ANSI C standard, signed / unsigned = unsigned and signed % unsigned
1095  // = unsigned. Therefore, we cast ElementTypeSizeOfGEP to a signed type so that
1096  // the division and remainder below are performed in signed arithmetic.
1097  int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
1098  DL->getTypeAllocSize(GEP->getResultElementType()));
1099  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
1100  if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
1101  // Very likely. As long as %gep is naturally aligned, the byte offset we
1102  // extracted should be a multiple of sizeof(*%gep).
1103  int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
1104  NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
1105  ConstantInt::get(IntPtrTy, Index, true),
1106  GEP->getName(), GEP);
1107  NewGEP->copyMetadata(*GEP);
1108  // Inherit the inbounds attribute of the original GEP.
1109  cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
1110  } else {
1111  // Unlikely but possible. For example,
1112  // #pragma pack(1)
1113  // struct S {
1114  // int a[3];
1115  // int64 b[8];
1116  // };
1117  // #pragma pack()
1118  //
1119  // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
1120  // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
1121  // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
1122  // sizeof(int64).
1123  //
1124  // Emit an uglygep in this case.
1125  Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(),
1126  GEP->getPointerAddressSpace());
1127  NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
1128  NewGEP = GetElementPtrInst::Create(
1129  Type::getInt8Ty(GEP->getContext()), NewGEP,
1130  ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
1131  GEP);
1132  NewGEP->copyMetadata(*GEP);
1133  // Inherit the inbounds attribute of the original GEP.
1134  cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
1135  if (GEP->getType() != I8PtrTy)
1136  NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
1137  }
1138 
1139  GEP->replaceAllUsesWith(NewGEP);
1140  GEP->eraseFromParent();
1141 
1142  return true;
1143 }
1144 
1145 bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
1146  if (skipFunction(F))
1147  return false;
1148  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1149  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1150  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1151  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1152  auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
1153  return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1154  };
1155  SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
1156  return Impl.run(F);
1157 }
1158 
1159 bool SeparateConstOffsetFromGEP::run(Function &F) {
1160  if (DisableSeparateConstOffsetFromGEP)
1161  return false;
1162 
1163  DL = &F.getParent()->getDataLayout();
1164  bool Changed = false;
1165  for (BasicBlock &B : F) {
1166  if (!DT->isReachableFromEntry(&B))
1167  continue;
1168 
1169  for (Instruction &I : llvm::make_early_inc_range(B))
1170  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I))
1171  Changed |= splitGEP(GEP);
1172  // No need to split GEP ConstantExprs because all their indices are constant
1173  // already.
1174  }
1175 
1176  Changed |= reuniteExts(F);
1177 
1178  if (VerifyNoDeadCode)
1179  verifyNoDeadCode(F);
1180 
1181  return Changed;
1182 }
1183 
1184 Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
1185  const SCEV *Key, Instruction *Dominatee,
1186  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> &DominatingExprs) {
1187  auto Pos = DominatingExprs.find(Key);
1188  if (Pos == DominatingExprs.end())
1189  return nullptr;
1190 
1191  auto &Candidates = Pos->second;
1192  // Because we process the basic blocks in pre-order of the dominator tree, a
1193  // candidate that doesn't dominate the current instruction won't dominate any
1194  // future instruction either. Therefore, we pop it out of the stack. This
1195  // optimization makes the algorithm O(n).
1196  while (!Candidates.empty()) {
1197  Instruction *Candidate = Candidates.back();
1198  if (DT->dominates(Candidate, Dominatee))
1199  return Candidate;
1200  Candidates.pop_back();
1201  }
1202  return nullptr;
1203 }
1204 
1205 bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
1206  if (!SE->isSCEVable(I->getType()))
1207  return false;
1208 
1209  // Dom: LHS+RHS
1210  // I: sext(LHS)+sext(RHS)
1211  // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
1212  // TODO: handle zext
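  //
  // In IR terms, a rough sketch (hypothetical values, not from a test):
  //   %dom = add nsw i32 %a, %b          ; dominates %i and cannot overflow
  //   ...
  //   %sa  = sext i32 %a to i64
  //   %sb  = sext i32 %b to i64
  //   %i   = add i64 %sa, %sb
  // is rewritten so that %i becomes
  //   %i   = sext i32 %dom to i64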
1213  Value *LHS = nullptr, *RHS = nullptr;
1214  if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
1215  if (LHS->getType() == RHS->getType()) {
1216  const SCEV *Key =
1217  SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
1218  if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingAdds)) {
1219  Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
1220  NewSExt->takeName(I);
1221  I->replaceAllUsesWith(NewSExt);
1222  RecursivelyDeleteTriviallyDeadInstructions(I);
1223  return true;
1224  }
1225  }
1226  } else if (match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
1227  if (LHS->getType() == RHS->getType()) {
1228  const SCEV *Key =
1229  SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
1230  if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingSubs)) {
1231  Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
1232  NewSExt->takeName(I);
1233  I->replaceAllUsesWith(NewSExt);
1234  RecursivelyDeleteTriviallyDeadInstructions(I);
1235  return true;
1236  }
1237  }
1238  }
1239 
1240  // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
1241  if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS)))) {
1242  if (programUndefinedIfPoison(I)) {
1243  const SCEV *Key =
1244  SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
1245  DominatingAdds[Key].push_back(I);
1246  }
1247  } else if (match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
1248  if (programUndefinedIfPoison(I)) {
1249  const SCEV *Key =
1250  SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
1251  DominatingSubs[Key].push_back(I);
1252  }
1253  }
1254  return false;
1255 }
1256 
1257 bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
1258  bool Changed = false;
1259  DominatingAdds.clear();
1260  DominatingSubs.clear();
1261  for (const auto Node : depth_first(DT)) {
1262  BasicBlock *BB = Node->getBlock();
1263  for (Instruction &I : llvm::make_early_inc_range(*BB))
1264  Changed |= reuniteExts(&I);
1265  }
1266  return Changed;
1267 }
1268 
1269 void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
1270  for (BasicBlock &B : F) {
1271  for (Instruction &I : B) {
1272  if (isInstructionTriviallyDead(&I)) {
1273  std::string ErrMessage;
1274  raw_string_ostream RSO(ErrMessage);
1275  RSO << "Dead instruction detected!\n" << I << "\n";
1276  llvm_unreachable(RSO.str().c_str());
1277  }
1278  }
1279  }
1280 }
1281 
1282 bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
1283  GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
1284  if (!FirstGEP || !FirstGEP->hasOneUse())
1285  return false;
1286 
1287  if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
1288  return false;
1289 
1290  if (FirstGEP == SecondGEP)
1291  return false;
1292 
1293  unsigned FirstNum = FirstGEP->getNumOperands();
1294  unsigned SecondNum = SecondGEP->getNumOperands();
1295  // Give up if the number of operands is not 2.
1296  if (FirstNum != SecondNum || FirstNum != 2)
1297  return false;
1298 
1299  Value *FirstBase = FirstGEP->getOperand(0);
1300  Value *SecondBase = SecondGEP->getOperand(0);
1301  Value *FirstOffset = FirstGEP->getOperand(1);
1302  // Give up if the index of the first GEP is loop invariant.
1303  if (CurLoop->isLoopInvariant(FirstOffset))
1304  return false;
1305 
1306  // Give up if the bases don't have the same type.
1307  if (FirstBase->getType() != SecondBase->getType())
1308  return false;
1309 
1310  Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);
1311 
1312  // Check if the second operand of the first GEP has a constant coefficient.
1313  // For example, for the following code, we won't gain anything by
1314  // hoisting the second GEP out because the second GEP can be folded away.
1315  // %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
1316  // %67 = shl i64 %scevgep.sum.ur159, 2
1317  // %uglygep160 = getelementptr i8* %65, i64 %67
1318  // %uglygep161 = getelementptr i8* %uglygep160, i64 -1024
1319 
1320  // Skip a constant shift instruction, which may be generated by splitting GEPs.
1321  if (FirstOffsetDef && FirstOffsetDef->isShift() &&
1322  isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
1323  FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));
1324 
1325  // Give up if FirstOffsetDef is an Add or Sub with a constant,
1326  // because it may not be profitable at all due to constant folding.
1327  if (FirstOffsetDef)
1328  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
1329  unsigned opc = BO->getOpcode();
1330  if ((opc == Instruction::Add || opc == Instruction::Sub) &&
1331  (isa<ConstantInt>(BO->getOperand(0)) ||
1332  isa<ConstantInt>(BO->getOperand(1))))
1333  return false;
1334  }
1335  return true;
1336 }
1337 
1338 bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
1339  int UsesInLoop = 0;
1340  for (User *U : V->users()) {
1341  if (Instruction *User = dyn_cast<Instruction>(U))
1342  if (L->contains(User))
1343  if (++UsesInLoop > 1)
1344  return true;
1345  }
1346  return false;
1347 }
1348 
1349 void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
1350  GetElementPtrInst *Second) {
1351  Value *Offset1 = First->getOperand(1);
1352  Value *Offset2 = Second->getOperand(1);
1353  First->setOperand(1, Offset2);
1354  Second->setOperand(1, Offset1);
1355 
1356  // We changed p+o+c to p+c+o; p+c may not be inbounds anymore.
1357  const DataLayout &DAL = First->getModule()->getDataLayout();
1358  APInt Offset(DAL.getIndexSizeInBits(
1359  cast<PointerType>(First->getType())->getAddressSpace()),
1360  0);
1361  Value *NewBase =
1362  First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);
1363  uint64_t ObjectSize;
1364  if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
1365  Offset.ugt(ObjectSize)) {
1366  First->setIsInBounds(false);
1367  Second->setIsInBounds(false);
1368  } else
1369  First->setIsInBounds(true);
1370 }
1371 
1372 PreservedAnalyses
1373 SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) {
1374  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1375  auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
1376  auto *LI = &AM.getResult<LoopAnalysis>(F);
1377  auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
1378  auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
1379  return AM.getResult<TargetIRAnalysis>(F);
1380  };
1381  SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
1382  if (!Impl.run(F))
1383  return PreservedAnalyses::all();
1384  PreservedAnalyses PA;
1385  PA.preserveSet<CFGAnalyses>();
1386  return PA;
1387 }
Create a ZExt, BitCast, or Trunc for int -> int casts.
Definition: Instructions.cpp:3416
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:123
and
We currently generate a but we really shouldn eax ecx xorl edx divl ecx eax divl ecx movl eax ret A similar code sequence works for division We currently compile i32 v2 eax eax jo LBB1_2 and
Definition: README.txt:1271
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:237
DepthFirstIterator.h
llvm::SeparateConstOffsetFromGEPPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
Definition: SeparateConstOffsetFromGEP.cpp:1373
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
a
=0.0 ? 0.0 :(a > 0.0 ? 1.0 :-1.0) a
Definition: README.txt:489
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
llvm::DataLayout::getIndexSizeInBits
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:422
Instruction.h
CommandLine.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
CSE
separate const offset from Split GEPs to a variadic base and a constant offset for better CSE
Definition: SeparateConstOffsetFromGEP.cpp:495
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::AMDGPU::PALMD::Key
Key
PAL metadata keys.
Definition: AMDGPUMetadata.h:486
IP
Definition: NVPTXLowerArgs.cpp:168
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:141
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
First
into llvm powi allowing the code generator to produce balanced multiplication trees First
Definition: README.txt:54
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:392
llvm::Instruction
Definition: Instruction.h:42
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
llvm::PatternMatch::m_NSWSub
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1164
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2173
PatternMatch.h
llvm::getObjectSize
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
Definition: MemoryBuiltins.cpp:614
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Type.h
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:101
LoopInfo.h
llvm::PatternMatch::m_NSWAdd
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1156
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
BasicBlock.h
llvm::cl::opt< bool >
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:159
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:416
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:468
uint64_t
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2626
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1610
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
llvm::generic_gep_type_iterator::getStructType
StructType * getStructType() const
Definition: GetElementPtrTypeIterator.h:114
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::APInt::logBase2
unsigned logBase2() const
Definition: APInt.h:1652
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:929
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:596
IRBuilder.h
SeparateConstOffsetFromGEP.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::PatternMatch::m_Sub
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:991
base
therefore end up llgh r3 lr r0 br r14 but truncating the load would lh r3 br r14 Functions ret i64 and ought to be implemented ngr r0 br r14 but two address optimizations reverse the order of the AND and ngr r2 lgr r0 br r14 CodeGen SystemZ and ll has several examples of this Out of range displacements are usually handled by loading the full address into a register In many cases it would be better to create an anchor point instead E g i64 base
Definition: README.txt:125
llvm::ConstantExpr::getCast
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:1946
llvm::generic_gep_type_iterator::getIndexedType
Type * getIndexedType() const
Definition: GetElementPtrTypeIterator.h:70
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:335
Find
static const T * Find(StringRef S, ArrayRef< T > A)
Find KV in array using binary search.
Definition: MCSubtargetInfo.cpp:25
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
getType
static M68kRelType getType(unsigned Kind, MCSymbolRefExpr::VariantKind &Modifier, bool &IsPCRel)
Definition: M68kELFObjectWriter.cpp:48
llvm::GetElementPtrInst::Create
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:955
llvm::PatternMatch::m_SExt
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:1623
llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:395
llvm::LoopInfo
Definition: LoopInfo.h:1105
llvm::BinaryOperator
Definition: InstrTypes.h:188
DataLayout.h
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:113
llvm::generic_gep_type_iterator::isSequential
bool isSequential() const
Definition: GetElementPtrTypeIterator.h:112
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
DisableSeparateConstOffsetFromGEP
static cl::opt< bool > DisableSeparateConstOffsetFromGEP("disable-separate-const-offset-from-gep", cl::init(false), cl::desc("Do not separate the constant offset from a GEP instruction"), cl::Hidden)
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:429
llvm::initializeSeparateConstOffsetFromGEPLegacyPassPass
void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &)
llvm::SExtInst
This class represents a sign extension of integer types.
Definition: Instructions.h:4889
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
for
this could be done in SelectionDAGISel along with other special for
Definition: README.txt:104
gep
separate const offset from gep
Definition: SeparateConstOffsetFromGEP.cpp:494
llvm::MCID::Variadic
@ Variadic
Definition: MCInstrDesc.h:149
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:230
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
Constant.h
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:350
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
Casting.h
Function.h
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
better
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be better
Definition: README-SSE.txt:537
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:773
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
Instructions.h
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:188
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
SmallVector.h
User.h
llvm::Instruction::hasNoSignedWrap
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
Definition: Instruction.cpp:165
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
TargetTransformInfo.h
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:365
DerivedTypes.h
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::OptimizedStructLayoutField
A field in a structure.
Definition: OptimizedStructLayout.h:45
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:381
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep", "Split GEPs to a variadic base and a constant offset for better CSE", false, false) INITIALIZE_PASS_END(SeparateConstOffsetFromGEPLegacyPass
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:413
raw_ostream.h
llvm::BinaryOperator::Create
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Definition: Instructions.cpp:2846
Value.h
llvm::TargetTransformInfo::useAA
bool useAA() const
Definition: TargetTransformInfo.cpp:484
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:443
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
VerifyNoDeadCode
static cl::opt< bool > VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false), cl::desc("Verify this pass produces no dead code"), cl::Hidden)
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1265
llvm::TargetTransformInfo::isLegalAddressingMode
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Definition: TargetTransformInfo.cpp:349
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38