SeparateConstOffsetFromGEP.cpp
1//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Loop unrolling may create many similar GEPs for array accesses.
10// e.g., a 2-level loop
11//
12// float a[32][32]; // global variable
13//
14// for (int i = 0; i < 2; ++i) {
15// for (int j = 0; j < 2; ++j) {
16// ...
17// ... = a[x + i][y + j];
18// ...
19// }
20// }
21//
22// will probably be unrolled to:
23//
24// gep %a, 0, %x, %y; load
25// gep %a, 0, %x, %y + 1; load
26// gep %a, 0, %x + 1, %y; load
27// gep %a, 0, %x + 1, %y + 1; load
28//
29// LLVM's GVN does not use partial redundancy elimination yet, and is thus
30// unable to reuse (gep %a, 0, %x, %y). As a result, this missed optimization
31// incurs a significant slowdown on targets with limited addressing modes. For instance,
32// because the PTX target does not support the reg+reg addressing mode, the
33// NVPTX backend emits PTX code that literally computes the pointer address of
34// each GEP, wasting tons of registers. It emits the following PTX for the
35// first load and similar PTX for other loads.
36//
37// mov.u32 %r1, %x;
38// mov.u32 %r2, %y;
39// mul.wide.u32 %rl2, %r1, 128;
40// mov.u64 %rl3, a;
41// add.s64 %rl4, %rl3, %rl2;
42// mul.wide.u32 %rl5, %r2, 4;
43// add.s64 %rl6, %rl4, %rl5;
44// ld.global.f32 %f1, [%rl6];
45//
46// To reduce the register pressure, the optimization implemented in this file
47// merges the common part of a group of GEPs, so we can compute each pointer
48// address by adding a simple offset to the common part, saving many registers.
49//
50// It works by splitting each GEP into a variadic base and a constant offset.
51// The variadic base can be computed once and reused by multiple GEPs, and the
52// constant offsets can be nicely folded into the reg+immediate addressing mode
53// (supported by most targets) without using any extra register.
54//
55// For instance, we transform the four GEPs and four loads in the above example
56// into:
57//
58// base = gep a, 0, x, y
59// load base
60// load base + 1 * sizeof(float)
61// load base + 32 * sizeof(float)
62// load base + 33 * sizeof(float)
63//
64// Given the transformed IR, a backend that supports the reg+immediate
65// addressing mode can easily fold the pointer arithmetic into the loads. For
66// example, the NVPTX backend folds the pointer arithmetic into the
67// ld.global.f32 instructions, and the resultant PTX uses far fewer registers.
68//
69// mov.u32 %r1, %x;
70// mov.u32 %r2, %y;
71// mul.wide.u32 %rl2, %r1, 128;
72// mov.u64 %rl3, a;
73// add.s64 %rl4, %rl3, %rl2;
74// mul.wide.u32 %rl5, %r2, 4;
75// add.s64 %rl6, %rl4, %rl5;
76// ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX
77// ld.global.f32 %f2, [%rl6+4]; // much better
78// ld.global.f32 %f3, [%rl6+128]; // much better
79// ld.global.f32 %f4, [%rl6+132]; // much better
80//
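// In source terms, the transformed IR corresponds roughly to the following C
// (an illustrative sketch, not code emitted by the pass): every access reuses
// one base address plus a small constant byte offset that the reg+immediate
// addressing mode folds for free.
//
//   float *base = &a[x][y];
//   float f1 = base[0];   // [%rl6]
//   float f2 = base[1];   // [%rl6+4]
//   float f3 = base[32];  // [%rl6+128]
//   float f4 = base[33];  // [%rl6+132]
//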
81// Another improvement enabled by the LowerGEP flag is to lower a GEP with
82// multiple indices to either multiple GEPs with a single index or arithmetic
83// operations (depending on whether the target uses alias analysis in codegen).
84// Such a transformation can have the following benefits:
85// (1) It can always extract constants in the indices of structure type.
86// (2) After such lowering, there are more optimization opportunities such as
87// CSE, LICM and CGP.
88//
89// E.g. The following GEPs have multiple indices:
90// BB1:
91// %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
92// load %p
93// ...
94// BB2:
95// %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j2, i32 2
96// load %p2
97// ...
98//
99// We cannot do CSE on the common part related to index "i64 %i" while the
100// GEPs keep multiple indices. Lowering the GEPs makes such CSE possible.
101// If the target does not use alias analysis in codegen, this pass will
102// lower a GEP with multiple indices into arithmetic operations:
103// BB1:
104// %1 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
105// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
106// %3 = add i64 %1, %2 ; CSE opportunity
107// %4 = mul i64 %j1, length_of_struct
108// %5 = add i64 %3, %4
109// %6 = add i64 %5, struct_field_3 ; Constant offset
110// %p = inttoptr i64 %6 to i32*
111// load %p
112// ...
113// BB2:
114// %7 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
115// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
116// %9 = add i64 %7, %8 ; CSE opportunity
117// %10 = mul i64 %j2, length_of_struct
118// %11 = add i64 %9, %10
119// %12 = add i64 %11, struct_field_2 ; Constant offset
120// %p2 = inttoptr i64 %12 to i32*
121// load %p2
122// ...
123//
124// If the target uses alias analysis in codegen, this pass will lower a GEP
125// with multiple indices into multiple GEPs with a single index:
126// BB1:
127// %1 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
128// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
129// %3 = getelementptr i8* %1, i64 %2 ; CSE opportunity
130// %4 = mul i64 %j1, length_of_struct
131// %5 = getelementptr i8* %3, i64 %4
132// %6 = getelementptr i8* %5, struct_field_3 ; Constant offset
133// %p = bitcast i8* %6 to i32*
134// load %p
135// ...
136// BB2:
137// %7 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
138// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
139// %9 = getelementptr i8* %7, i64 %8 ; CSE opportunity
140// %10 = mul i64 %j2, length_of_struct
141// %11 = getelementptr i8* %9, i64 %10
142// %12 = getelementptr i8* %11, struct_field_2 ; Constant offset
143// %p2 = bitcast i8* %12 to i32*
144// load %p2
145// ...
146//
147// Lowering GEPs can also benefit other passes such as LICM and CGP.
148// LICM (Loop Invariant Code Motion) cannot hoist/sink a GEP with multiple
149// indices if one of the indices is variant. If we lower such a GEP into
150// invariant parts and variant parts, LICM can hoist/sink those invariant parts.
151// CGP (CodeGen Prepare) tries to sink address calculations that match the
152// target's addressing modes. A GEP with multiple indices may not match and will
153// not be sunk. If we lower such a GEP into smaller parts, CGP may sink some of
154// them, so we end up with a better addressing mode.
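//
// For example (an illustrative sketch reusing the a[32][32] array from above):
// in a loop whose induction variable is %j while %i is loop-invariant,
//
//   %p = getelementptr [32 x [32 x float]], ptr @a, i64 0, i64 %i, i64 %j
//
// cannot be hoisted as a whole. After lowering, the part computing
// "@a + %i * 128" is loop-invariant and can be hoisted by LICM; only the
// "+ %j * 4" part has to stay inside the loop.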
155//
156//===----------------------------------------------------------------------===//
157
159#include "llvm/ADT/APInt.h"
160#include "llvm/ADT/DenseMap.h"
162#include "llvm/ADT/SmallVector.h"
168#include "llvm/IR/BasicBlock.h"
169#include "llvm/IR/Constant.h"
170#include "llvm/IR/Constants.h"
171#include "llvm/IR/DataLayout.h"
172#include "llvm/IR/DerivedTypes.h"
173#include "llvm/IR/Dominators.h"
174#include "llvm/IR/Function.h"
176#include "llvm/IR/IRBuilder.h"
177#include "llvm/IR/Instruction.h"
178#include "llvm/IR/Instructions.h"
179#include "llvm/IR/Module.h"
180#include "llvm/IR/PassManager.h"
181#include "llvm/IR/PatternMatch.h"
182#include "llvm/IR/Type.h"
183#include "llvm/IR/User.h"
184#include "llvm/IR/Value.h"
186#include "llvm/Pass.h"
187#include "llvm/Support/Casting.h"
193#include <cassert>
194#include <cstdint>
195#include <string>
196
197using namespace llvm;
198using namespace llvm::PatternMatch;
199
201 "disable-separate-const-offset-from-gep", cl::init(false),
202 cl::desc("Do not separate the constant offset from a GEP instruction"),
203 cl::Hidden);
204
205// Setting this flag may emit false positives when the input module already
206// contains dead instructions. Therefore, we set it only in unit tests that are
207// free of dead code.
208static cl::opt<bool>
209 VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false),
210 cl::desc("Verify this pass produces no dead code"),
211 cl::Hidden);
212
213namespace {
214
215/// A helper class for separating a constant offset from a GEP index.
216///
217/// In real programs, a GEP index may be more complicated than a simple addition
218/// of something and a constant integer that can be trivially split. For
219/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
220/// constant offset, so that we can separate the index into (a << 3) + b and 5.
221///
222/// Therefore, this class looks into the expression that computes a given GEP
223/// index, and tries to find a constant integer that can be hoisted to the
224/// outermost level of the expression as an addition. Not every constant in an
225/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to (b * a +
226/// 5); nor can we transform (3 * (a + 5)) to (3 * a + 5). However, in the latter
227/// case, -instcombine has probably already optimized (3 * (a + 5)) to (3 * a + 15).
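///
/// As a worked illustration of the ((a << 3) | 5) + b case above: the low
/// three bits of (a << 3) are zero, so it shares no set bits with 5 and the
/// "or" behaves like an addition, i.e. ((a << 3) | 5) + b == ((a << 3) + b) + 5.
/// The extractor therefore reports the constant 5 and rebuilds the remaining
/// index as (a << 3) + b, rewriting the "or" as an "add" (see removeConstOffset
/// below).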
228class ConstantOffsetExtractor {
229public:
230 /// Extracts a constant offset from the given GEP index. It returns the
231 /// new index representing the remainder (equal to the original index minus
232 /// the constant offset), or nullptr if we cannot extract a constant offset.
233 /// \p Idx The given GEP index
234 /// \p GEP The given GEP
235 /// \p UserChainTail Outputs the tail of UserChain so that we can
236 /// garbage-collect unused instructions in UserChain.
237 static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
238 User *&UserChainTail, const DominatorTree *DT);
239
240 /// Looks for a constant offset from the given GEP index without extracting
241 /// it. It returns the numeric value of the constant offset it found (0 if
242 /// none). The meaning of the arguments is the same as in Extract.
243 static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
244 const DominatorTree *DT);
245
246private:
247 ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
248 : IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) {
249 }
250
251 /// Searches the expression that computes V for a non-zero constant C s.t.
252 /// V can be reassociated into the form V' + C. If the searching is
253 /// successful, returns C and update UserChain as a def-use chain from C to V;
254 /// otherwise, UserChain is empty.
255 ///
256 /// \p V The given expression
257 /// \p SignExtended Whether V will be sign-extended in the computation of the
258 /// GEP index
259 /// \p ZeroExtended Whether V will be zero-extended in the computation of the
260 /// GEP index
261 /// \p NonNegative Whether V is guaranteed to be non-negative. For example,
262 /// an index of an inbounds GEP is guaranteed to be
263 /// non-negative. Leveraging this, we can better split
264 /// inbounds GEPs.
265 APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);
266
267 /// A helper function to look into both operands of a binary operator.
268 APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
269 bool ZeroExtended);
270
271 /// After finding the constant offset C from the GEP index I, we build a new
272 /// index I' s.t. I' + C = I. This function builds and returns the new
273 /// index I' according to UserChain produced by function "find".
274 ///
275 /// The building conceptually takes two steps:
276 /// 1) iteratively distribute s/zext towards the leaves of the expression tree
277 /// that computes I
278 /// 2) reassociate the expression tree to the form I' + C.
279 ///
280 /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
281 /// sext to a, b and 5 so that we have
282 /// sext(a) + (sext(b) + 5).
283 /// Then, we reassociate it to
284 /// (sext(a) + sext(b)) + 5.
285 /// Given this form, we know I' is sext(a) + sext(b).
286 Value *rebuildWithoutConstOffset();
287
288 /// After the first step of rebuilding the GEP index without the constant
289 /// offset, distribute s/zext to the operands of all operators in UserChain.
290 /// e.g., zext(sext(a + (b + 5))) (assuming no overflow) =>
291 /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
292 ///
293 /// The function also updates UserChain to point to new subexpressions after
294 /// distributing s/zext. e.g., the old UserChain of the above example is
295 /// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
296 /// and the new UserChain is
297 /// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
298 /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5)))
299 ///
300 /// \p ChainIndex The index to UserChain. ChainIndex is initially
301 /// UserChain.size() - 1, and is decremented during
302 /// the recursion.
303 Value *distributeExtsAndCloneChain(unsigned ChainIndex);
304
305 /// Reassociates the GEP index to the form I' + C and returns I'.
306 Value *removeConstOffset(unsigned ChainIndex);
307
308 /// A helper function to apply ExtInsts, a list of s/zext, to value V.
309 /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
310 /// returns "sext i32 (zext i16 V to i32) to i64".
311 Value *applyExts(Value *V);
312
313 /// A helper function that returns whether we can trace into the operands
314 /// of binary operator BO for a constant offset.
315 ///
316 /// \p SignExtended Whether BO is surrounded by sext
317 /// \p ZeroExtended Whether BO is surrounded by zext
318 /// \p NonNegative Whether BO is known to be non-negative, e.g., an in-bound
319 /// array index.
320 bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
321 bool NonNegative);
322
323 /// The path from the constant offset to the old GEP index. e.g., if the GEP
324 /// index is "a * b + (c + 5)". After running function find, UserChain[0] will
325 /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
326 /// UserChain[2] will be the entire expression "a * b + (c + 5)".
327 ///
328 /// This path helps to rebuild the new GEP index.
329 SmallVector<User *, 8> UserChain;
330
331 /// A data structure used in rebuildWithoutConstOffset. Contains all
332 /// sext/zext instructions along UserChain.
333 SmallVector<CastInst *, 16> ExtInsts;
334
335 /// Insertion position of cloned instructions.
336 Instruction *IP;
337
338 const DataLayout &DL;
339 const DominatorTree *DT;
340};
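
// A note on how this helper is used below (descriptive only, based on the code
// in this file): accumulateByteOffset() calls ConstantOffsetExtractor::Find to
// cheaply sum up the constant offsets hidden in a GEP's indices, and, only if
// that sum is worth extracting, splitGEP() calls ConstantOffsetExtractor::Extract
// to actually rewrite each index without its constant part.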
341
342/// A pass that tries to split every GEP in the function into a variadic
343/// base and a constant offset. It is a FunctionPass because searching for the
344/// constant offset may inspect other basic blocks.
345class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass {
346public:
347 static char ID;
348
349 SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
350 : FunctionPass(ID), LowerGEP(LowerGEP) {
353 }
354
355 void getAnalysisUsage(AnalysisUsage &AU) const override {
359 AU.setPreservesCFG();
361 }
362
363 bool runOnFunction(Function &F) override;
364
365private:
366 bool LowerGEP;
367};
368
369/// The pass implementation shared by the legacy and new pass managers. It tries
370/// to split every GEP in the function into a variadic base and a constant
371/// offset. Searching for the constant offset may inspect other basic blocks.
372class SeparateConstOffsetFromGEP {
373public:
374 SeparateConstOffsetFromGEP(
375 DominatorTree *DT, LoopInfo *LI, TargetLibraryInfo *TLI,
376 function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
377 : DT(DT), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}
378
379 bool run(Function &F);
380
381private:
382 /// Track the operands of an add or sub.
383 using ExprKey = std::pair<Value *, Value *>;
384
385 /// Create a pair for use as a map key for a commutable operation.
386 static ExprKey createNormalizedCommutablePair(Value *A, Value *B) {
387 if (A < B)
388 return {A, B};
389 return {B, A};
390 }
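 // Note: normalizing the pair by pointer order makes the key insensitive to
 // operand order, e.g. for any values A and B this returns the same key for
 // (A, B) and (B, A), so a dominating "b + a" can later serve "a + b" in
 // reuniteExts().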
391
392 /// Tries to split the given GEP into a variadic base and a constant offset,
393 /// and returns true if the splitting succeeds.
394 bool splitGEP(GetElementPtrInst *GEP);
395
396 /// Lower a GEP with multiple indices into multiple GEPs with a single index.
397 /// Function splitGEP already split the original GEP into a variadic part and
398 /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
399 /// variadic part into a set of GEPs with a single index and applies
400 /// AccumulativeByteOffset to it.
401 /// \p Variadic The variadic part of the original GEP.
402 /// \p AccumulativeByteOffset The constant offset.
403 void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
404 int64_t AccumulativeByteOffset);
405
406 /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form.
407 /// Function splitGEP already split the original GEP into a variadic part and
408 /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
409 /// variadic part into a set of arithmetic operations and applies
410 /// AccumulativeByteOffset to it.
411 /// \p Variadic The variadic part of the original GEP.
412 /// \p AccumulativeByteOffset The constant offset.
413 void lowerToArithmetics(GetElementPtrInst *Variadic,
414 int64_t AccumulativeByteOffset);
415
416 /// Finds the constant offset within each index and accumulates them. If
417 /// LowerGEP is true, it looks in indices of both sequential and structure
418 /// types; otherwise it looks only in sequential indices. The output
419 /// NeedsExtraction indicates whether we found a non-zero constant
420 /// offset.
421 int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);
422
423 /// Canonicalize array indices to pointer-size integers. This helps to
424 /// simplify the logic of splitting a GEP. For example, if a + b is a
425 /// pointer-size integer, we have
426 /// gep base, a + b = gep (gep base, a), b
427 /// However, this equality may not hold if the size of a + b is smaller than
428 /// the pointer size, because LLVM conceptually sign-extends GEP indices to
429 /// pointer size before computing the address
430 /// (http://llvm.org/docs/LangRef.html#id181).
431 ///
432 /// This canonicalization is very likely already done in clang and
433 /// instcombine. Therefore, the program will probably remain the same.
434 ///
435 /// Returns true if the module changes.
436 ///
437 /// Verified in @i32_add in split-gep.ll
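 ///
 /// For illustration (hypothetical i8 indices, not taken from the test): with
 /// a = 100 and b = 100, the i8 addition a + b wraps to -56, so
 /// "gep base, (a + b)" steps back 56 elements, whereas
 /// "gep (gep base, a), b" advances sext(a) + sext(b) = 200 elements. This is
 /// why the split is only applied once the indices have been widened to the
 /// index type.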
438 bool canonicalizeArrayIndicesToIndexSize(GetElementPtrInst *GEP);
439
440 /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
441 /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
442 /// the constant offset. After extraction, it becomes desirable to reunite the
443 /// distributed sexts. For example,
444 ///
445 /// &a[sext(i +nsw (j +nsw 5))]
446 /// => distribute &a[sext(i) +nsw (sext(j) +nsw 5)]
447 /// => constant extraction &a[sext(i) + sext(j)] + 5
448 /// => reunion &a[sext(i +nsw j)] + 5
449 bool reuniteExts(Function &F);
450
451 /// A helper that reunites sexts in an instruction.
452 bool reuniteExts(Instruction *I);
453
454 /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
455 Instruction *findClosestMatchingDominator(
456 ExprKey Key, Instruction *Dominatee,
457 DenseMap<ExprKey, SmallVector<Instruction *, 2>> &DominatingExprs);
458
459 /// Verify F is free of dead code.
460 void verifyNoDeadCode(Function &F);
461
462 bool hasMoreThanOneUseInLoop(Value *v, Loop *L);
463
464 // Swap the index operands of two GEPs.
465 void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);
466
467 // Check if it is safe to swap the index operands of two GEPs.
468 bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
469 Loop *CurLoop);
470
471 const DataLayout *DL = nullptr;
472 DominatorTree *DT = nullptr;
473 LoopInfo *LI;
474 TargetLibraryInfo *TLI;
475 // Retrieved lazily since not always used.
476 function_ref<TargetTransformInfo &(Function &)> GetTTI;
477
478 /// Whether to lower a GEP with multiple indices into arithmetic operations or
479 /// multiple GEPs with a single index.
480 bool LowerGEP;
481
482 DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingAdds;
483 DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingSubs;
484};
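
// A rough summary of the flow implemented below (descriptive only): run()
// visits every GEP in reachable blocks and calls splitGEP(), which first
// canonicalizes index widths, then accumulates the constant byte offset, then
// strips the constant part out of each index, and finally either lowers the
// remaining variadic GEP (when LowerGEP is set) or appends one extra GEP that
// re-applies the accumulated offset. reuniteExts() then merges the sext
// instructions that the extraction distributed.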
485
486} // end anonymous namespace
487
488char SeparateConstOffsetFromGEPLegacyPass::ID = 0;
489
491 SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
492 "Split GEPs to a variadic base and a constant offset for better CSE", false,
493 false)
499INITIALIZE_PASS_END(
500 SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
501 "Split GEPs to a variadic base and a constant offset for better CSE", false,
502 false)
503
504FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) {
505 return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP);
506}
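
// For reference (assuming the usual opt syntax for pass parameters), the pass
// can be exercised directly from the command line, e.g.:
//
//   opt -passes='separate-const-offset-from-gep' input.ll -S
//   opt -passes='separate-const-offset-from-gep<lower-gep>' input.ll -S
//
// where the optional <lower-gep> parameter corresponds to the LowerGEP flag
// printed by printPipeline() below.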
507
508bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
509 bool ZeroExtended,
510 BinaryOperator *BO,
511 bool NonNegative) {
512 // We only consider ADD, SUB and OR, because a non-zero constant found in
513 // expressions composed of these operations can be easily hoisted as a
514 // constant offset by reassociation.
515 if (BO->getOpcode() != Instruction::Add &&
516 BO->getOpcode() != Instruction::Sub &&
517 BO->getOpcode() != Instruction::Or) {
518 return false;
519 }
520
521 Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
522 // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
523 // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
524 // FIXME: this does not appear to be covered by any tests
525 // (with x86/aarch64 backends at least)
526 if (BO->getOpcode() == Instruction::Or &&
527 !haveNoCommonBitsSet(LHS, RHS, DL, nullptr, BO, DT))
528 return false;
529
530 // FIXME: We don't currently support constants from the RHS of subs
531 // when we are zero-extended, because we need a way to zero-extend
532 // them before they are negated.
533 if (ZeroExtended && !SignExtended && BO->getOpcode() == Instruction::Sub)
534 return false;
535
536 // In addition, tracing into BO requires that its surrounding s/zext (if
537 // any) is distributable to both operands.
538 //
539 // Suppose BO = A op B.
540 // SignExtended | ZeroExtended | Distributable?
541 // --------------+--------------+----------------------------------
542 // 0 | 0 | true because no s/zext exists
543 // 0 | 1 | zext(BO) == zext(A) op zext(B)
544 // 1 | 0 | sext(BO) == sext(A) op sext(B)
545 // 1 | 1 | zext(sext(BO)) ==
546 // | | zext(sext(A)) op zext(sext(B))
547 if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
548 // If a + b >= 0 and (a >= 0 or b >= 0), then
549 // sext(a + b) = sext(a) + sext(b)
550 // even if the addition is not marked nsw.
551 //
552 // Leveraging this invariant, we can trace into an sext'ed inbounds GEP
553 // index if the constant offset is non-negative.
554 //
555 // Verified in @sext_add in split-gep.ll.
556 if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {
557 if (!ConstLHS->isNegative())
558 return true;
559 }
560 if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
561 if (!ConstRHS->isNegative())
562 return true;
563 }
564 }
565
566 // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
567 // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
568 if (BO->getOpcode() == Instruction::Add ||
569 BO->getOpcode() == Instruction::Sub) {
570 if (SignExtended && !BO->hasNoSignedWrap())
571 return false;
572 if (ZeroExtended && !BO->hasNoUnsignedWrap())
573 return false;
574 }
575
576 return true;
577}
578
579APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
580 bool SignExtended,
581 bool ZeroExtended) {
582 // Save off the current height of the chain, in case we need to restore it.
583 size_t ChainLength = UserChain.size();
584
585 // BO being non-negative does not shed light on whether its operands are
586 // non-negative. Clear the NonNegative flag here.
587 APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
588 /* NonNegative */ false);
589 // If we found a constant offset in the left operand, stop and return that.
590 // This shortcut might cause us to miss opportunities of combining the
591 // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
592 // However, such cases are probably already handled by -instcombine,
593 // given this pass runs after the standard optimizations.
594 if (ConstantOffset != 0) return ConstantOffset;
595
596 // Reset the chain back to where it was when we started exploring this node,
597 // since visiting the LHS didn't pan out.
598 UserChain.resize(ChainLength);
599
600 ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
601 /* NonNegative */ false);
602 // If U is a sub operator, negate the constant offset found in the right
603 // operand.
604 if (BO->getOpcode() == Instruction::Sub)
605 ConstantOffset = -ConstantOffset;
606
607 // If RHS wasn't a suitable candidate either, reset the chain again.
608 if (ConstantOffset == 0)
609 UserChain.resize(ChainLength);
610
611 return ConstantOffset;
612}
613
614APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
615 bool ZeroExtended, bool NonNegative) {
616 // TODO(jingyue): We could trace into integer/pointer casts, such as
617 // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
618 // integers because it gives good enough results for our benchmarks.
619 unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
620
621 // We cannot do much with Values that are not a User, such as an Argument.
622 User *U = dyn_cast<User>(V);
623 if (U == nullptr) return APInt(BitWidth, 0);
624
625 APInt ConstantOffset(BitWidth, 0);
626 if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
627 // Hooray, we found it!
628 ConstantOffset = CI->getValue();
629 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
630 // Trace into subexpressions for more hoisting opportunities.
631 if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
632 ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
633 } else if (isa<TruncInst>(V)) {
634 ConstantOffset =
635 find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
636 .trunc(BitWidth);
637 } else if (isa<SExtInst>(V)) {
638 ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
639 ZeroExtended, NonNegative).sext(BitWidth);
640 } else if (isa<ZExtInst>(V)) {
641 // As an optimization, we can clear the SignExtended flag because
642 // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
643 //
644 // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
645 ConstantOffset =
646 find(U->getOperand(0), /* SignExtended */ false,
647 /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
648 }
649
650 // If we found a non-zero constant offset, add it to the path for
651 // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
652 // help this optimization.
653 if (ConstantOffset != 0)
654 UserChain.push_back(U);
655 return ConstantOffset;
656}
657
658Value *ConstantOffsetExtractor::applyExts(Value *V) {
659 Value *Current = V;
660 // ExtInsts is built in the use-def order. Therefore, we apply them to V
661 // in the reversed order.
662 for (CastInst *I : llvm::reverse(ExtInsts)) {
663 if (Constant *C = dyn_cast<Constant>(Current)) {
664 // If Current is a constant, apply s/zext using ConstantExpr::getCast.
665 // ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt.
666 Current = ConstantExpr::getCast(I->getOpcode(), C, I->getType());
667 } else {
668 Instruction *Ext = I->clone();
669 Ext->setOperand(0, Current);
670 Ext->insertBefore(IP);
671 Current = Ext;
672 }
673 }
674 return Current;
675}
676
677Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
678 distributeExtsAndCloneChain(UserChain.size() - 1);
679 // Remove all nullptrs (used to be s/zext) from UserChain.
680 unsigned NewSize = 0;
681 for (User *I : UserChain) {
682 if (I != nullptr) {
683 UserChain[NewSize] = I;
684 NewSize++;
685 }
686 }
687 UserChain.resize(NewSize);
688 return removeConstOffset(UserChain.size() - 1);
689}
690
691Value *
692ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
693 User *U = UserChain[ChainIndex];
694 if (ChainIndex == 0) {
695 assert(isa<ConstantInt>(U));
696 // If U is a ConstantInt, applyExts will return a ConstantInt as well.
697 return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
698 }
699
700 if (CastInst *Cast = dyn_cast<CastInst>(U)) {
701 assert(
702 (isa<SExtInst>(Cast) || isa<ZExtInst>(Cast) || isa<TruncInst>(Cast)) &&
703 "Only following instructions can be traced: sext, zext & trunc");
704 ExtInsts.push_back(Cast);
705 UserChain[ChainIndex] = nullptr;
706 return distributeExtsAndCloneChain(ChainIndex - 1);
707 }
708
709 // Function find only traces into BinaryOperator and CastInst.
710 BinaryOperator *BO = cast<BinaryOperator>(U);
711 // OpNo = which operand of BO is UserChain[ChainIndex - 1]
712 unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
713 Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
714 Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);
715
716 BinaryOperator *NewBO = nullptr;
717 if (OpNo == 0) {
718 NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
719 BO->getName(), IP);
720 } else {
721 NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
722 BO->getName(), IP);
723 }
724 return UserChain[ChainIndex] = NewBO;
725}
726
727Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
728 if (ChainIndex == 0) {
729 assert(isa<ConstantInt>(UserChain[ChainIndex]));
730 return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
731 }
732
733 BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
734 assert((BO->use_empty() || BO->hasOneUse()) &&
735 "distributeExtsAndCloneChain clones each BinaryOperator in "
736 "UserChain, so no one should be used more than "
737 "once");
738
739 unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
740 assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
741 Value *NextInChain = removeConstOffset(ChainIndex - 1);
742 Value *TheOther = BO->getOperand(1 - OpNo);
743
744 // If NextInChain is 0 and not the LHS of a sub, we can simplify the
745 // sub-expression to be just TheOther.
746 if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
747 if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
748 return TheOther;
749 }
750
751 BinaryOperator::BinaryOps NewOp = BO->getOpcode();
752
753 // Rebuild "or" as "add", because "or" may be invalid for the new
754 // expression.
755 //
756 // For instance, given
757 // a | (b + 5) where a and b + 5 have no common bits,
758 // we can extract 5 as the constant offset.
759 //
760 // However, reusing the "or" in the new index would give us
761 // (a | b) + 5
762 // which does not equal a | (b + 5).
763 //
764 // Replacing the "or" with "add" is fine, because
765 // a | (b + 5) = a + (b + 5) = (a + b) + 5
766 NewOp = Instruction::Add;
767 }
768
769 BinaryOperator *NewBO;
770 if (OpNo == 0) {
771 NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP);
772 } else {
773 NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP);
774 }
775 NewBO->takeName(BO);
776 return NewBO;
777}
778
779Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
780 User *&UserChainTail,
781 const DominatorTree *DT) {
782 ConstantOffsetExtractor Extractor(GEP, DT);
783 // Find a non-zero constant offset first.
784 APInt ConstantOffset =
785 Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
786 GEP->isInBounds());
787 if (ConstantOffset == 0) {
788 UserChainTail = nullptr;
789 return nullptr;
790 }
791 // Separates the constant offset from the GEP index.
792 Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
793 UserChainTail = Extractor.UserChain.back();
794 return IdxWithoutConstOffset;
795}
796
797int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
798 const DominatorTree *DT) {
799 // If Idx is an index of an inbounds GEP, Idx is guaranteed to be non-negative.
800 return ConstantOffsetExtractor(GEP, DT)
801 .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
802 GEP->isInBounds())
803 .getSExtValue();
804}
805
806bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToIndexSize(
807 GetElementPtrInst *GEP) {
808 bool Changed = false;
809 gep_type_iterator GTI = gep_type_begin(*GEP);
810 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
811 for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
812 I != E; ++I, ++GTI) {
813 // Skip struct member indices which must be i32.
814 if (GTI.isSequential()) {
815 if ((*I)->getType() != PtrIdxTy) {
816 *I = CastInst::CreateIntegerCast(*I, PtrIdxTy, true, "idxprom", GEP);
817 Changed = true;
818 }
819 }
820 }
821 return Changed;
822}
823
824int64_t
825SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
826 bool &NeedsExtraction) {
827 NeedsExtraction = false;
828 int64_t AccumulativeByteOffset = 0;
829 gep_type_iterator GTI = gep_type_begin(*GEP);
830 for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
831 if (GTI.isSequential()) {
832 // Constant offsets of scalable types are not really constant.
833 if (GTI.getIndexedType()->isScalableTy())
834 continue;
835
836 // Tries to extract a constant offset from this GEP index.
837 int64_t ConstantOffset =
838 ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
839 if (ConstantOffset != 0) {
840 NeedsExtraction = true;
841 // A GEP may have multiple indices. We accumulate the extracted
842 // constant offset to a byte offset, and later offset the remainder of
843 // the original GEP with this byte offset.
844 AccumulativeByteOffset +=
845 ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
846 }
847 } else if (LowerGEP) {
848 StructType *StTy = GTI.getStructType();
849 uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
850 // Skip field 0 as the offset is always 0.
851 if (Field != 0) {
852 NeedsExtraction = true;
853 AccumulativeByteOffset +=
854 DL->getStructLayout(StTy)->getElementOffset(Field);
855 }
856 }
857 }
858 return AccumulativeByteOffset;
859}
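
// A worked example of the accumulation above (illustrative numbers, reusing the
// float a[32][32] array from the file header): for
//
//   getelementptr [32 x [32 x float]], ptr @a, i64 0, i64 %x1, i64 %y3
//
// where %x1 = add i64 %x, 1 and %y3 = add i64 %y, 3, the second index hides the
// constant 1 (scaled by 32 * 4 = 128 bytes) and the third hides the constant 3
// (scaled by 4 bytes), so AccumulativeByteOffset = 128 + 12 = 140.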
860
861void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
862 GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
863 IRBuilder<> Builder(Variadic);
864 Type *PtrIndexTy = DL->getIndexType(Variadic->getType());
865
866 Value *ResultPtr = Variadic->getOperand(0);
867 Loop *L = LI->getLoopFor(Variadic->getParent());
868 // Check whether the base is loop invariant and not used more than once in the loop.
869 bool isSwapCandidate =
870 L && L->isLoopInvariant(ResultPtr) &&
871 !hasMoreThanOneUseInLoop(ResultPtr, L);
872 Value *FirstResult = nullptr;
873
874 gep_type_iterator GTI = gep_type_begin(*Variadic);
875 // Create an ugly GEP for each sequential index. We don't create GEPs for
876 // structure indices, as they are accumulated in the constant offset index.
877 for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
878 if (GTI.isSequential()) {
879 Value *Idx = Variadic->getOperand(I);
880 // Skip zero indices.
881 if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
882 if (CI->isZero())
883 continue;
884
885 APInt ElementSize = APInt(PtrIndexTy->getIntegerBitWidth(),
886 DL->getTypeAllocSize(GTI.getIndexedType()));
887 // Scale the index by element size.
888 if (ElementSize != 1) {
889 if (ElementSize.isPowerOf2()) {
890 Idx = Builder.CreateShl(
891 Idx, ConstantInt::get(PtrIndexTy, ElementSize.logBase2()));
892 } else {
893 Idx =
894 Builder.CreateMul(Idx, ConstantInt::get(PtrIndexTy, ElementSize));
895 }
896 }
897 // Create an ugly GEP with a single index for each index.
898 ResultPtr =
899 Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
900 if (FirstResult == nullptr)
901 FirstResult = ResultPtr;
902 }
903 }
904
905 // Create a GEP with the constant offset index.
906 if (AccumulativeByteOffset != 0) {
907 Value *Offset = ConstantInt::get(PtrIndexTy, AccumulativeByteOffset);
908 ResultPtr =
909 Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
910 } else
911 isSwapCandidate = false;
912
913 // If we created a GEP with a constant index, and the base is loop invariant,
914 // then we swap the first GEP with it, so LICM can later move the constant GEP
915 // out of the loop.
916 auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
917 auto *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
918 if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
919 swapGEPOperand(FirstGEP, SecondGEP);
920
921 Variadic->replaceAllUsesWith(ResultPtr);
922 Variadic->eraseFromParent();
923}
924
925void
926SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
927 int64_t AccumulativeByteOffset) {
928 IRBuilder<> Builder(Variadic);
929 Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
930 assert(IntPtrTy == DL->getIndexType(Variadic->getType()) &&
931 "Pointer type must match index type for arithmetic-based lowering of "
932 "split GEPs");
933
934 Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
935 gep_type_iterator GTI = gep_type_begin(*Variadic);
936 // Create ADD/SHL/MUL arithmetic operations for each sequential index. We
937 // don't create arithmetic for structure indices, as they are accumulated
938 // in the constant offset index.
939 for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
940 if (GTI.isSequential()) {
941 Value *Idx = Variadic->getOperand(I);
942 // Skip zero indices.
943 if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
944 if (CI->isZero())
945 continue;
946
947 APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
948 DL->getTypeAllocSize(GTI.getIndexedType()));
949 // Scale the index by element size.
950 if (ElementSize != 1) {
951 if (ElementSize.isPowerOf2()) {
952 Idx = Builder.CreateShl(
953 Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
954 } else {
955 Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
956 }
957 }
958 // Create an ADD for each index.
959 ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
960 }
961 }
962
963 // Create an ADD for the constant offset index.
964 if (AccumulativeByteOffset != 0) {
965 ResultPtr = Builder.CreateAdd(
966 ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
967 }
968
969 ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
970 Variadic->replaceAllUsesWith(ResultPtr);
971 Variadic->eraseFromParent();
972}
973
974bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
975 // Skip vector GEPs.
976 if (GEP->getType()->isVectorTy())
977 return false;
978
979 // The backend can already nicely handle the case where all indices are
980 // constant.
981 if (GEP->hasAllConstantIndices())
982 return false;
983
984 bool Changed = canonicalizeArrayIndicesToIndexSize(GEP);
985
986 bool NeedsExtraction;
987 int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
988
989 if (!NeedsExtraction)
990 return Changed;
991
992 TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());
993
994 // If LowerGEP is disabled, before really splitting the GEP, check whether the
995 // backend supports the addressing mode we are about to produce. If not, this
996 // splitting probably won't be beneficial.
997 // If LowerGEP is enabled, even if the extracted constant offset cannot match
998 // the addressing mode, we can still optimize the other lowered parts of the
999 // variable indices. Therefore, we don't check for addressing modes in that
1000 // case.
1001 if (!LowerGEP) {
1002 unsigned AddrSpace = GEP->getPointerAddressSpace();
1003 if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
1004 /*BaseGV=*/nullptr, AccumulativeByteOffset,
1005 /*HasBaseReg=*/true, /*Scale=*/0,
1006 AddrSpace)) {
1007 return Changed;
1008 }
1009 }
1010
1011 // Remove the constant offset in each sequential index. The resultant GEP
1012 // computes the variadic base.
1013 // Notice that we don't remove struct field indices here. If LowerGEP is
1014 // disabled, a structure index is not accumulated and we still use the old
1015 // one. If LowerGEP is enabled, a structure index is accumulated in the
1016 // constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later
1017 // handle the constant offset and won't need a new structure index.
1018 gep_type_iterator GTI = gep_type_begin(*GEP);
1019 for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
1020 if (GTI.isSequential()) {
1021 // Constant offsets of scalable types are not really constant.
1022 if (GTI.getIndexedType()->isScalableTy())
1023 continue;
1024
1025 // Splits this GEP index into a variadic part and a constant offset, and
1026 // uses the variadic part as the new index.
1027 Value *OldIdx = GEP->getOperand(I);
1028 User *UserChainTail;
1029 Value *NewIdx =
1030 ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail, DT);
1031 if (NewIdx != nullptr) {
1032 // Switches to the index with the constant offset removed.
1033 GEP->setOperand(I, NewIdx);
1034 // After switching to the new index, we can garbage-collect UserChain
1035 // and the old index if they are not used.
1038 }
1039 }
1040 }
1041
1042 // Clear the inbounds attribute because the new index may be off-bound.
1043 // e.g.,
1044 //
1045 // b = add i64 a, 5
1046 // addr = gep inbounds float, float* p, i64 b
1047 //
1048 // is transformed to:
1049 //
1050 // addr2 = gep float, float* p, i64 a ; inbounds removed
1051 // addr = gep inbounds float, float* addr2, i64 5
1052 //
1053 // If a is -4, although the old index b is in bounds, the new index a is
1054 // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
1055 // inbounds keyword is not present, the offsets are added to the base
1056 // address with silently-wrapping two's complement arithmetic".
1057 // Therefore, the final code will be semantically equivalent.
1058 //
1059 // TODO(jingyue): do some range analysis to keep as many inbounds as
1060 // possible. GEPs with inbounds are more friendly to alias analysis.
1061 bool GEPWasInBounds = GEP->isInBounds();
1062 GEP->setIsInBounds(false);
1063
1064 // Lowers a GEP to either GEPs with a single index or arithmetic operations.
1065 if (LowerGEP) {
1066 // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
1067 // arithmetic operations if the target uses alias analysis in codegen.
1068 // Additionally, pointers that aren't integral (and so can't be safely
1069 // converted to integers) or those whose offset size is different from their
1070 // pointer size (which means that doing integer arithmetic on them could
1071 // affect that data) can't be lowered in this way.
1072 unsigned AddrSpace = GEP->getPointerAddressSpace();
1073 bool PointerHasExtraData = DL->getPointerSizeInBits(AddrSpace) !=
1074 DL->getIndexSizeInBits(AddrSpace);
1075 if (TTI.useAA() || DL->isNonIntegralAddressSpace(AddrSpace) ||
1076 PointerHasExtraData)
1077 lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
1078 else
1079 lowerToArithmetics(GEP, AccumulativeByteOffset);
1080 return true;
1081 }
1082
1083 // No need to create another GEP if the accumulative byte offset is 0.
1084 if (AccumulativeByteOffset == 0)
1085 return true;
1086
1087 // Offsets the base with the accumulative byte offset.
1088 //
1089 // %gep ; the base
1090 // ... %gep ...
1091 //
1092 // => add the offset
1093 //
1094 // %gep2 ; clone of %gep
1095 // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
1096 // %gep ; will be removed
1097 // ... %gep ...
1098 //
1099 // => replace all uses of %gep with %new.gep and remove %gep
1100 //
1101 // %gep2 ; clone of %gep
1102 // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
1103 // ... %new.gep ...
1104 //
1105 // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
1106 // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
1107 // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
1108 // type of %gep.
1109 //
1110 // %gep2 ; clone of %gep
1111 // %0 = bitcast %gep2 to i8*
1112 // %uglygep = gep %0, <offset>
1113 // %new.gep = bitcast %uglygep to <type of %gep>
1114 // ... %new.gep ...
1115 Instruction *NewGEP = GEP->clone();
1116 NewGEP->insertBefore(GEP);
1117
1118 // Per the ANSI C standard, signed / unsigned = unsigned and signed % unsigned =
1119 // unsigned. Therefore, we cast ElementTypeSizeOfGEP to signed because it is
1120 // used with the signed AccumulativeByteOffset later.
1121 int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
1122 DL->getTypeAllocSize(GEP->getResultElementType()));
1123 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
1124 if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
1125 // Very likely. As long as %gep is naturally aligned, the byte offset we
1126 // extracted should be a multiple of sizeof(*%gep).
1127 int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
1128 NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
1129 ConstantInt::get(PtrIdxTy, Index, true),
1130 GEP->getName(), GEP);
1131 NewGEP->copyMetadata(*GEP);
1132 // Inherit the inbounds attribute of the original GEP.
1133 cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
1134 } else {
1135 // Unlikely but possible. For example,
1136 // #pragma pack(1)
1137 // struct S {
1138 // int a[3];
1139 // int64 b[8];
1140 // };
1141 // #pragma pack()
1142 //
1143 // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
1144 // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
1145 // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
1146 // sizeof(int64).
1147 //
1148 // Emit an uglygep in this case.
1149 IRBuilder<> Builder(GEP);
1150 NewGEP = cast<Instruction>(Builder.CreateGEP(
1151 Builder.getInt8Ty(), NewGEP,
1152 {ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true)}, "uglygep",
1153 GEPWasInBounds));
1154 NewGEP->copyMetadata(*GEP);
1155 }
1156
1157 GEP->replaceAllUsesWith(NewGEP);
1158 GEP->eraseFromParent();
1159
1160 return true;
1161}
1162
1163bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
1164 if (skipFunction(F))
1165 return false;
1166 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1167 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1168 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1169 auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
1170 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1171 };
1172 SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
1173 return Impl.run(F);
1174}
1175
1176bool SeparateConstOffsetFromGEP::run(Function &F) {
1177 if (DisableSeparateConstOffsetFromGEP)
1178 return false;
1179
1180 DL = &F.getParent()->getDataLayout();
1181 bool Changed = false;
1182 for (BasicBlock &B : F) {
1183 if (!DT->isReachableFromEntry(&B))
1184 continue;
1185
1186 for (Instruction &I : llvm::make_early_inc_range(B))
1187 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I))
1188 Changed |= splitGEP(GEP);
1189 // No need to split GEP ConstantExprs because all their indices are constant
1190 // already.
1191 }
1192
1193 Changed |= reuniteExts(F);
1194
1195 if (VerifyNoDeadCode)
1196 verifyNoDeadCode(F);
1197
1198 return Changed;
1199}
1200
1201Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
1202 ExprKey Key, Instruction *Dominatee,
1203 DenseMap<ExprKey, SmallVector<Instruction *, 2>> &DominatingExprs) {
1204 auto Pos = DominatingExprs.find(Key);
1205 if (Pos == DominatingExprs.end())
1206 return nullptr;
1207
1208 auto &Candidates = Pos->second;
1209 // Because we process the basic blocks in pre-order of the dominator tree, a
1210 // candidate that doesn't dominate the current instruction won't dominate any
1211 // future instruction either. Therefore, we pop it out of the stack. This
1212 // optimization makes the algorithm O(n).
1213 while (!Candidates.empty()) {
1214 Instruction *Candidate = Candidates.back();
1215 if (DT->dominates(Candidate, Dominatee))
1216 return Candidate;
1217 Candidates.pop_back();
1218 }
1219 return nullptr;
1220}
1221
1222bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
1223 if (!I->getType()->isIntOrIntVectorTy())
1224 return false;
1225
1226 // Dom: LHS+RHS
1227 // I: sext(LHS)+sext(RHS)
1228 // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
1229 // TODO: handle zext
1230 Value *LHS = nullptr, *RHS = nullptr;
1231 if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
1232 if (LHS->getType() == RHS->getType()) {
1233 ExprKey Key = createNormalizedCommutablePair(LHS, RHS);
1234 if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingAdds)) {
1235 Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
1236 NewSExt->takeName(I);
1237 I->replaceAllUsesWith(NewSExt);
1239 return true;
1240 }
1241 }
1242 } else if (match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
1243 if (LHS->getType() == RHS->getType()) {
1244 if (auto *Dom =
1245 findClosestMatchingDominator({LHS, RHS}, I, DominatingSubs)) {
1246 Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
1247 NewSExt->takeName(I);
1248 I->replaceAllUsesWith(NewSExt);
1250 return true;
1251 }
1252 }
1253 }
1254
1255 // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
1256 if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS)))) {
1258 ExprKey Key = createNormalizedCommutablePair(LHS, RHS);
1259 DominatingAdds[Key].push_back(I);
1260 }
1261 } else if (match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
1263 DominatingSubs[{LHS, RHS}].push_back(I);
1264 }
1265 return false;
1266}
1267
1268bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
1269 bool Changed = false;
1270 DominatingAdds.clear();
1271 DominatingSubs.clear();
1272 for (const auto Node : depth_first(DT)) {
1273 BasicBlock *BB = Node->getBlock();
1274 for (Instruction &I : *BB)
1275 Changed |= reuniteExts(&I);
1276 }
1277 return Changed;
1278}
1279
1280void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
1281 for (BasicBlock &B : F) {
1282 for (Instruction &I : B) {
1283 if (isInstructionTriviallyDead(&I)) {
1284 std::string ErrMessage;
1285 raw_string_ostream RSO(ErrMessage);
1286 RSO << "Dead instruction detected!\n" << I << "\n";
1287 llvm_unreachable(RSO.str().c_str());
1288 }
1289 }
1290 }
1291}
1292
1293bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
1294 GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
1295 if (!FirstGEP || !FirstGEP->hasOneUse())
1296 return false;
1297
1298 if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
1299 return false;
1300
1301 if (FirstGEP == SecondGEP)
1302 return false;
1303
1304 unsigned FirstNum = FirstGEP->getNumOperands();
1305 unsigned SecondNum = SecondGEP->getNumOperands();
1306 // Give up if the number of operands is not 2.
1307 if (FirstNum != SecondNum || FirstNum != 2)
1308 return false;
1309
1310 Value *FirstBase = FirstGEP->getOperand(0);
1311 Value *SecondBase = SecondGEP->getOperand(0);
1312 Value *FirstOffset = FirstGEP->getOperand(1);
1313 // Give up if the index of the first GEP is loop invariant.
1314 if (CurLoop->isLoopInvariant(FirstOffset))
1315 return false;
1316
1317 // Give up if the bases don't have the same type.
1318 if (FirstBase->getType() != SecondBase->getType())
1319 return false;
1320
1321 Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);
1322
1323 // Check if the second operand of the first GEP has a constant coefficient.
1324 // For example, in the following code, we won't gain anything by
1325 // hoisting the second GEP out because the second GEP can be folded away.
1326 // %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
1327 // %67 = shl i64 %scevgep.sum.ur159, 2
1328 // %uglygep160 = getelementptr i8* %65, i64 %67
1329 // %uglygep161 = getelementptr i8* %uglygep160, i64 -1024
1330
1331 // Skip a constant shift instruction which may be generated by splitting GEPs.
1332 if (FirstOffsetDef && FirstOffsetDef->isShift() &&
1333 isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
1334 FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));
1335
1336 // Give up if FirstOffsetDef is an Add or Sub with a constant operand,
1337 // because it may not be profitable at all due to constant folding.
1338 if (FirstOffsetDef)
1339 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
1340 unsigned opc = BO->getOpcode();
1341 if ((opc == Instruction::Add || opc == Instruction::Sub) &&
1342 (isa<ConstantInt>(BO->getOperand(0)) ||
1343 isa<ConstantInt>(BO->getOperand(1))))
1344 return false;
1345 }
1346 return true;
1347}
1348
1349bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
1350 int UsesInLoop = 0;
1351 for (User *U : V->users()) {
1352 if (Instruction *User = dyn_cast<Instruction>(U))
1353 if (L->contains(User))
1354 if (++UsesInLoop > 1)
1355 return true;
1356 }
1357 return false;
1358}
1359
1360void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
1361 GetElementPtrInst *Second) {
1362 Value *Offset1 = First->getOperand(1);
1363 Value *Offset2 = Second->getOperand(1);
1364 First->setOperand(1, Offset2);
1365 Second->setOperand(1, Offset1);
1366
1367 // We changed p+o+c to p+c+o; p+c may not be inbounds anymore.
1368 const DataLayout &DAL = First->getModule()->getDataLayout();
1369 APInt Offset(DAL.getIndexSizeInBits(
1370 cast<PointerType>(First->getType())->getAddressSpace()),
1371 0);
1372 Value *NewBase =
1373 First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);
1374 uint64_t ObjectSize;
1375 if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
1376 Offset.ugt(ObjectSize)) {
1377 First->setIsInBounds(false);
1378 Second->setIsInBounds(false);
1379 } else
1380 First->setIsInBounds(true);
1381}
1382
1383void SeparateConstOffsetFromGEPPass::printPipeline(
1384 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1385 static_cast<PassInfoMixin<SeparateConstOffsetFromGEPPass> *>(this)
1386 ->printPipeline(OS, MapClassName2PassName);
1387 OS << '<';
1388 if (LowerGEP)
1389 OS << "lower-gep";
1390 OS << '>';
1391}
1392
1393PreservedAnalyses
1394SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) {
1395 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1396 auto *LI = &AM.getResult<LoopAnalysis>(F);
1397 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
1398 auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
1399 return AM.getResult<TargetIRAnalysis>(F);
1400 };
1401 SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
1402 if (!Impl.run(F))
1403 return PreservedAnalyses::all();
1404 PreservedAnalyses PA;
1405 PA.preserveSet<CFGAnalyses>();
1406 return PA;
1407}
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:188
This class represents a sign extension of integer types.
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Class to represent struct types.
Definition: DerivedTypes.h:213
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
bool use_empty() const
Definition: Value.h:344
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:384
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Key
PAL metadata keys.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:982
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:994
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:440
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1747
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:529
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:666
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:398
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:429
bool programUndefinedIfPoison(const Instruction *Inst)
void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &)
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if LHS and RHS have no common bits set.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:184
gep_type_iterator gep_type_begin(const User *GEP)
iterator_range< df_iterator< T > > depth_first(const T &G)
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:371