1//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Loop unrolling may create many similar GEPs for array accesses.
10// e.g., a 2-level loop
11//
12// float a[32][32]; // global variable
13//
14// for (int i = 0; i < 2; ++i) {
15// for (int j = 0; j < 2; ++j) {
16// ...
17// ... = a[x + i][y + j];
18// ...
19// }
20// }
21//
22// will probably be unrolled to:
23//
24// gep %a, 0, %x, %y; load
25// gep %a, 0, %x, %y + 1; load
26// gep %a, 0, %x + 1, %y; load
27// gep %a, 0, %x + 1, %y + 1; load
28//
29// LLVM's GVN does not use partial redundancy elimination yet, and is thus
30// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs
31// a significant slowdown on targets with limited addressing modes. For instance,
32// because the PTX target does not support the reg+reg addressing mode, the
33// NVPTX backend emits PTX code that literally computes the pointer address of
34// each GEP, wasting tons of registers. It emits the following PTX for the
35// first load and similar PTX for other loads.
36//
37// mov.u32 %r1, %x;
38// mov.u32 %r2, %y;
39// mul.wide.u32 %rl2, %r1, 128;
40// mov.u64 %rl3, a;
41// add.s64 %rl4, %rl3, %rl2;
42// mul.wide.u32 %rl5, %r2, 4;
43// add.s64 %rl6, %rl4, %rl5;
44// ld.global.f32 %f1, [%rl6];
45//
46// To reduce the register pressure, the optimization implemented in this file
47// merges the common part of a group of GEPs, so we can compute each pointer
48// address by adding a simple offset to the common part, saving many registers.
49//
50// It works by splitting each GEP into a variadic base and a constant offset.
51// The variadic base can be computed once and reused by multiple GEPs, and the
52// constant offsets can be nicely folded into the reg+immediate addressing mode
53// (supported by most targets) without using any extra register.
54//
55// For instance, we transform the four GEPs and four loads in the above example
56// into:
57//
58// base = gep a, 0, x, y
59// load base
60// load base + 1 * sizeof(float)
61// load base + 32 * sizeof(float)
62// load base + 33 * sizeof(float)
63//
64// Given the transformed IR, a backend that supports the reg+immediate
65// addressing mode can easily fold the pointer arithmetics into the loads. For
66// example, the NVPTX backend can easily fold the pointer arithmetics into the
67// ld.global.f32 instructions, and the resultant PTX uses much fewer registers.
68//
69// mov.u32 %r1, %tid.x;
70// mov.u32 %r2, %tid.y;
71// mul.wide.u32 %rl2, %r1, 128;
72// mov.u64 %rl3, a;
73// add.s64 %rl4, %rl3, %rl2;
74// mul.wide.u32 %rl5, %r2, 4;
75// add.s64 %rl6, %rl4, %rl5;
76// ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX
77// ld.global.f32 %f2, [%rl6+4]; // much better
78// ld.global.f32 %f3, [%rl6+128]; // much better
79// ld.global.f32 %f4, [%rl6+132]; // much better
80//
81// Another improvement enabled by the LowerGEP flag is to lower a GEP with
82// multiple indices to either multiple GEPs with a single index or arithmetic
83// operations (depending on whether the target uses alias analysis in codegen).
84// Such a transformation can have the following benefits:
85// (1) It can always extract constants in the indices of structure type.
86// (2) After such lowering, there are more optimization opportunities such as
87// CSE, LICM and CGP.
88//
89// E.g. The following GEPs have multiple indices:
90// BB1:
91// %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
92// load %p
93// ...
94// BB2:
95// %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j2, i32 2
96// load %p2
97// ...
98//
99// We cannot apply CSE to the common part related to index "i64 %i" while the
100// GEPs stay in this multi-index form. Lowering the GEPs makes such CSE possible.
101// If the target does not use alias analysis in codegen, this pass will
102// lower a GEP with multiple indices into arithmetic operations:
103// BB1:
104// %1 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
105// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
106// %3 = add i64 %1, %2 ; CSE opportunity
107// %4 = mul i64 %j1, length_of_struct
108// %5 = add i64 %3, %4
109// %6 = add i64 %3, struct_field_3 ; Constant offset
110// %p = inttoptr i64 %6 to i32*
111// load %p
112// ...
113// BB2:
114// %7 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
115// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
116// %9 = add i64 %7, %8 ; CSE opportunity
117// %10 = mul i64 %j2, length_of_struct
118// %11 = add i64 %9, %10
119// %12 = add i64 %11, struct_field_2 ; Constant offset
120// %p2 = inttoptr i64 %12 to i32*
121// load %p2
122// ...
123//
124// If the target uses alias analysis in codegen, this pass will lower a GEP
125// with multiple indices into multiple GEPs with a single index:
126// BB1:
127// %1 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
128// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
129// %3 = getelementptr i8* %1, i64 %2 ; CSE opportunity
130// %4 = mul i64 %j1, length_of_struct
131// %5 = getelementptr i8* %3, i64 %4
132// %6 = getelementptr i8* %5, struct_field_3 ; Constant offset
133// %p = bitcast i8* %6 to i32*
134// load %p
135// ...
136// BB2:
137// %7 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
138// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
139// %9 = getelementptr i8* %7, i64 %8 ; CSE opportunity
140// %10 = mul i64 %j2, length_of_struct
141// %11 = getelementptr i8* %9, i64 %10
142// %12 = getelementptr i8* %11, struct_field_2 ; Constant offset
143// %p2 = bitcast i8* %12 to i32*
144// load %p2
145// ...
146//
147// Lowering GEPs can also benefit other passes such as LICM and CGP.
148// LICM (Loop Invariant Code Motion) cannot hoist/sink a GEP with multiple
149// indices if one of the indices is variant. If we lower such a GEP into
150// invariant parts and variant parts, LICM can hoist/sink those invariant parts.
151// CGP (CodeGen Prepare) tries to sink address calculations that match the
152// target's addressing modes. A GEP with multiple indices may not match and will
153// not be sunk. If we lower such a GEP into smaller parts, CGP may sink some of
154// them, and we end up with a better addressing mode.
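//
// As an illustrative sketch of the LICM benefit (hypothetical IR, not taken
// from an actual test), consider a loop that repeatedly computes
//
//   %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j, i32 3
//
// where %i is loop invariant and %j changes every iteration. The multi-index
// GEP itself cannot be hoisted. After lowering, the %i-dependent part (the
// "mul i64 %i, length_of_10xstruct" and the single-index GEP that adds it to
// the base) becomes loop invariant and LICM can hoist it, leaving only the
// %j-dependent part inside the loop.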
155//
156//===----------------------------------------------------------------------===//
157
159#include "llvm/ADT/APInt.h"
160#include "llvm/ADT/DenseMap.h"
162#include "llvm/ADT/SmallVector.h"
169#include "llvm/IR/BasicBlock.h"
170#include "llvm/IR/Constant.h"
171#include "llvm/IR/Constants.h"
172#include "llvm/IR/DataLayout.h"
173#include "llvm/IR/DerivedTypes.h"
174#include "llvm/IR/Dominators.h"
175#include "llvm/IR/Function.h"
177#include "llvm/IR/IRBuilder.h"
178#include "llvm/IR/Instruction.h"
179#include "llvm/IR/Instructions.h"
180#include "llvm/IR/Module.h"
181#include "llvm/IR/PassManager.h"
182#include "llvm/IR/PatternMatch.h"
183#include "llvm/IR/Type.h"
184#include "llvm/IR/User.h"
185#include "llvm/IR/Value.h"
187#include "llvm/Pass.h"
188#include "llvm/Support/Casting.h"
194#include <cassert>
195#include <cstdint>
196#include <string>
197
198using namespace llvm;
199using namespace llvm::PatternMatch;
200
202 "disable-separate-const-offset-from-gep", cl::init(false),
203 cl::desc("Do not separate the constant offset from a GEP instruction"),
204 cl::Hidden);
205
206// Setting this flag may emit false positives when the input module already
207// contains dead instructions. Therefore, we set it only in unit tests that are
208// free of dead code.
209static cl::opt<bool>
210 VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false),
211 cl::desc("Verify this pass produces no dead code"),
212 cl::Hidden);
213
214namespace {
215
216/// A helper class for separating a constant offset from a GEP index.
217///
218/// In real programs, a GEP index may be more complicated than a simple addition
219/// of a value and a constant integer that can be trivially split. For
220/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
221/// constant offset, so that we can separate the index to (a << 3) + b and 5.
222///
223/// Therefore, this class looks into the expression that computes a given GEP
224/// index, and tries to find a constant integer that can be hoisted to the
225/// outermost level of the expression as an addition. Not every constant in an
226/// expression can be hoisted out. E.g., we cannot transform (b * (a + 5)) to
227/// (b * a + 5); nor can we transform (3 * (a + 5)) to (3 * a + 5), though in this case
228/// -instcombine probably already optimized (3 * (a + 5)) to (3 * a + 15).
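///
/// As a minimal sketch (illustrative IR, not taken from a test file), given a
/// GEP index computed as
///   %shl = shl i64 %a, 3
///   %or  = or  i64 %shl, 5
///   %idx = add i64 %or, %b
/// this class finds the constant 5 (the "or" acts as an "add" because %shl and
/// 5 share no bits) and rebuilds the index as a value equivalent to
/// (%a << 3) + %b; splitGEP then folds the extracted 5, scaled by the element
/// size, into a trailing constant offset. Note that the "or" must be rebuilt
/// as an "add", since (x | y) + 5 is not in general equal to x | (y + 5).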
229class ConstantOffsetExtractor {
230public:
231 /// Extracts a constant offset from the given GEP index. It returns the
232 /// new index representing the remainder (equal to the original index minus
233 /// the constant offset), or nullptr if we cannot extract a constant offset.
234 /// \p Idx The given GEP index
235 /// \p GEP The given GEP
236 /// \p UserChainTail Outputs the tail of UserChain so that we can
237 /// garbage-collect unused instructions in UserChain.
238 static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
239 User *&UserChainTail, const DominatorTree *DT);
240
241 /// Looks for a constant offset from the given GEP index without extracting
242 /// it. It returns the numeric value of the extracted constant offset (0 if
243 /// failed). The meanings of the arguments are the same as in Extract.
244 static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
245 const DominatorTree *DT);
246
247private:
248 ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
249 : IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) {
250 }
251
252 /// Searches the expression that computes V for a non-zero constant C s.t.
253 /// V can be reassociated into the form V' + C. If the search is
254 /// successful, returns C and updates UserChain as a def-use chain from C to V;
255 /// otherwise, UserChain is empty.
256 ///
257 /// \p V The given expression
258 /// \p SignExtended Whether V will be sign-extended in the computation of the
259 /// GEP index
260 /// \p ZeroExtended Whether V will be zero-extended in the computation of the
261 /// GEP index
262 /// \p NonNegative Whether V is guaranteed to be non-negative. For example,
263 /// an index of an inbounds GEP is guaranteed to be
264 /// non-negative. Leveraging this, we can better split
265 /// inbounds GEPs.
266 APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);
267
268 /// A helper function to look into both operands of a binary operator.
269 APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
270 bool ZeroExtended);
271
272 /// After finding the constant offset C from the GEP index I, we build a new
273 /// index I' s.t. I' + C = I. This function builds and returns the new
274 /// index I' according to UserChain produced by function "find".
275 ///
276 /// The building conceptually takes two steps:
277 /// 1) iteratively distribute s/zext towards the leaves of the expression tree
278 /// that computes I
279 /// 2) reassociate the expression tree to the form I' + C.
280 ///
281 /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
282 /// sext to a, b and 5 so that we have
283 /// sext(a) + (sext(b) + 5).
284 /// Then, we reassociate it to
285 /// (sext(a) + sext(b)) + 5.
286 /// Given this form, we know I' is sext(a) + sext(b).
287 Value *rebuildWithoutConstOffset();
288
289 /// After the first step of rebuilding the GEP index without the constant
290 /// offset, distribute s/zext to the operands of all operators in UserChain.
291 /// e.g., zext(sext(a + (b + 5))) (assuming no overflow) =>
292 /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
293 ///
294 /// The function also updates UserChain to point to new subexpressions after
295 /// distributing s/zext. e.g., the old UserChain of the above example is
296 /// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
297 /// and the new UserChain is
298 /// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
299 /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5)))
300 ///
301 /// \p ChainIndex The index to UserChain. ChainIndex is initially
302 /// UserChain.size() - 1, and is decremented during
303 /// the recursion.
304 Value *distributeExtsAndCloneChain(unsigned ChainIndex);
305
306 /// Reassociates the GEP index to the form I' + C and returns I'.
307 Value *removeConstOffset(unsigned ChainIndex);
308
309 /// A helper function to apply ExtInsts, a list of s/zext, to value V.
310 /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
311 /// returns "sext i32 (zext i16 V to i32) to i64".
312 Value *applyExts(Value *V);
313
314 /// A helper function that returns whether we can trace into the operands
315 /// of binary operator BO for a constant offset.
316 ///
317 /// \p SignExtended Whether BO is surrounded by sext
318 /// \p ZeroExtended Whether BO is surrounded by zext
319 /// \p NonNegative Whether BO is known to be non-negative, e.g., an in-bound
320 /// array index.
321 bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
322 bool NonNegative);
323
324 /// The path from the constant offset to the old GEP index. e.g., if the GEP
325 /// index is "a * b + (c + 5)". After running function find, UserChain[0] will
326 /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
327 /// UserChain[2] will be the entire expression "a * b + (c + 5)".
328 ///
329 /// This path helps to rebuild the new GEP index.
330 SmallVector<User *, 8> UserChain;
331
332 /// A data structure used in rebuildWithoutConstOffset. Contains all
333 /// sext/zext instructions along UserChain.
334 SmallVector<CastInst *, 16> ExtInsts;
335
336 /// Insertion position of cloned instructions.
337 Instruction *IP;
338
339 const DataLayout &DL;
340 const DominatorTree *DT;
341};
342
343/// A pass that tries to split every GEP in the function into a variadic
344/// base and a constant offset. It is a FunctionPass because searching for the
345/// constant offset may inspect other basic blocks.
346class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass {
347public:
348 static char ID;
349
350 SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
351 : FunctionPass(ID), LowerGEP(LowerGEP) {
352 initializeSeparateConstOffsetFromGEPLegacyPassPass(
353 *PassRegistry::getPassRegistry());
354 }
355
356 void getAnalysisUsage(AnalysisUsage &AU) const override {
357 AU.addRequired<DominatorTreeWrapperPass>();
358 AU.addRequired<ScalarEvolutionWrapperPass>();
359 AU.addRequired<TargetTransformInfoWrapperPass>();
360 AU.addRequired<LoopInfoWrapperPass>();
361 AU.setPreservesCFG();
362 AU.addRequired<TargetLibraryInfoWrapperPass>();
363 }
364
365 bool runOnFunction(Function &F) override;
366
367private:
368 bool LowerGEP;
369};
370
371/// A pass that tries to split every GEP in the function into a variadic
372/// base and a constant offset. It is a FunctionPass because searching for the
373/// constant offset may inspect other basic blocks.
374class SeparateConstOffsetFromGEP {
375public:
376 SeparateConstOffsetFromGEP(
377 DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI,
378 TargetLibraryInfo *TLI,
379 function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
380 : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}
381
382 bool run(Function &F);
383
384private:
385 /// Tries to split the given GEP into a variadic base and a constant offset,
386 /// and returns true if the splitting succeeds.
387 bool splitGEP(GetElementPtrInst *GEP);
388
389 /// Lower a GEP with multiple indices into multiple GEPs with a single index.
390 /// Function splitGEP already split the original GEP into a variadic part and
391 /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
392 /// variadic part into a set of GEPs with a single index and applies
393 /// AccumulativeByteOffset to it.
394 /// \p Variadic The variadic part of the original GEP.
395 /// \p AccumulativeByteOffset The constant offset.
396 void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
397 int64_t AccumulativeByteOffset);
398
399 /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form.
400 /// Function splitGEP already split the original GEP into a variadic part and
401 /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
402 /// variadic part into a set of arithmetic operations and applies
403 /// AccumulativeByteOffset to it.
404 /// \p Variadic The variadic part of the original GEP.
405 /// \p AccumulativeByteOffset The constant offset.
406 void lowerToArithmetics(GetElementPtrInst *Variadic,
407 int64_t AccumulativeByteOffset);
408
409 /// Finds the constant offset within each index and accumulates them. If
410 /// LowerGEP is true, it finds in indices of both sequential and structure
411 /// types, otherwise it only finds in sequential indices. The output
412 /// NeedsExtraction indicates whether we successfully find a non-zero constant
413 /// offset.
414 int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);
415
416 /// Canonicalize array indices to pointer-size integers. This helps to
417 /// simplify the logic of splitting a GEP. For example, if a + b is a
418 /// pointer-size integer, we have
419 /// gep base, a + b = gep (gep base, a), b
420 /// However, this equality may not hold if the size of a + b is smaller than
421 /// the pointer size, because LLVM conceptually sign-extends GEP indices to
422 /// pointer size before computing the address
423 /// (http://llvm.org/docs/LangRef.html#id181).
424 ///
425 /// This canonicalization is very likely already done in clang and
426 /// instcombine. Therefore, the program will probably remain the same.
427 ///
428 /// Returns true if the module changes.
429 ///
430 /// Verified in @i32_add in split-gep.ll
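///
/// A minimal sketch of the rewrite (hypothetical IR, assuming 64-bit
/// pointers):
///   %p = getelementptr inbounds [32 x float], [32 x float]* %a, i64 0, i32 %i
/// becomes
///   %idxprom = sext i32 %i to i64
///   %p = getelementptr inbounds [32 x float], [32 x float]* %a, i64 0, i64 %idxprom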
431 bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
432
433 /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
434 /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
435 /// the constant offset. After extraction, it becomes desirable to reunite the
436 /// distributed sexts. For example,
437 ///
438 /// &a[sext(i +nsw (j +nsw 5))]
439 /// => distribute &a[sext(i) +nsw (sext(j) +nsw 5)]
440 /// => constant extraction &a[sext(i) + sext(j)] + 5
441 /// => reunion &a[sext(i +nsw j)] + 5
442 bool reuniteExts(Function &F);
443
444 /// A helper that reunites sexts in an instruction.
445 bool reuniteExts(Instruction *I);
446
447 /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
448 Instruction *findClosestMatchingDominator(
449 const SCEV *Key, Instruction *Dominatee,
450 DenseMap<const SCEV *, SmallVector<Instruction *, 2>> &DominatingExprs);
451
452 /// Verify F is free of dead code.
453 void verifyNoDeadCode(Function &F);
454
455 bool hasMoreThanOneUseInLoop(Value *v, Loop *L);
456
457 // Swap the index operands of the two GEPs.
458 void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);
459
460 // Check if it is safe to swap the operands of the two GEPs.
461 bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
462 Loop *CurLoop);
463
464 const DataLayout *DL = nullptr;
465 DominatorTree *DT = nullptr;
466 ScalarEvolution *SE;
467 LoopInfo *LI;
468 TargetLibraryInfo *TLI;
469 // Retrieved lazily since not always used.
470 function_ref<TargetTransformInfo &(Function &)> GetTTI;
471
472 /// Whether to lower a GEP with multiple indices into arithmetic operations or
473 /// multiple GEPs with a single index.
474 bool LowerGEP;
475
476 DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingAdds;
477 DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingSubs;
478};
479
480} // end anonymous namespace
481
482char SeparateConstOffsetFromGEPLegacyPass::ID = 0;
483
484INITIALIZE_PASS_BEGIN(
485 SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
486 "Split GEPs to a variadic base and a constant offset for better CSE", false,
487 false)
488INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
489INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
490INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
491INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
492INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
493INITIALIZE_PASS_END(
494 SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
495 "Split GEPs to a variadic base and a constant offset for better CSE", false,
496 false)
497
498FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) {
499 return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP);
500}
501
502bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
503 bool ZeroExtended,
504 BinaryOperator *BO,
505 bool NonNegative) {
506 // We only consider ADD, SUB and OR, because a non-zero constant found in
507 // expressions composed of these operations can be easily hoisted as a
508 // constant offset by reassociation.
509 if (BO->getOpcode() != Instruction::Add &&
510 BO->getOpcode() != Instruction::Sub &&
511 BO->getOpcode() != Instruction::Or) {
512 return false;
513 }
514
515 Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
516 // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
517 // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
518 // FIXME: this does not appear to be covered by any tests
519 // (with x86/aarch64 backends at least)
520 if (BO->getOpcode() == Instruction::Or &&
521 !haveNoCommonBitsSet(LHS, RHS, DL, nullptr, BO, DT))
522 return false;
523
524 // In addition, tracing into BO requires that its surrounding s/zext (if
525 // any) is distributable to both operands.
526 //
527 // Suppose BO = A op B.
528 // SignExtended | ZeroExtended | Distributable?
529 // --------------+--------------+----------------------------------
530 // 0 | 0 | true because no s/zext exists
531 // 0 | 1 | zext(BO) == zext(A) op zext(B)
532 // 1 | 0 | sext(BO) == sext(A) op sext(B)
533 // 1 | 1 | zext(sext(BO)) ==
534 // | | zext(sext(A)) op zext(sext(B))
535 if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
536 // If a + b >= 0 and (a >= 0 or b >= 0), then
537 // sext(a + b) = sext(a) + sext(b)
538 // even if the addition is not marked nsw.
539 //
540 // Leveraging this invariant, we can trace into an sext'ed inbound GEP
541 // index if the constant offset is non-negative.
542 //
543 // Verified in @sext_add in split-gep.ll.
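    // (Reasoning sketch: if exactly one operand is negative, the n-bit
    // addition cannot wrap at all; if both operands are non-negative, a
    // non-negative result implies the sum stayed below 2^(n-1), so there is
    // no signed wrap either. In both cases sext distributes over the add.)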
544 if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {
545 if (!ConstLHS->isNegative())
546 return true;
547 }
548 if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
549 if (!ConstRHS->isNegative())
550 return true;
551 }
552 }
553
554 // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
555 // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
556 if (BO->getOpcode() == Instruction::Add ||
557 BO->getOpcode() == Instruction::Sub) {
558 if (SignExtended && !BO->hasNoSignedWrap())
559 return false;
560 if (ZeroExtended && !BO->hasNoUnsignedWrap())
561 return false;
562 }
563
564 return true;
565}
566
567APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
568 bool SignExtended,
569 bool ZeroExtended) {
570 // Save off the current height of the chain, in case we need to restore it.
571 size_t ChainLength = UserChain.size();
572
573 // BO being non-negative does not shed light on whether its operands are
574 // non-negative. Clear the NonNegative flag here.
575 APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
576 /* NonNegative */ false);
577 // If we found a constant offset in the left operand, stop and return that.
578 // This shortcut might cause us to miss opportunities of combining the
579 // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
580 // However, such cases are probably already handled by -instcombine,
581 // given this pass runs after the standard optimizations.
582 if (ConstantOffset != 0) return ConstantOffset;
583
584 // Reset the chain back to where it was when we started exploring this node,
585 // since visiting the LHS didn't pan out.
586 UserChain.resize(ChainLength);
587
588 ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
589 /* NonNegative */ false);
590 // If BO is a sub operator, negate the constant offset found in the right
591 // operand.
592 if (BO->getOpcode() == Instruction::Sub)
593 ConstantOffset = -ConstantOffset;
594
595 // If RHS wasn't a suitable candidate either, reset the chain again.
596 if (ConstantOffset == 0)
597 UserChain.resize(ChainLength);
598
599 return ConstantOffset;
600}
601
602APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
603 bool ZeroExtended, bool NonNegative) {
604 // TODO(jingyue): We could trace into integer/pointer casts, such as
605 // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
606 // integers because it gives good enough results for our benchmarks.
607 unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
608
609 // We cannot do much with Values that are not a User, such as an Argument.
610 User *U = dyn_cast<User>(V);
611 if (U == nullptr) return APInt(BitWidth, 0);
612
613 APInt ConstantOffset(BitWidth, 0);
614 if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
615 // Hooray, we found it!
616 ConstantOffset = CI->getValue();
617 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
618 // Trace into subexpressions for more hoisting opportunities.
619 if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
620 ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
621 } else if (isa<TruncInst>(V)) {
622 ConstantOffset =
623 find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
624 .trunc(BitWidth);
625 } else if (isa<SExtInst>(V)) {
626 ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
627 ZeroExtended, NonNegative).sext(BitWidth);
628 } else if (isa<ZExtInst>(V)) {
629 // As an optimization, we can clear the SignExtended flag because
630 // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
631 //
632 // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
633 ConstantOffset =
634 find(U->getOperand(0), /* SignExtended */ false,
635 /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
636 }
637
638 // If we found a non-zero constant offset, add it to the path for
639 // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
640 // help this optimization.
641 if (ConstantOffset != 0)
642 UserChain.push_back(U);
643 return ConstantOffset;
644}
645
646Value *ConstantOffsetExtractor::applyExts(Value *V) {
647 Value *Current = V;
648 // ExtInsts is built in the use-def order. Therefore, we apply them to V
649 // in the reversed order.
650 for (CastInst *I : llvm::reverse(ExtInsts)) {
651 if (Constant *C = dyn_cast<Constant>(Current)) {
652 // If Current is a constant, apply s/zext using ConstantExpr::getCast.
653 // ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt.
654 Current = ConstantExpr::getCast(I->getOpcode(), C, I->getType());
655 } else {
656 Instruction *Ext = I->clone();
657 Ext->setOperand(0, Current);
658 Ext->insertBefore(IP);
659 Current = Ext;
660 }
661 }
662 return Current;
663}
664
665Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
666 distributeExtsAndCloneChain(UserChain.size() - 1);
667 // Remove all nullptrs (which used to be s/zext instructions) from UserChain.
668 unsigned NewSize = 0;
669 for (User *I : UserChain) {
670 if (I != nullptr) {
671 UserChain[NewSize] = I;
672 NewSize++;
673 }
674 }
675 UserChain.resize(NewSize);
676 return removeConstOffset(UserChain.size() - 1);
677}
678
679Value *
680ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
681 User *U = UserChain[ChainIndex];
682 if (ChainIndex == 0) {
683 assert(isa<ConstantInt>(U));
684 // If U is a ConstantInt, applyExts will return a ConstantInt as well.
685 return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
686 }
687
688 if (CastInst *Cast = dyn_cast<CastInst>(U)) {
689 assert(
690 (isa<SExtInst>(Cast) || isa<ZExtInst>(Cast) || isa<TruncInst>(Cast)) &&
691 "Only following instructions can be traced: sext, zext & trunc");
692 ExtInsts.push_back(Cast);
693 UserChain[ChainIndex] = nullptr;
694 return distributeExtsAndCloneChain(ChainIndex - 1);
695 }
696
697 // Function find only traces into BinaryOperator and CastInst.
698 BinaryOperator *BO = cast<BinaryOperator>(U);
699 // OpNo = which operand of BO is UserChain[ChainIndex - 1]
700 unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
701 Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
702 Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);
703
704 BinaryOperator *NewBO = nullptr;
705 if (OpNo == 0) {
706 NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
707 BO->getName(), IP);
708 } else {
709 NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
710 BO->getName(), IP);
711 }
712 return UserChain[ChainIndex] = NewBO;
713}
714
715Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
716 if (ChainIndex == 0) {
717 assert(isa<ConstantInt>(UserChain[ChainIndex]));
718 return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
719 }
720
721 BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
722 assert((BO->use_empty() || BO->hasOneUse()) &&
723 "distributeExtsAndCloneChain clones each BinaryOperator in "
724 "UserChain, so no one should be used more than "
725 "once");
726
727 unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
728 assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
729 Value *NextInChain = removeConstOffset(ChainIndex - 1);
730 Value *TheOther = BO->getOperand(1 - OpNo);
731
732 // If NextInChain is 0 and not the LHS of a sub, we can simplify the
733 // sub-expression to be just TheOther.
734 if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
735 if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
736 return TheOther;
737 }
738
739 Instruction::BinaryOps NewOp = BO->getOpcode();
740 if (BO->getOpcode() == Instruction::Or) {
741 // Rebuild "or" as "add", because "or" may be invalid for the new
742 // expression.
743 //
744 // For instance, given
745 // a | (b + 5) where a and b + 5 have no common bits,
746 // we can extract 5 as the constant offset.
747 //
748 // However, reusing the "or" in the new index would give us
749 // (a | b) + 5
750 // which does not equal a | (b + 5).
751 //
752 // Replacing the "or" with "add" is fine, because
753 // a | (b + 5) = a + (b + 5) = (a + b) + 5
754 NewOp = Instruction::Add;
755 }
756
757 BinaryOperator *NewBO;
758 if (OpNo == 0) {
759 NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP);
760 } else {
761 NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP);
762 }
763 NewBO->takeName(BO);
764 return NewBO;
765}
766
767Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
768 User *&UserChainTail,
769 const DominatorTree *DT) {
770 ConstantOffsetExtractor Extractor(GEP, DT);
771 // Find a non-zero constant offset first.
772 APInt ConstantOffset =
773 Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
774 GEP->isInBounds());
775 if (ConstantOffset == 0) {
776 UserChainTail = nullptr;
777 return nullptr;
778 }
779 // Separates the constant offset from the GEP index.
780 Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
781 UserChainTail = Extractor.UserChain.back();
782 return IdxWithoutConstOffset;
783}
784
785int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
786 const DominatorTree *DT) {
787 // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
788 return ConstantOffsetExtractor(GEP, DT)
789 .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
790 GEP->isInBounds())
791 .getSExtValue();
792}
793
794bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
795 GetElementPtrInst *GEP) {
796 bool Changed = false;
797 Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
798 gep_type_iterator GTI = gep_type_begin(*GEP);
799 for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
800 I != E; ++I, ++GTI) {
801 // Skip struct member indices which must be i32.
802 if (GTI.isSequential()) {
803 if ((*I)->getType() != IntPtrTy) {
804 *I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP);
805 Changed = true;
806 }
807 }
808 }
809 return Changed;
810}
811
812int64_t
813SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
814 bool &NeedsExtraction) {
815 NeedsExtraction = false;
816 int64_t AccumulativeByteOffset = 0;
817 gep_type_iterator GTI = gep_type_begin(*GEP);
818 for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
819 if (GTI.isSequential()) {
820 // Constant offsets of scalable types are not really constant.
821 if (isa<ScalableVectorType>(GTI.getIndexedType()))
822 continue;
823
824 // Tries to extract a constant offset from this GEP index.
825 int64_t ConstantOffset =
826 ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
827 if (ConstantOffset != 0) {
828 NeedsExtraction = true;
829 // A GEP may have multiple indices. We accumulate the extracted
830 // constant offset to a byte offset, and later offset the remainder of
831 // the original GEP with this byte offset.
832 AccumulativeByteOffset +=
833 ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
834 }
835 } else if (LowerGEP) {
836 StructType *StTy = GTI.getStructType();
837 uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
838 // Skip field 0 as the offset is always 0.
839 if (Field != 0) {
840 NeedsExtraction = true;
841 AccumulativeByteOffset +=
842 DL->getStructLayout(StTy)->getElementOffset(Field);
843 }
844 }
845 }
846 return AccumulativeByteOffset;
847}
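// As an illustrative walk-through (not an actual test case): for the GEP in
// the file header,
//   gep [32 x [32 x float]]* @a, i64 0, i64 %x1, i64 %y1
// with %x1 = add i64 %x, 1 and %y1 = add i64 %y, 1, Find() extracts 1 from
// each index, so AccumulativeByteOffset = 1 * 128 + 1 * 4 = 132 bytes
// (sizeof([32 x float]) = 128, sizeof(float) = 4), which matches the
// "base + 33 * sizeof(float)" load shown in the header comment.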
848
849void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
850 GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
851 IRBuilder<> Builder(Variadic);
852 Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
853
854 Type *I8PtrTy =
855 Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace());
856 Value *ResultPtr = Variadic->getOperand(0);
857 Loop *L = LI->getLoopFor(Variadic->getParent());
858 // The base is a swap candidate only if it is loop invariant and not used more than once in the loop.
859 bool isSwapCandidate =
860 L && L->isLoopInvariant(ResultPtr) &&
861 !hasMoreThanOneUseInLoop(ResultPtr, L);
862 Value *FirstResult = nullptr;
863
864 if (ResultPtr->getType() != I8PtrTy)
865 ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
866
867 gep_type_iterator GTI = gep_type_begin(*Variadic);
868 // Create an ugly GEP for each sequential index. We don't create GEPs for
869 // structure indices, as they are accumulated in the constant offset index.
870 for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
871 if (GTI.isSequential()) {
872 Value *Idx = Variadic->getOperand(I);
873 // Skip zero indices.
874 if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
875 if (CI->isZero())
876 continue;
877
878 APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
879 DL->getTypeAllocSize(GTI.getIndexedType()));
880 // Scale the index by element size.
881 if (ElementSize != 1) {
882 if (ElementSize.isPowerOf2()) {
883 Idx = Builder.CreateShl(
884 Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
885 } else {
886 Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
887 }
888 }
889 // Create an ugly GEP with a single index for each index.
890 ResultPtr =
891 Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
892 if (FirstResult == nullptr)
893 FirstResult = ResultPtr;
894 }
895 }
896
897 // Create a GEP with the constant offset index.
898 if (AccumulativeByteOffset != 0) {
899 Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
900 ResultPtr =
901 Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
902 } else
903 isSwapCandidate = false;
904
905 // If we created a GEP with constant index, and the base is loop invariant,
906 // then we swap the first one with it, so LICM can move constant GEP out
907 // later.
908 auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
909 auto *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
910 if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
911 swapGEPOperand(FirstGEP, SecondGEP);
912
913 if (ResultPtr->getType() != Variadic->getType())
914 ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());
915
916 Variadic->replaceAllUsesWith(ResultPtr);
917 Variadic->eraseFromParent();
918}
919
920void
921SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
922 int64_t AccumulativeByteOffset) {
923 IRBuilder<> Builder(Variadic);
924 Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
925
926 Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
927 gep_type_iterator GTI = gep_type_begin(*Variadic);
928 // Create ADD/SHL/MUL arithmetic operations for each sequential index. We
929 // don't create arithmetic for structure indices, as they are accumulated
930 // in the constant offset index.
931 for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
932 if (GTI.isSequential()) {
933 Value *Idx = Variadic->getOperand(I);
934 // Skip zero indices.
935 if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
936 if (CI->isZero())
937 continue;
938
939 APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
940 DL->getTypeAllocSize(GTI.getIndexedType()));
941 // Scale the index by element size.
942 if (ElementSize != 1) {
943 if (ElementSize.isPowerOf2()) {
944 Idx = Builder.CreateShl(
945 Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
946 } else {
947 Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
948 }
949 }
950 // Create an ADD for each index.
951 ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
952 }
953 }
954
955 // Create an ADD for the constant offset index.
956 if (AccumulativeByteOffset != 0) {
957 ResultPtr = Builder.CreateAdd(
958 ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
959 }
960
961 ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
962 Variadic->replaceAllUsesWith(ResultPtr);
963 Variadic->eraseFromParent();
964}
965
966bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
967 // Skip vector GEPs.
968 if (GEP->getType()->isVectorTy())
969 return false;
970
971 // The backend can already nicely handle the case where all indices are
972 // constant.
973 if (GEP->hasAllConstantIndices())
974 return false;
975
976 bool Changed = canonicalizeArrayIndicesToPointerSize(GEP);
977
978 bool NeedsExtraction;
979 int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
980
981 if (!NeedsExtraction)
982 return Changed;
983
984 TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());
985
986 // If LowerGEP is disabled, before really splitting the GEP, check whether the
987 // backend supports the addressing mode we are about to produce. If not, this
988 // splitting probably won't be beneficial.
989 // If LowerGEP is enabled, even if the extracted constant offset cannot match
990 // the addressing mode, we can still optimize the other lowered parts of the
991 // variable indices. Therefore, we don't check for addressing modes in that
992 // case.
993 if (!LowerGEP) {
994 unsigned AddrSpace = GEP->getPointerAddressSpace();
995 if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
996 /*BaseGV=*/nullptr, AccumulativeByteOffset,
997 /*HasBaseReg=*/true, /*Scale=*/0,
998 AddrSpace)) {
999 return Changed;
1000 }
1001 }
1002
1003 // Remove the constant offset in each sequential index. The resultant GEP
1004 // computes the variadic base.
1005 // Notice that we don't remove struct field indices here. If LowerGEP is
1006 // disabled, a structure index is not accumulated and we still use the old
1007 // one. If LowerGEP is enabled, a structure index is accumulated in the
1008 // constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later
1009 // handle the constant offset and won't need a new structure index.
1010 gep_type_iterator GTI = gep_type_begin(*GEP);
1011 for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
1012 if (GTI.isSequential()) {
1013 // Constant offsets of scalable types are not really constant.
1014 if (isa<ScalableVectorType>(GTI.getIndexedType()))
1015 continue;
1016
1017 // Splits this GEP index into a variadic part and a constant offset, and
1018 // uses the variadic part as the new index.
1019 Value *OldIdx = GEP->getOperand(I);
1020 User *UserChainTail;
1021 Value *NewIdx =
1022 ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail, DT);
1023 if (NewIdx != nullptr) {
1024 // Switches to the index with the constant offset removed.
1025 GEP->setOperand(I, NewIdx);
1026 // After switching to the new index, we can garbage-collect UserChain
1027 // and the old index if they are not used.
1028 RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
1029 RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
1030 }
1031 }
1032 }
1033
1034 // Clear the inbounds attribute because the new index may be off-bound.
1035 // e.g.,
1036 //
1037 // b = add i64 a, 5
1038 // addr = gep inbounds float, float* p, i64 b
1039 //
1040 // is transformed to:
1041 //
1042 // addr2 = gep float, float* p, i64 a ; inbounds removed
1043 // addr = gep inbounds float, float* addr2, i64 5
1044 //
1045 // If a is -4, although the old index b is in bounds, the new index a is
1046 // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
1047 // inbounds keyword is not present, the offsets are added to the base
1048 // address with silently-wrapping two's complement arithmetic".
1049 // Therefore, the final code will be semantically equivalent.
1050 //
1051 // TODO(jingyue): do some range analysis to keep as many inbounds as
1052 // possible. GEPs with inbounds are more friendly to alias analysis.
1053 bool GEPWasInBounds = GEP->isInBounds();
1054 GEP->setIsInBounds(false);
1055
1056 // Lowers a GEP to either GEPs with a single index or arithmetic operations.
1057 if (LowerGEP) {
1058 // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
1059 // arithmetic operations if the target uses alias analysis in codegen.
1060 if (TTI.useAA())
1061 lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
1062 else
1063 lowerToArithmetics(GEP, AccumulativeByteOffset);
1064 return true;
1065 }
1066
1067 // No need to create another GEP if the accumulative byte offset is 0.
1068 if (AccumulativeByteOffset == 0)
1069 return true;
1070
1071 // Offsets the base with the accumulative byte offset.
1072 //
1073 // %gep ; the base
1074 // ... %gep ...
1075 //
1076 // => add the offset
1077 //
1078 // %gep2 ; clone of %gep
1079 // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
1080 // %gep ; will be removed
1081 // ... %gep ...
1082 //
1083 // => replace all uses of %gep with %new.gep and remove %gep
1084 //
1085 // %gep2 ; clone of %gep
1086 // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
1087 // ... %new.gep ...
1088 //
1089 // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
1090 // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
1091 // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
1092 // type of %gep.
1093 //
1094 // %gep2 ; clone of %gep
1095 // %0 = bitcast %gep2 to i8*
1096 // %uglygep = gep %0, <offset>
1097 // %new.gep = bitcast %uglygep to <type of %gep>
1098 // ... %new.gep ...
1099 Instruction *NewGEP = GEP->clone();
1100 NewGEP->insertBefore(GEP);
1101
1102 // Per the ANSI C standard, signed / unsigned = unsigned and signed % unsigned =
1103 // unsigned. Therefore, we cast ElementTypeSizeOfGEP to a signed integer so the
1104 // division and modulo below are performed as signed arithmetic.
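  // For example (illustrative numbers only): with AccumulativeByteOffset = -8
  // and an element size of 4, "-8 / 4u" would be evaluated as a huge unsigned
  // quotient, whereas "-8 / int64_t(4)" yields the intended index -2.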
1105 int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
1106 DL->getTypeAllocSize(GEP->getResultElementType()));
1107 Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
1108 if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
1109 // Very likely. As long as %gep is naturally aligned, the byte offset we
1110 // extracted should be a multiple of sizeof(*%gep).
1111 int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
1112 NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
1113 ConstantInt::get(IntPtrTy, Index, true),
1114 GEP->getName(), GEP);
1115 NewGEP->copyMetadata(*GEP);
1116 // Inherit the inbounds attribute of the original GEP.
1117 cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
1118 } else {
1119 // Unlikely but possible. For example,
1120 // #pragma pack(1)
1121 // struct S {
1122 // int a[3];
1123 // int64 b[8];
1124 // };
1125 // #pragma pack()
1126 //
1127 // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
1128 // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
1129 // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
1130 // sizeof(int64).
1131 //
1132 // Emit an uglygep in this case.
1133 IRBuilder<> Builder(GEP);
1134 Type *I8PtrTy =
1135 Builder.getInt8Ty()->getPointerTo(GEP->getPointerAddressSpace());
1136
1137 NewGEP = cast<Instruction>(Builder.CreateGEP(
1138 Builder.getInt8Ty(), Builder.CreateBitCast(NewGEP, I8PtrTy),
1139 {ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true)}, "uglygep",
1140 GEPWasInBounds));
1141
1142 NewGEP->copyMetadata(*GEP);
1143 NewGEP = cast<Instruction>(Builder.CreateBitCast(NewGEP, GEP->getType()));
1144 }
1145
1146 GEP->replaceAllUsesWith(NewGEP);
1147 GEP->eraseFromParent();
1148
1149 return true;
1150}
1151
1152bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
1153 if (skipFunction(F))
1154 return false;
1155 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1156 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1157 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1158 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1159 auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
1160 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1161 };
1162 SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
1163 return Impl.run(F);
1164}
1165
1166bool SeparateConstOffsetFromGEP::run(Function &F) {
1167 if (DisableSeparateConstOffsetFromGEP)
1168 return false;
1169
1170 DL = &F.getParent()->getDataLayout();
1171 bool Changed = false;
1172 for (BasicBlock &B : F) {
1173 if (!DT->isReachableFromEntry(&B))
1174 continue;
1175
1176 for (Instruction &I : llvm::make_early_inc_range(B))
1177 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I))
1178 Changed |= splitGEP(GEP);
1179 // No need to split GEP ConstantExprs because all their indices are constant
1180 // already.
1181 }
1182
1183 Changed |= reuniteExts(F);
1184
1185 if (VerifyNoDeadCode)
1186 verifyNoDeadCode(F);
1187
1188 return Changed;
1189}
1190
1191Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
1192 const SCEV *Key, Instruction *Dominatee,
1193 DenseMap<const SCEV *, SmallVector<Instruction *, 2>> &DominatingExprs) {
1194 auto Pos = DominatingExprs.find(Key);
1195 if (Pos == DominatingExprs.end())
1196 return nullptr;
1197
1198 auto &Candidates = Pos->second;
1199 // Because we process the basic blocks in pre-order of the dominator tree, a
1200 // candidate that doesn't dominate the current instruction won't dominate any
1201 // future instruction either. Therefore, we pop it out of the stack. This
1202 // optimization makes the algorithm O(n).
1203 while (!Candidates.empty()) {
1204 Instruction *Candidate = Candidates.back();
1205 if (DT->dominates(Candidate, Dominatee))
1206 return Candidate;
1207 Candidates.pop_back();
1208 }
1209 return nullptr;
1210}
1211
1212bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
1213 if (!SE->isSCEVable(I->getType()))
1214 return false;
1215
1216 // Dom: LHS+RHS
1217 // I: sext(LHS)+sext(RHS)
1218 // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
1219 // TODO: handle zext
1220 Value *LHS = nullptr, *RHS = nullptr;
1221 if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
1222 if (LHS->getType() == RHS->getType()) {
1223 const SCEV *Key =
1224 SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
1225 if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingAdds)) {
1226 Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
1227 NewSExt->takeName(I);
1228 I->replaceAllUsesWith(NewSExt);
1229 RecursivelyDeleteTriviallyDeadInstructions(I);
1230 return true;
1231 }
1232 }
1233 } else if (match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
1234 if (LHS->getType() == RHS->getType()) {
1235 const SCEV *Key = SE->getAddExpr(
1236 SE->getUnknown(LHS), SE->getNegativeSCEV(SE->getUnknown(RHS)));
1237 if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingSubs)) {
1238 Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
1239 NewSExt->takeName(I);
1240 I->replaceAllUsesWith(NewSExt);
1241 RecursivelyDeleteTriviallyDeadInstructions(I);
1242 return true;
1243 }
1244 }
1245 }
1246
1247 // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
1248 if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS)))) {
1249 if (programUndefinedIfPoison(I)) {
1250 const SCEV *Key =
1251 SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
1252 DominatingAdds[Key].push_back(I);
1253 }
1254 } else if (match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
1255 if (programUndefinedIfPoison(I)) {
1256 const SCEV *Key = SE->getAddExpr(
1257 SE->getUnknown(LHS), SE->getNegativeSCEV(SE->getUnknown(RHS)));
1258 DominatingSubs[Key].push_back(I);
1259 }
1260 }
1261 return false;
1262}
1263
1264bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
1265 bool Changed = false;
1266 DominatingAdds.clear();
1267 DominatingSubs.clear();
1268 for (const auto Node : depth_first(DT)) {
1269 BasicBlock *BB = Node->getBlock();
1270 for (Instruction &I : llvm::make_early_inc_range(*BB))
1271 Changed |= reuniteExts(&I);
1272 }
1273 return Changed;
1274}
1275
1276void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
1277 for (BasicBlock &B : F) {
1278 for (Instruction &I : B) {
1279 if (isInstructionTriviallyDead(&I)) {
1280 std::string ErrMessage;
1281 raw_string_ostream RSO(ErrMessage);
1282 RSO << "Dead instruction detected!\n" << I << "\n";
1283 llvm_unreachable(RSO.str().c_str());
1284 }
1285 }
1286 }
1287}
1288
1289bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
1290 GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
1291 if (!FirstGEP || !FirstGEP->hasOneUse())
1292 return false;
1293
1294 if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
1295 return false;
1296
1297 if (FirstGEP == SecondGEP)
1298 return false;
1299
1300 unsigned FirstNum = FirstGEP->getNumOperands();
1301 unsigned SecondNum = SecondGEP->getNumOperands();
1302 // Give up if the number of operands is not 2.
1303 if (FirstNum != SecondNum || FirstNum != 2)
1304 return false;
1305
1306 Value *FirstBase = FirstGEP->getOperand(0);
1307 Value *SecondBase = SecondGEP->getOperand(0);
1308 Value *FirstOffset = FirstGEP->getOperand(1);
1309 // Give up if the index of the first GEP is loop invariant.
1310 if (CurLoop->isLoopInvariant(FirstOffset))
1311 return false;
1312
1313 // Give up if the bases don't have the same type.
1314 if (FirstBase->getType() != SecondBase->getType())
1315 return false;
1316
1317 Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);
1318
1319 // Check if the second operand of the first GEP has a constant coefficient.
1320 // For example, for the following code, we won't gain anything by
1321 // hoisting the second GEP out because the second GEP can be folded away.
1322 // %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
1323 // %67 = shl i64 %scevgep.sum.ur159, 2
1324 // %uglygep160 = getelementptr i8* %65, i64 %67
1325 // %uglygep161 = getelementptr i8* %uglygep160, i64 -1024
1326
1327 // Skip a constant shift instruction which may be generated by splitting GEPs.
1328 if (FirstOffsetDef && FirstOffsetDef->isShift() &&
1329 isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
1330 FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));
1331
1332 // Give up if FirstOffsetDef is an Add or Sub with a constant,
1333 // because it may not be profitable at all due to constant folding.
1334 if (FirstOffsetDef)
1335 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
1336 unsigned opc = BO->getOpcode();
1337 if ((opc == Instruction::Add || opc == Instruction::Sub) &&
1338 (isa<ConstantInt>(BO->getOperand(0)) ||
1339 isa<ConstantInt>(BO->getOperand(1))))
1340 return false;
1341 }
1342 return true;
1343}
1344
1345bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
1346 int UsesInLoop = 0;
1347 for (User *U : V->users()) {
1348 if (Instruction *User = dyn_cast<Instruction>(U))
1349 if (L->contains(User))
1350 if (++UsesInLoop > 1)
1351 return true;
1352 }
1353 return false;
1354}
1355
1356void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
1357 GetElementPtrInst *Second) {
1358 Value *Offset1 = First->getOperand(1);
1359 Value *Offset2 = Second->getOperand(1);
1360 First->setOperand(1, Offset2);
1361 Second->setOperand(1, Offset1);
1362
1363 // We changed p+o+c to p+c+o; p+c may not be inbounds anymore.
1364 const DataLayout &DAL = First->getModule()->getDataLayout();
1365 APInt Offset(DAL.getIndexSizeInBits(
1366 cast<PointerType>(First->getType())->getAddressSpace()),
1367 0);
1368 Value *NewBase =
1369 First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);
1370 uint64_t ObjectSize;
1371 if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
1372 Offset.ugt(ObjectSize)) {
1373 First->setIsInBounds(false);
1374 Second->setIsInBounds(false);
1375 } else
1376 First->setIsInBounds(true);
1377}
1378
1379void SeparateConstOffsetFromGEPPass::printPipeline(
1380 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1381 static_cast<PassInfoMixin<SeparateConstOffsetFromGEPPass> *>(this)
1382 ->printPipeline(OS, MapClassName2PassName);
1383 OS << '<';
1384 if (LowerGEP)
1385 OS << "lower-gep";
1386 OS << '>';
1387}
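// For reference (a hedged usage sketch, assuming the pass is registered with
// the new pass manager under the name shown above), the pass can typically be
// exercised on its own with the opt driver:
//
//   opt -passes=separate-const-offset-from-gep -S input.ll
//
// and, to also lower multi-index GEPs, with the parameter printed by
// printPipeline:
//
//   opt -passes='separate-const-offset-from-gep<lower-gep>' -S input.ll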
1388
1389PreservedAnalyses
1390SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) {
1391 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1392 auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
1393 auto *LI = &AM.getResult<LoopAnalysis>(F);
1394 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
1395 auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
1396 return AM.getResult<TargetIRAnalysis>(F);
1397 };
1398 SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
1399 if (!Impl.run(F))
1400 return PreservedAnalyses::all();
1401 PreservedAnalyses PA;
1402 PA.preserveSet<CFGAnalyses>();
1403 return PA;
1404}
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:188
This class represents an analyzed expression in the program.
This class represents a sign extension of integer types.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Class to represent struct types.
Definition: DerivedTypes.h:213
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
bool use_empty() const
Definition: Value.h:344
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:381
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Key
PAL metadata keys.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:979
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:991
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1839
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:537
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:748
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:398
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:511
bool programUndefinedIfPoison(const Instruction *Inst)
void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &)
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if LHS and RHS have no common bits set.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:184
gep_type_iterator gep_type_begin(const User *GEP)
iterator_range< df_iterator< T > > depth_first(const T &G)
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:371