LLVM 22.0.0git
StraightLineStrengthReduce.cpp
Go to the documentation of this file.
1//===- StraightLineStrengthReduce.cpp - -----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements straight-line strength reduction (SLSR). Unlike loop
10// strength reduction, this algorithm is designed to reduce arithmetic
11// redundancy in straight-line code instead of loops. It has proven to be
12// effective in simplifying arithmetic statements derived from an unrolled loop.
13// It can also simplify the logic of SeparateConstOffsetFromGEP.
14//
15// There are many optimizations we can perform in the domain of SLSR.
16// We look for strength reduction candidates in the following forms:
17//
18// Form Add: B + i * S
19// Form Mul: (B + i) * S
20// Form GEP: &B[i * S]
21//
22// where S is an integer variable, and i is a constant integer. If we found two
23// candidates S1 and S2 in the same form and S1 dominates S2, we may rewrite S2
24// in a simpler way with respect to S1 (index delta). For example,
25//
26// S1: X = B + i * S
27// S2: Y = B + i' * S => X + (i' - i) * S
28//
29// S1: X = (B + i) * S
30// S2: Y = (B + i') * S => X + (i' - i) * S
31//
32// S1: X = &B[i * S]
33// S2: Y = &B[i' * S] => &X[(i' - i) * S]
34//
35// Note: (i' - i) * S is folded to the extent possible.
36//
37// For Add and GEP forms, we can also rewrite a candidate in a simpler way
38// with respect to other dominating candidates if their B or S are different
39// but other parts are the same. For example,
40//
41// Base Delta:
42// S1: X = B + i * S
43// S2: Y = B' + i * S => X + (B' - B)
44//
45// S1: X = &B [i * S]
46// S2: Y = &B'[i * S] => X + (B' - B)
47//
48// Stride Delta:
49// S1: X = B + i * S
50// S2: Y = B + i * S' => X + i * (S' - S)
51//
52// S1: X = &B[i * S]
53// S2: Y = &B[i * S'] => X + i * (S' - S)
54//
55// PS: Stride delta rewrite on Mul form is usually non-profitable, and Base
56// delta rewrite sometimes is profitable, so we do not support them on Mul.
57//
58// This rewriting is in general a good idea. The code patterns we focus on
59// usually come from loop unrolling, so the delta is likely the same
60// across iterations and can be reused. When that happens, the optimized form
61// takes only one add starting from the second iteration.
62//
63// When such rewriting is possible, we call S1 a "basis" of S2. When S2 has
64// multiple bases, we choose to rewrite S2 with respect to its "immediate"
65// basis, the basis that is the closest ancestor in the dominator tree.
66//
67// TODO:
68//
69// - Floating point arithmetics when fast math is enabled.
70
72#include "llvm/ADT/APInt.h"
74#include "llvm/ADT/SetVector.h"
80#include "llvm/IR/Constants.h"
81#include "llvm/IR/DataLayout.h"
83#include "llvm/IR/Dominators.h"
85#include "llvm/IR/IRBuilder.h"
86#include "llvm/IR/Instruction.h"
88#include "llvm/IR/Module.h"
89#include "llvm/IR/Operator.h"
91#include "llvm/IR/Type.h"
92#include "llvm/IR/Value.h"
94#include "llvm/Pass.h"
100#include <cassert>
101#include <cstdint>
102#include <limits>
103#include <list>
104#include <queue>
105#include <vector>
106
107using namespace llvm;
108using namespace PatternMatch;
109
110#define DEBUG_TYPE "slsr"
111
112static const unsigned UnknownAddressSpace =
113 std::numeric_limits<unsigned>::max();
114
115DEBUG_COUNTER(StraightLineStrengthReduceCounter, "slsr-counter",
116 "Controls whether rewriteCandidate is executed.");
117
118// Only for testing.
119static cl::opt<bool>
120 EnablePoisonReuseGuard("enable-poison-reuse-guard", cl::init(true),
121 cl::desc("Enable poison-reuse guard"));
122
123namespace {
124
125class StraightLineStrengthReduceLegacyPass : public FunctionPass {
126 const DataLayout *DL = nullptr;
127
128public:
129 static char ID;
130
131 StraightLineStrengthReduceLegacyPass() : FunctionPass(ID) {
134 }
135
136 void getAnalysisUsage(AnalysisUsage &AU) const override {
137 AU.addRequired<DominatorTreeWrapperPass>();
138 AU.addRequired<ScalarEvolutionWrapperPass>();
139 AU.addRequired<TargetTransformInfoWrapperPass>();
140 // We do not modify the shape of the CFG.
141 AU.setPreservesCFG();
142 }
143
144 bool doInitialization(Module &M) override {
145 DL = &M.getDataLayout();
146 return false;
147 }
148
149 bool runOnFunction(Function &F) override;
150};
151
152class StraightLineStrengthReduce {
153public:
154 StraightLineStrengthReduce(const DataLayout *DL, DominatorTree *DT,
155 ScalarEvolution *SE, TargetTransformInfo *TTI)
156 : DL(DL), DT(DT), SE(SE), TTI(TTI) {}
157
158 // SLSR candidate. Such a candidate must be in one of the forms described in
159 // the header comments.
160 struct Candidate {
161 enum Kind {
162 Invalid, // reserved for the default constructor
163 Add, // B + i * S
164 Mul, // (B + i) * S
165 GEP, // &B[..][i * S][..]
166 };
167
168 enum DKind {
169 InvalidDelta, // reserved for the default constructor
170 IndexDelta, // Delta is a constant from Index
171 BaseDelta, // Delta is a constant or variable from Base
172 StrideDelta, // Delta is a constant or variable from Stride
173 };
174
175 Candidate() = default;
176 Candidate(Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
177 Instruction *I, const SCEV *StrideSCEV)
178 : CandidateKind(CT), Base(B), Index(Idx), Stride(S), Ins(I),
179 StrideSCEV(StrideSCEV) {}
180
181 Kind CandidateKind = Invalid;
182
183 const SCEV *Base = nullptr;
184 // TODO: Swap Index and Stride's name.
185 // Note that Index and Stride of a GEP candidate do not necessarily have the
186 // same integer type. In that case, during rewriting, Stride will be
187 // sign-extended or truncated to Index's type.
188 ConstantInt *Index = nullptr;
189
190 Value *Stride = nullptr;
191
192 // The instruction this candidate corresponds to. It helps us to rewrite a
193 // candidate with respect to its immediate basis. Note that one instruction
194 // can correspond to multiple candidates depending on how you associate the
195 // expression. For instance,
196 //
197 // (a + 1) * (b + 2)
198 //
199 // can be treated as
200 //
201 // <Base: a, Index: 1, Stride: b + 2>
202 //
203 // or
204 //
205 // <Base: b, Index: 2, Stride: a + 1>
206 Instruction *Ins = nullptr;
207
208 // Points to the immediate basis of this candidate, or nullptr if we cannot
209 // find any basis for this candidate.
210 Candidate *Basis = nullptr;
211
212 DKind DeltaKind = InvalidDelta;
213
214 // Store SCEV of Stride to compute delta from different strides
215 const SCEV *StrideSCEV = nullptr;
216
217 // Points to (Y - X) that will be used to rewrite this candidate.
218 Value *Delta = nullptr;
219
220 /// Cost model: Evaluate the computational efficiency of the candidate.
221 ///
222 /// Efficiency levels (higher is better):
223 /// ZeroInst (5) - [Variable] or [Const]
224 /// OneInstOneVar (4) - [Variable + Const] or [Variable * Const]
225 /// OneInstTwoVar (3) - [Variable + Variable] or [Variable * Variable]
226 /// TwoInstOneVar (2) - [Const + Const * Variable]
227 /// TwoInstTwoVar (1) - [Variable + Const * Variable]
228 enum EfficiencyLevel : unsigned {
229 Unknown = 0,
230 TwoInstTwoVar = 1,
231 TwoInstOneVar = 2,
232 OneInstTwoVar = 3,
233 OneInstOneVar = 4,
234 ZeroInst = 5
235 };
236
237 static EfficiencyLevel
238 getComputationEfficiency(Kind CandidateKind, const ConstantInt *Index,
239 const Value *Stride, const SCEV *Base = nullptr) {
240 bool IsConstantBase = false;
241 bool IsZeroBase = false;
242 // When evaluating the efficiency of a rewrite, if the Base's SCEV is
243 // not available, conservatively assume the base is not constant.
244 if (auto *ConstBase = dyn_cast_or_null<SCEVConstant>(Base)) {
245 IsConstantBase = true;
246 IsZeroBase = ConstBase->getValue()->isZero();
247 }
248
249 bool IsConstantStride = isa<ConstantInt>(Stride);
250 bool IsZeroStride =
251 IsConstantStride && cast<ConstantInt>(Stride)->isZero();
252 // All constants
253 if (IsConstantBase && IsConstantStride)
254 return ZeroInst;
255
256 // (Base + Index) * Stride
257 if (CandidateKind == Mul) {
258 if (IsZeroStride)
259 return ZeroInst;
260 if (Index->isZero())
261 return (IsConstantStride || IsConstantBase) ? OneInstOneVar
262 : OneInstTwoVar;
263
264 if (IsConstantBase)
265 return IsZeroBase && (Index->isOne() || Index->isMinusOne())
266 ? ZeroInst
267 : OneInstOneVar;
268
269 if (IsConstantStride) {
270 auto *CI = cast<ConstantInt>(Stride);
271 return (CI->isOne() || CI->isMinusOne()) ? OneInstOneVar
272 : TwoInstOneVar;
273 }
274 return TwoInstTwoVar;
275 }
276
277 // Base + Index * Stride
278 assert(CandidateKind == Add || CandidateKind == GEP);
279 if (Index->isZero() || IsZeroStride)
280 return ZeroInst;
281
282 bool IsSimpleIndex = Index->isOne() || Index->isMinusOne();
283
284 if (IsConstantBase)
285 return IsZeroBase ? (IsSimpleIndex ? ZeroInst : OneInstOneVar)
286 : (IsSimpleIndex ? OneInstOneVar : TwoInstOneVar);
287
288 if (IsConstantStride)
289 return IsZeroStride ? ZeroInst : OneInstOneVar;
290
291 if (IsSimpleIndex)
292 return OneInstTwoVar;
293
294 return TwoInstTwoVar;
295 }
296
297 // Evaluate if the given delta is profitable to rewrite this candidate.
298 bool isProfitableRewrite(const Value &Delta, const DKind DeltaKind) const {
299 // This function cannot accurately evaluate the profit of whole expression
300 // with context. A candidate (B + I * S) cannot express whether this
301 // instruction needs to compute on its own (I * S), which may be shared
302 // with other candidates or may need instructions to compute.
303 // If the rewritten form has the same strength, still rewrite to
304 // (X + Delta) since it may expose more CSE opportunities on Delta, as
305 // unrolled loops usually have identical Delta for each unrolled body.
306 //
307 // Note, this function should only be used on Index Delta rewrite.
308 // Base and Stride delta need context info to evaluate the register
309 // pressure impact from variable delta.
310 return getComputationEfficiency(CandidateKind, Index, Stride, Base) <=
311 getRewriteEfficiency(Delta, DeltaKind);
312 }
313
314 // Evaluate the rewrite efficiency of this candidate with its Basis
315 EfficiencyLevel getRewriteEfficiency() const {
316 return Basis ? getRewriteEfficiency(*Delta, DeltaKind) : Unknown;
317 }
318
319 // Evaluate the rewrite efficiency of this candidate with a given delta
320 EfficiencyLevel getRewriteEfficiency(const Value &Delta,
321 const DKind DeltaKind) const {
322 switch (DeltaKind) {
323 case BaseDelta: // [X + Delta]
324 return getComputationEfficiency(
325 CandidateKind,
326 ConstantInt::get(cast<IntegerType>(Delta.getType()), 1), &Delta);
327 case StrideDelta: // [X + Index * Delta]
328 return getComputationEfficiency(CandidateKind, Index, &Delta);
329 case IndexDelta: // [X + Delta * Stride]
330 return getComputationEfficiency(CandidateKind,
331 cast<ConstantInt>(&Delta), Stride);
332 default:
333 return Unknown;
334 }
335 }
336
337 bool isHighEfficiency() const {
338 return getComputationEfficiency(CandidateKind, Index, Stride, Base) >=
339 OneInstOneVar;
340 }
341
342 // Verify that this candidate has valid delta components relative to the
343 // basis
344 bool hasValidDelta(const Candidate &Basis) const {
345 switch (DeltaKind) {
346 case IndexDelta:
347 // Index differs, Base and Stride must match
348 return Base == Basis.Base && StrideSCEV == Basis.StrideSCEV;
349 case StrideDelta:
350 // Stride differs, Base and Index must match
351 return Base == Basis.Base && Index == Basis.Index;
352 case BaseDelta:
353 // Base differs, Stride and Index must match
354 return StrideSCEV == Basis.StrideSCEV && Index == Basis.Index;
355 default:
356 return false;
357 }
358 }
359 };
360
361 bool runOnFunction(Function &F);
362
363private:
364 // Fetch straight-line basis for rewriting C, update C.Basis to point to it,
365 // and store the delta between C and its Basis in C.Delta.
366 void setBasisAndDeltaFor(Candidate &C);
367 // Returns whether the candidate can be folded into an addressing mode.
368 bool isFoldable(const Candidate &C, TargetTransformInfo *TTI);
369
370 // Checks whether I is in a candidate form. If so, adds all the matching forms
371 // to Candidates, and tries to find the immediate basis for each of them.
372 void allocateCandidatesAndFindBasis(Instruction *I);
373
374 // Allocate candidates and find bases for Add instructions.
375 void allocateCandidatesAndFindBasisForAdd(Instruction *I);
376
377 // Given I = LHS + RHS, factors RHS into i * S and makes (LHS + i * S) a
378 // candidate.
379 void allocateCandidatesAndFindBasisForAdd(Value *LHS, Value *RHS,
380 Instruction *I);
381 // Allocate candidates and find bases for Mul instructions.
382 void allocateCandidatesAndFindBasisForMul(Instruction *I);
383
384 // Splits LHS into Base + Index and, if succeeds, calls
385 // allocateCandidatesAndFindBasis.
386 void allocateCandidatesAndFindBasisForMul(Value *LHS, Value *RHS,
387 Instruction *I);
388
389 // Allocate candidates and find bases for GetElementPtr instructions.
390 void allocateCandidatesAndFindBasisForGEP(GetElementPtrInst *GEP);
391
392 // Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate
393 // basis.
394 void allocateCandidatesAndFindBasis(Candidate::Kind CT, const SCEV *B,
395 ConstantInt *Idx, Value *S,
396 Instruction *I);
397
398 // Rewrites candidate C with respect to Basis.
399 void rewriteCandidate(const Candidate &C);
400
401 // Emit code that computes the "bump" from Basis to C.
402 static Value *emitBump(const Candidate &Basis, const Candidate &C,
403 IRBuilder<> &Builder, const DataLayout *DL);
404
405 const DataLayout *DL = nullptr;
406 DominatorTree *DT = nullptr;
407 ScalarEvolution *SE;
408 TargetTransformInfo *TTI = nullptr;
409 std::list<Candidate> Candidates;
410
411 // Map from SCEV to instructions that represent the value,
412 // instructions are sorted in depth-first order.
413 DenseMap<const SCEV *, SmallSetVector<Instruction *, 2>> SCEVToInsts;
414
415 // Record the dependency between instructions. If C.Basis == B, we would have
416 // {B.Ins -> {C.Ins, ...}}.
417 MapVector<Instruction *, std::vector<Instruction *>> DependencyGraph;
418
419 // Map between each instruction and its possible candidates.
420 DenseMap<Instruction *, SmallVector<Candidate *, 3>> RewriteCandidates;
421
422 // All instructions that have candidates sort in topological order based on
423 // dependency graph, from roots to leaves.
424 std::vector<Instruction *> SortedCandidateInsts;
425
426 // Record all instructions that are already rewritten and will be removed
427 // later.
428 std::vector<Instruction *> DeadInstructions;
429
430 // Classify candidates against Delta kind
431 class CandidateDictTy {
432 public:
433 using CandsTy = SmallVector<Candidate *, 8>;
434 using BBToCandsTy = DenseMap<const BasicBlock *, CandsTy>;
435
436 private:
437 // Index delta Basis must have the same (Base, StrideSCEV, Inst.Type)
438 using IndexDeltaKeyTy = std::tuple<const SCEV *, const SCEV *, Type *>;
439 DenseMap<IndexDeltaKeyTy, BBToCandsTy> IndexDeltaCandidates;
440
441 // Base delta Basis must have the same (StrideSCEV, Index, Inst.Type)
442 using BaseDeltaKeyTy = std::tuple<const SCEV *, ConstantInt *, Type *>;
443 DenseMap<BaseDeltaKeyTy, BBToCandsTy> BaseDeltaCandidates;
444
445 // Stride delta Basis must have the same (Base, Index, Inst.Type)
446 using StrideDeltaKeyTy = std::tuple<const SCEV *, ConstantInt *, Type *>;
447 DenseMap<StrideDeltaKeyTy, BBToCandsTy> StrideDeltaCandidates;
448
449 public:
450 // TODO: Disable index delta on GEP after we completely move
451 // from typed GEP to PtrAdd.
452 const BBToCandsTy *getCandidatesWithDeltaKind(const Candidate &C,
453 Candidate::DKind K) const {
454 assert(K != Candidate::InvalidDelta);
455 if (K == Candidate::IndexDelta) {
456 IndexDeltaKeyTy IndexDeltaKey(C.Base, C.StrideSCEV, C.Ins->getType());
457 auto It = IndexDeltaCandidates.find(IndexDeltaKey);
458 if (It != IndexDeltaCandidates.end())
459 return &It->second;
460 } else if (K == Candidate::BaseDelta) {
461 BaseDeltaKeyTy BaseDeltaKey(C.StrideSCEV, C.Index, C.Ins->getType());
462 auto It = BaseDeltaCandidates.find(BaseDeltaKey);
463 if (It != BaseDeltaCandidates.end())
464 return &It->second;
465 } else {
466 assert(K == Candidate::StrideDelta);
467 StrideDeltaKeyTy StrideDeltaKey(C.Base, C.Index, C.Ins->getType());
468 auto It = StrideDeltaCandidates.find(StrideDeltaKey);
469 if (It != StrideDeltaCandidates.end())
470 return &It->second;
471 }
472 return nullptr;
473 }
474
475 // Pointers to C must remain valid until CandidateDict is cleared.
476 void add(Candidate &C) {
477 Type *ValueType = C.Ins->getType();
478 BasicBlock *BB = C.Ins->getParent();
479 IndexDeltaKeyTy IndexDeltaKey(C.Base, C.StrideSCEV, ValueType);
480 BaseDeltaKeyTy BaseDeltaKey(C.StrideSCEV, C.Index, ValueType);
481 StrideDeltaKeyTy StrideDeltaKey(C.Base, C.Index, ValueType);
482 IndexDeltaCandidates[IndexDeltaKey][BB].push_back(&C);
483 BaseDeltaCandidates[BaseDeltaKey][BB].push_back(&C);
484 StrideDeltaCandidates[StrideDeltaKey][BB].push_back(&C);
485 }
486 // Remove all mappings from set
487 void clear() {
488 IndexDeltaCandidates.clear();
489 BaseDeltaCandidates.clear();
490 StrideDeltaCandidates.clear();
491 }
492 } CandidateDict;
493
494 const SCEV *getAndRecordSCEV(Value *V) {
495 auto *S = SE->getSCEV(V);
498 SCEVToInsts[S].insert(cast<Instruction>(V));
499
500 return S;
501 }
502
503 bool candidatePredicate(Candidate *Basis, Candidate &C, Candidate::DKind K);
504
505 bool searchFrom(const CandidateDictTy::BBToCandsTy &BBToCands, Candidate &C,
506 Candidate::DKind K);
507
508 // Get the nearest instruction before CI that represents the value of S,
509 // return nullptr if no instruction is associated with S or S is not a
510 // reusable expression.
511 Value *getNearestValueOfSCEV(const SCEV *S, const Instruction *CI) const {
513 return nullptr;
514
515 if (auto *SU = dyn_cast<SCEVUnknown>(S))
516 return SU->getValue();
517 if (auto *SC = dyn_cast<SCEVConstant>(S))
518 return SC->getValue();
519
520 auto It = SCEVToInsts.find(S);
521 if (It == SCEVToInsts.end())
522 return nullptr;
523
524 // Instructions are sorted in depth-first order, so search for the nearest
525 // instruction by walking the list in reverse order.
526 for (Instruction *I : reverse(It->second))
527 if (DT->dominates(I, CI))
528 return I;
529
530 return nullptr;
531 }
532
533 struct DeltaInfo {
534 Candidate *Cand;
535 Candidate::DKind DeltaKind;
536 Value *Delta;
537
538 DeltaInfo()
539 : Cand(nullptr), DeltaKind(Candidate::InvalidDelta), Delta(nullptr) {}
540 DeltaInfo(Candidate *Cand, Candidate::DKind DeltaKind, Value *Delta)
541 : Cand(Cand), DeltaKind(DeltaKind), Delta(Delta) {}
542 operator bool() const { return Cand != nullptr; }
543 };
544
545 friend raw_ostream &operator<<(raw_ostream &OS, const DeltaInfo &DI);
546
547 DeltaInfo compressPath(Candidate &C, Candidate *Basis) const;
548
549 Candidate *pickRewriteCandidate(Instruction *I) const;
550 void sortCandidateInstructions();
551 Value *getDelta(const Candidate &C, const Candidate &Basis,
552 Candidate::DKind K) const;
553 static bool isSimilar(Candidate &C, Candidate &Basis, Candidate::DKind K);
554
555 // Add Basis -> C in DependencyGraph and propagate
556 // C.Stride and C.Delta's dependency to C
557 void addDependency(Candidate &C, Candidate *Basis) {
558 if (Basis)
559 DependencyGraph[Basis->Ins].emplace_back(C.Ins);
560
561 // If any candidate of Inst has a basis, then Inst will be rewritten,
562 // C must be rewritten after rewriting Inst, so we need to propagate
563 // the dependency to C
564 auto PropagateDependency = [&](Instruction *Inst) {
565 if (auto CandsIt = RewriteCandidates.find(Inst);
566 CandsIt != RewriteCandidates.end() &&
567 llvm::any_of(CandsIt->second,
568 [](Candidate *Cand) { return Cand->Basis; }))
569 DependencyGraph[Inst].emplace_back(C.Ins);
570 };
571
572 // If C has a variable delta and the delta is a candidate,
573 // propagate its dependency to C
574 if (auto *DeltaInst = dyn_cast_or_null<Instruction>(C.Delta))
575 PropagateDependency(DeltaInst);
576
577 // If the stride is a candidate, propagate its dependency to C
578 if (auto *StrideInst = dyn_cast<Instruction>(C.Stride))
579 PropagateDependency(StrideInst);
580 };
581};
582
584 const StraightLineStrengthReduce::Candidate &C) {
585 OS << "Ins: " << *C.Ins << "\n Base: " << *C.Base
586 << "\n Index: " << *C.Index << "\n Stride: " << *C.Stride
587 << "\n StrideSCEV: " << *C.StrideSCEV;
588 if (C.Basis)
589 OS << "\n Delta: " << *C.Delta << "\n Basis: \n [ " << *C.Basis << " ]";
590 return OS;
591}
592
593[[maybe_unused]] LLVM_DUMP_METHOD inline raw_ostream &
594operator<<(raw_ostream &OS, const StraightLineStrengthReduce::DeltaInfo &DI) {
595 OS << "Cand: " << *DI.Cand << "\n";
596 OS << "Delta Kind: ";
597 switch (DI.DeltaKind) {
598 case StraightLineStrengthReduce::Candidate::IndexDelta:
599 OS << "Index";
600 break;
601 case StraightLineStrengthReduce::Candidate::BaseDelta:
602 OS << "Base";
603 break;
604 case StraightLineStrengthReduce::Candidate::StrideDelta:
605 OS << "Stride";
606 break;
607 default:
608 break;
609 }
610 OS << "\nDelta: " << *DI.Delta;
611 return OS;
612}
613
614} // end anonymous namespace
615
616char StraightLineStrengthReduceLegacyPass::ID = 0;
617
618INITIALIZE_PASS_BEGIN(StraightLineStrengthReduceLegacyPass, "slsr",
619 "Straight line strength reduction", false, false)
623INITIALIZE_PASS_END(StraightLineStrengthReduceLegacyPass, "slsr",
624 "Straight line strength reduction", false, false)
625
627 return new StraightLineStrengthReduceLegacyPass();
628}
629
630// A helper function that unifies the bitwidth of A and B.
631static void unifyBitWidth(APInt &A, APInt &B) {
632 if (A.getBitWidth() < B.getBitWidth())
633 A = A.sext(B.getBitWidth());
634 else if (A.getBitWidth() > B.getBitWidth())
635 B = B.sext(A.getBitWidth());
636}
637
638Value *StraightLineStrengthReduce::getDelta(const Candidate &C,
639 const Candidate &Basis,
640 Candidate::DKind K) const {
641 if (K == Candidate::IndexDelta) {
642 APInt Idx = C.Index->getValue();
643 APInt BasisIdx = Basis.Index->getValue();
644 unifyBitWidth(Idx, BasisIdx);
645 APInt IndexDelta = Idx - BasisIdx;
646 IntegerType *DeltaType =
647 IntegerType::get(C.Ins->getContext(), IndexDelta.getBitWidth());
648 return ConstantInt::get(DeltaType, IndexDelta);
649 } else if (K == Candidate::BaseDelta || K == Candidate::StrideDelta) {
650 const SCEV *BasisPart =
651 (K == Candidate::BaseDelta) ? Basis.Base : Basis.StrideSCEV;
652 const SCEV *CandPart = (K == Candidate::BaseDelta) ? C.Base : C.StrideSCEV;
653 const SCEV *Diff = SE->getMinusSCEV(CandPart, BasisPart);
654 return getNearestValueOfSCEV(Diff, C.Ins);
655 }
656 return nullptr;
657}
658
659bool StraightLineStrengthReduce::isSimilar(Candidate &C, Candidate &Basis,
660 Candidate::DKind K) {
661 bool SameType = false;
662 switch (K) {
663 case Candidate::StrideDelta:
664 SameType = C.StrideSCEV->getType() == Basis.StrideSCEV->getType();
665 break;
666 case Candidate::BaseDelta:
667 SameType = C.Base->getType() == Basis.Base->getType();
668 break;
669 case Candidate::IndexDelta:
670 SameType = true;
671 break;
672 default:;
673 }
674 return SameType && Basis.Ins != C.Ins &&
675 Basis.CandidateKind == C.CandidateKind;
676}
677
678// Try to find a Delta that C can reuse Basis to rewrite.
679// Set C.Delta, C.Basis, and C.DeltaKind if found.
680// Return true if found a constant delta.
681// Return false if not found or the delta is not a constant.
682bool StraightLineStrengthReduce::candidatePredicate(Candidate *Basis,
683 Candidate &C,
684 Candidate::DKind K) {
685 SmallVector<Instruction *> DropPoisonGeneratingInsts;
686 // Ensure the IR of Basis->Ins is not more poisonous than its SCEV.
687 if (!isSimilar(C, *Basis, K) ||
689 !SE->canReuseInstruction(SE->getSCEV(Basis->Ins), Basis->Ins,
690 DropPoisonGeneratingInsts)))
691 return false;
692
693 assert(DT->dominates(Basis->Ins, C.Ins));
694 Value *Delta = getDelta(C, *Basis, K);
695 if (!Delta)
696 return false;
697
698 // IndexDelta rewrite is not always profitable, e.g.,
699 // X = B + 8 * S
700 // Y = B + S,
701 // rewriting Y to X - 7 * S is probably a bad idea.
702 // So, we need to check if the rewrite form's computation efficiency
703 // is better than the original form.
704 if (K == Candidate::IndexDelta &&
705 !C.isProfitableRewrite(*Delta, Candidate::IndexDelta))
706 return false;
707
708 // If there is a Delta that we can reuse Basis to rewrite C,
709 // clean up DropPoisonGeneratingInsts returned by successful
710 // SE->canReuseInstruction()
711 for (Instruction *I : DropPoisonGeneratingInsts)
712 I->dropPoisonGeneratingAnnotations();
713
714 // Record delta if none has been found yet, or the new delta is
715 // a constant that is better than the existing delta.
716 if (!C.Delta || isa<ConstantInt>(Delta)) {
717 C.Delta = Delta;
718 C.Basis = Basis;
719 C.DeltaKind = K;
720 }
721 return isa<ConstantInt>(C.Delta);
722}
723
724// return true if find a Basis with constant delta and stop searching,
725// return false if did not find a Basis or the delta is not a constant
726// and continue searching for a Basis with constant delta
727bool StraightLineStrengthReduce::searchFrom(
728 const CandidateDictTy::BBToCandsTy &BBToCands, Candidate &C,
729 Candidate::DKind K) {
730
731 // Stride delta rewrite on Mul form is usually non-profitable, and Base
732 // delta rewrite sometimes is profitable, so we do not support them on Mul.
733 if (C.CandidateKind == Candidate::Mul && K != Candidate::IndexDelta)
734 return false;
735
736 // Search dominating candidates by walking the immediate-dominator chain
737 // from the candidate's defining block upward. Visiting blocks in this
738 // order ensures we prefer the closest dominating basis.
739 const BasicBlock *BB = C.Ins->getParent();
740 while (BB) {
741 auto It = BBToCands.find(BB);
742 if (It != BBToCands.end())
743 for (Candidate *Basis : reverse(It->second))
744 if (candidatePredicate(Basis, C, K))
745 return true;
746
747 const DomTreeNode *Node = DT->getNode(BB);
748 if (!Node)
749 break;
750 Node = Node->getIDom();
751 BB = Node ? Node->getBlock() : nullptr;
752 }
753 return false;
754}
755
756void StraightLineStrengthReduce::setBasisAndDeltaFor(Candidate &C) {
757 if (const auto *BaseDeltaCandidates =
758 CandidateDict.getCandidatesWithDeltaKind(C, Candidate::BaseDelta))
759 if (searchFrom(*BaseDeltaCandidates, C, Candidate::BaseDelta)) {
760 LLVM_DEBUG(dbgs() << "Found delta from Base: " << *C.Delta << "\n");
761 return;
762 }
763
764 if (const auto *StrideDeltaCandidates =
765 CandidateDict.getCandidatesWithDeltaKind(C, Candidate::StrideDelta))
766 if (searchFrom(*StrideDeltaCandidates, C, Candidate::StrideDelta)) {
767 LLVM_DEBUG(dbgs() << "Found delta from Stride: " << *C.Delta << "\n");
768 return;
769 }
770
771 if (const auto *IndexDeltaCandidates =
772 CandidateDict.getCandidatesWithDeltaKind(C, Candidate::IndexDelta))
773 if (searchFrom(*IndexDeltaCandidates, C, Candidate::IndexDelta)) {
774 LLVM_DEBUG(dbgs() << "Found delta from Index: " << *C.Delta << "\n");
775 return;
776 }
777
778 // If we did not find a constant delta, we might have found a variable delta
779 if (C.Delta) {
780 LLVM_DEBUG({
781 dbgs() << "Found delta from ";
782 if (C.DeltaKind == Candidate::BaseDelta)
783 dbgs() << "Base: ";
784 else
785 dbgs() << "Stride: ";
786 dbgs() << *C.Delta << "\n";
787 });
788 assert(C.DeltaKind != Candidate::InvalidDelta && C.Basis);
789 }
790}
791
792// Compress the path from `Basis` to the deepest Basis in the Basis chain
793// to avoid non-profitable data dependency and improve ILP.
794// X = A + 1
795// Y = X + 1
796// Z = Y + 1
797// ->
798// X = A + 1
799// Y = A + 2
800// Z = A + 3
801// Return the delta info for C aginst the new Basis
802auto StraightLineStrengthReduce::compressPath(Candidate &C,
803 Candidate *Basis) const
804 -> DeltaInfo {
805 if (!Basis || !Basis->Basis || C.CandidateKind == Candidate::Mul)
806 return {};
807 Candidate *Root = Basis;
808 Value *NewDelta = nullptr;
809 auto NewKind = Candidate::InvalidDelta;
810
811 while (Root->Basis) {
812 Candidate *NextRoot = Root->Basis;
813 if (C.Base == NextRoot->Base && C.StrideSCEV == NextRoot->StrideSCEV &&
814 isSimilar(C, *NextRoot, Candidate::IndexDelta)) {
815 ConstantInt *CI =
816 cast<ConstantInt>(getDelta(C, *NextRoot, Candidate::IndexDelta));
817 if (CI->isZero() || CI->isOne() || isa<SCEVConstant>(C.StrideSCEV)) {
818 Root = NextRoot;
819 NewKind = Candidate::IndexDelta;
820 NewDelta = CI;
821 continue;
822 }
823 }
824
825 const SCEV *CandPart = nullptr;
826 const SCEV *BasisPart = nullptr;
827 auto CurrKind = Candidate::InvalidDelta;
828 if (C.Base == NextRoot->Base && C.Index == NextRoot->Index) {
829 CandPart = C.StrideSCEV;
830 BasisPart = NextRoot->StrideSCEV;
831 CurrKind = Candidate::StrideDelta;
832 } else if (C.StrideSCEV == NextRoot->StrideSCEV &&
833 C.Index == NextRoot->Index) {
834 CandPart = C.Base;
835 BasisPart = NextRoot->Base;
836 CurrKind = Candidate::BaseDelta;
837 } else
838 break;
839
840 assert(CandPart && BasisPart);
841 if (!isSimilar(C, *NextRoot, CurrKind))
842 break;
843
844 if (auto DeltaVal =
845 dyn_cast<SCEVConstant>(SE->getMinusSCEV(CandPart, BasisPart))) {
846 Root = NextRoot;
847 NewDelta = DeltaVal->getValue();
848 NewKind = CurrKind;
849 } else
850 break;
851 }
852
853 if (Root != Basis) {
854 assert(NewKind != Candidate::InvalidDelta && NewDelta);
855 LLVM_DEBUG(dbgs() << "Found new Basis with " << *NewDelta
856 << " from path compression.\n");
857 return {Root, NewKind, NewDelta};
858 }
859
860 return {};
861}
862
863// Topologically sort candidate instructions based on their relationship in
864// dependency graph.
865void StraightLineStrengthReduce::sortCandidateInstructions() {
866 SortedCandidateInsts.clear();
867 // An instruction may have multiple candidates that get different Basis
868 // instructions, and each candidate can get dependencies from Basis and
869 // Stride when Stride will also be rewritten by SLSR. Hence, an instruction
870 // may have multiple dependencies. Use InDegree to ensure all dependencies
871 // processed before processing itself.
872 DenseMap<Instruction *, int> InDegree;
873 for (auto &KV : DependencyGraph) {
874 InDegree.try_emplace(KV.first, 0);
875
876 for (auto *Child : KV.second) {
877 InDegree[Child]++;
878 }
879 }
880 std::queue<Instruction *> WorkList;
881 DenseSet<Instruction *> Visited;
882
883 for (auto &KV : DependencyGraph)
884 if (InDegree[KV.first] == 0)
885 WorkList.push(KV.first);
886
887 while (!WorkList.empty()) {
888 Instruction *I = WorkList.front();
889 WorkList.pop();
890 if (!Visited.insert(I).second)
891 continue;
892
893 SortedCandidateInsts.push_back(I);
894
895 for (auto *Next : DependencyGraph[I]) {
896 auto &Degree = InDegree[Next];
897 if (--Degree == 0)
898 WorkList.push(Next);
899 }
900 }
901
902 assert(SortedCandidateInsts.size() == DependencyGraph.size() &&
903 "Dependency graph should not have cycles");
904}
905
906auto StraightLineStrengthReduce::pickRewriteCandidate(Instruction *I) const
907 -> Candidate * {
908 // Return the candidate of instruction I that has the highest profit.
909 auto It = RewriteCandidates.find(I);
910 if (It == RewriteCandidates.end())
911 return nullptr;
912
913 Candidate *BestC = nullptr;
914 auto BestEfficiency = Candidate::Unknown;
915 for (Candidate *C : reverse(It->second))
916 if (C->Basis) {
917 auto Efficiency = C->getRewriteEfficiency();
918 if (Efficiency > BestEfficiency) {
919 BestEfficiency = Efficiency;
920 BestC = C;
921 }
922 }
923
924 return BestC;
925}
926
928 const TargetTransformInfo *TTI) {
929 SmallVector<const Value *, 4> Indices(GEP->indices());
930 return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
932}
933
934// Returns whether (Base + Index * Stride) can be folded to an addressing mode.
935static bool isAddFoldable(const SCEV *Base, ConstantInt *Index, Value *Stride,
937 // Index->getSExtValue() may crash if Index is wider than 64-bit.
938 return Index->getBitWidth() <= 64 &&
939 TTI->isLegalAddressingMode(Base->getType(), nullptr, 0, true,
940 Index->getSExtValue(), UnknownAddressSpace);
941}
942
943bool StraightLineStrengthReduce::isFoldable(const Candidate &C,
944 TargetTransformInfo *TTI) {
945 if (C.CandidateKind == Candidate::Add)
946 return isAddFoldable(C.Base, C.Index, C.Stride, TTI);
947 if (C.CandidateKind == Candidate::GEP)
949 return false;
950}
951
952void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
953 Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
954 Instruction *I) {
955 // Record the SCEV of S that we may use it as a variable delta.
956 // Ensure that we rewrite C with a existing IR that reproduces delta value.
957
958 Candidate C(CT, B, Idx, S, I, getAndRecordSCEV(S));
959 // If we can fold I into an addressing mode, computing I is likely free or
960 // takes only one instruction. So, we don't need to analyze or rewrite it.
961 //
962 // Currently, this algorithm can at best optimize complex computations into
963 // a `variable +/* constant` form. However, some targets have stricter
964 // constraints on the their addressing mode.
965 // For example, a `variable + constant` can only be folded to an addressing
966 // mode if the constant falls within a certain range.
967 // So, we also check if the instruction is already high efficient enough
968 // for the strength reduction algorithm.
969 if (!isFoldable(C, TTI) && !C.isHighEfficiency()) {
970 setBasisAndDeltaFor(C);
971
972 // Compress unnecessary rewrite to improve ILP
973 if (auto Res = compressPath(C, C.Basis)) {
974 C.Basis = Res.Cand;
975 C.DeltaKind = Res.DeltaKind;
976 C.Delta = Res.Delta;
977 }
978 }
979 // Regardless of whether we find a basis for C, we need to push C to the
980 // candidate list so that it can be the basis of other candidates.
981 LLVM_DEBUG(dbgs() << "Allocated Candidate: " << C << "\n");
982 Candidates.push_back(C);
983 RewriteCandidates[C.Ins].push_back(&Candidates.back());
984 CandidateDict.add(Candidates.back());
985}
986
987void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
988 Instruction *I) {
989 switch (I->getOpcode()) {
990 case Instruction::Add:
991 allocateCandidatesAndFindBasisForAdd(I);
992 break;
993 case Instruction::Mul:
994 allocateCandidatesAndFindBasisForMul(I);
995 break;
996 case Instruction::GetElementPtr:
997 allocateCandidatesAndFindBasisForGEP(cast<GetElementPtrInst>(I));
998 break;
999 }
1000}
1001
1002void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
1003 Instruction *I) {
1004 // Try matching B + i * S.
1005 if (!isa<IntegerType>(I->getType()))
1006 return;
1007
1008 assert(I->getNumOperands() == 2 && "isn't I an add?");
1009 Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
1010 allocateCandidatesAndFindBasisForAdd(LHS, RHS, I);
1011 if (LHS != RHS)
1012 allocateCandidatesAndFindBasisForAdd(RHS, LHS, I);
1013}
1014
1015void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
1016 Value *LHS, Value *RHS, Instruction *I) {
1017 Value *S = nullptr;
1018 ConstantInt *Idx = nullptr;
1019 if (match(RHS, m_Mul(m_Value(S), m_ConstantInt(Idx)))) {
1020 // I = LHS + RHS = LHS + Idx * S
1021 allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
1022 } else if (match(RHS, m_Shl(m_Value(S), m_ConstantInt(Idx)))) {
1023 // I = LHS + RHS = LHS + (S << Idx) = LHS + S * (1 << Idx)
1024 APInt One(Idx->getBitWidth(), 1);
1025 Idx = ConstantInt::get(Idx->getContext(), One << Idx->getValue());
1026 allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
1027 } else {
1028 // At least, I = LHS + 1 * RHS
1029 ConstantInt *One = ConstantInt::get(cast<IntegerType>(I->getType()), 1);
1030 allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), One, RHS,
1031 I);
1032 }
1033}
1034
1035// Returns true if A matches B + C where C is constant.
1036static bool matchesAdd(Value *A, Value *&B, ConstantInt *&C) {
1037 return match(A, m_c_Add(m_Value(B), m_ConstantInt(C)));
1038}
1039
1040// Returns true if A matches B | C where C is constant.
1041static bool matchesOr(Value *A, Value *&B, ConstantInt *&C) {
1042 return match(A, m_c_Or(m_Value(B), m_ConstantInt(C)));
1043}
1044
1045void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
1046 Value *LHS, Value *RHS, Instruction *I) {
1047 Value *B = nullptr;
1048 ConstantInt *Idx = nullptr;
1049 if (matchesAdd(LHS, B, Idx)) {
1050 // If LHS is in the form of "Base + Index", then I is in the form of
1051 // "(Base + Index) * RHS".
1052 allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
1053 } else if (matchesOr(LHS, B, Idx) && haveNoCommonBitsSet(B, Idx, *DL)) {
1054 // If LHS is in the form of "Base | Index" and Base and Index have no common
1055 // bits set, then
1056 // Base | Index = Base + Index
1057 // and I is thus in the form of "(Base + Index) * RHS".
1058 allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
1059 } else {
1060 // Otherwise, at least try the form (LHS + 0) * RHS.
1061 ConstantInt *Zero = ConstantInt::get(cast<IntegerType>(I->getType()), 0);
1062 allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS,
1063 I);
1064 }
1065}
1066
1067void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
1068 Instruction *I) {
1069 // Try matching (B + i) * S.
1070 // TODO: we could extend SLSR to float and vector types.
1071 if (!isa<IntegerType>(I->getType()))
1072 return;
1073
1074 assert(I->getNumOperands() == 2 && "isn't I a mul?");
1075 Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
1076 allocateCandidatesAndFindBasisForMul(LHS, RHS, I);
1077 if (LHS != RHS) {
1078 // Symmetrically, try to split RHS to Base + Index.
1079 allocateCandidatesAndFindBasisForMul(RHS, LHS, I);
1080 }
1081}
1082
1083void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
1084 GetElementPtrInst *GEP) {
1085 // TODO: handle vector GEPs
1086 if (GEP->getType()->isVectorTy())
1087 return;
1088
1090 for (Use &Idx : GEP->indices())
1091 IndexExprs.push_back(SE->getSCEV(Idx));
1092
1094 for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
1095 if (GTI.isStruct())
1096 continue;
1097
1098 const SCEV *OrigIndexExpr = IndexExprs[I - 1];
1099 IndexExprs[I - 1] = SE->getZero(OrigIndexExpr->getType());
1100
1101 // The base of this candidate is GEP's base plus the offsets of all
1102 // indices except this current one.
1103 const SCEV *BaseExpr = SE->getGEPExpr(cast<GEPOperator>(GEP), IndexExprs);
1104 Value *ArrayIdx = GEP->getOperand(I);
1105 uint64_t ElementSize = GTI.getSequentialElementStride(*DL);
1106 IntegerType *PtrIdxTy = cast<IntegerType>(DL->getIndexType(GEP->getType()));
1107 ConstantInt *ElementSizeIdx = ConstantInt::get(PtrIdxTy, ElementSize, true);
1108 if (ArrayIdx->getType()->getIntegerBitWidth() <=
1109 DL->getIndexSizeInBits(GEP->getAddressSpace())) {
1110 // Skip factoring if ArrayIdx is wider than the index size, because
1111 // ArrayIdx is implicitly truncated to the index size.
1112 allocateCandidatesAndFindBasis(Candidate::GEP, BaseExpr, ElementSizeIdx,
1113 ArrayIdx, GEP);
1114 }
1115 // When ArrayIdx is the sext of a value, we try to factor that value as
1116 // well. Handling this case is important because array indices are
1117 // typically sign-extended to the pointer index size.
1118 Value *TruncatedArrayIdx = nullptr;
1119 if (match(ArrayIdx, m_SExt(m_Value(TruncatedArrayIdx))) &&
1120 TruncatedArrayIdx->getType()->getIntegerBitWidth() <=
1121 DL->getIndexSizeInBits(GEP->getAddressSpace())) {
1122 // Skip factoring if TruncatedArrayIdx is wider than the pointer size,
1123 // because TruncatedArrayIdx is implicitly truncated to the pointer size.
1124 allocateCandidatesAndFindBasis(Candidate::GEP, BaseExpr, ElementSizeIdx,
1125 TruncatedArrayIdx, GEP);
1126 }
1127
1128 IndexExprs[I - 1] = OrigIndexExpr;
1129 }
1130}
1131
1132Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
1133 const Candidate &C,
1134 IRBuilder<> &Builder,
1135 const DataLayout *DL) {
1136 auto CreateMul = [&](Value *LHS, Value *RHS) {
1137 if (ConstantInt *CR = dyn_cast<ConstantInt>(RHS)) {
1138 const APInt &ConstRHS = CR->getValue();
1139 IntegerType *DeltaType =
1140 IntegerType::get(C.Ins->getContext(), ConstRHS.getBitWidth());
1141 if (ConstRHS.isPowerOf2()) {
1142 ConstantInt *Exponent =
1143 ConstantInt::get(DeltaType, ConstRHS.logBase2());
1144 return Builder.CreateShl(LHS, Exponent);
1145 }
1146 if (ConstRHS.isNegatedPowerOf2()) {
1147 ConstantInt *Exponent =
1148 ConstantInt::get(DeltaType, (-ConstRHS).logBase2());
1149 return Builder.CreateNeg(Builder.CreateShl(LHS, Exponent));
1150 }
1151 }
1152
1153 return Builder.CreateMul(LHS, RHS);
1154 };
1155
1156 Value *Delta = C.Delta;
1157 // If Delta is 0, C is a fully redundant of C.Basis,
1158 // just replace C.Ins with Basis.Ins
1159 if (ConstantInt *CI = dyn_cast<ConstantInt>(Delta);
1160 CI && CI->getValue().isZero())
1161 return nullptr;
1162
1163 if (C.DeltaKind == Candidate::IndexDelta) {
1164 APInt IndexDelta = cast<ConstantInt>(C.Delta)->getValue();
1165 // IndexDelta
1166 // X = B + i * S
1167 // Y = B + i` * S
1168 // = B + (i + IndexDelta) * S
1169 // = B + i * S + IndexDelta * S
1170 // = X + IndexDelta * S
1171 // Bump = (i' - i) * S
1172
1173 // Common case 1: if (i' - i) is 1, Bump = S.
1174 if (IndexDelta == 1)
1175 return C.Stride;
1176 // Common case 2: if (i' - i) is -1, Bump = -S.
1177 if (IndexDelta.isAllOnes())
1178 return Builder.CreateNeg(C.Stride);
1179
1180 IntegerType *DeltaType =
1181 IntegerType::get(Basis.Ins->getContext(), IndexDelta.getBitWidth());
1182 Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, DeltaType);
1183
1184 return CreateMul(ExtendedStride, C.Delta);
1185 }
1186
1187 assert(C.DeltaKind == Candidate::StrideDelta ||
1188 C.DeltaKind == Candidate::BaseDelta);
1189 assert(C.CandidateKind != Candidate::Mul);
1190 // StrideDelta
1191 // X = B + i * S
1192 // Y = B + i * S'
1193 // = B + i * (S + StrideDelta)
1194 // = B + i * S + i * StrideDelta
1195 // = X + i * StrideDelta
1196 // Bump = i * (S' - S)
1197 //
1198 // BaseDelta
1199 // X = B + i * S
1200 // Y = B' + i * S
1201 // = (B + BaseDelta) + i * S
1202 // = X + BaseDelta
1203 // Bump = (B' - B).
1204 Value *Bump = C.Delta;
1205 if (C.DeltaKind == Candidate::StrideDelta) {
1206 // If this value is consumed by a GEP, promote StrideDelta before doing
1207 // StrideDelta * Index to ensure the same semantics as the original GEP.
1208 if (C.CandidateKind == Candidate::GEP) {
1209 auto *GEP = cast<GetElementPtrInst>(C.Ins);
1210 Type *NewScalarIndexTy =
1211 DL->getIndexType(GEP->getPointerOperandType()->getScalarType());
1212 Bump = Builder.CreateSExtOrTrunc(Bump, NewScalarIndexTy);
1213 }
1214 if (!C.Index->isOne()) {
1215 Value *ExtendedIndex =
1216 Builder.CreateSExtOrTrunc(C.Index, Bump->getType());
1217 Bump = CreateMul(Bump, ExtendedIndex);
1218 }
1219 }
1220 return Bump;
1221}
1222
1223void StraightLineStrengthReduce::rewriteCandidate(const Candidate &C) {
1224 if (!DebugCounter::shouldExecute(StraightLineStrengthReduceCounter))
1225 return;
1226
1227 const Candidate &Basis = *C.Basis;
1228 assert(C.Delta && C.CandidateKind == Basis.CandidateKind &&
1229 C.hasValidDelta(Basis));
1230
1231 IRBuilder<> Builder(C.Ins);
1232 Value *Bump = emitBump(Basis, C, Builder, DL);
1233 Value *Reduced = nullptr; // equivalent to but weaker than C.Ins
1234 // If delta is 0, C is a fully redundant of Basis, and Bump is nullptr,
1235 // just replace C.Ins with Basis.Ins
1236 if (!Bump)
1237 Reduced = Basis.Ins;
1238 else {
1239 switch (C.CandidateKind) {
1240 case Candidate::Add:
1241 case Candidate::Mul: {
1242 // C = Basis + Bump
1243 Value *NegBump;
1244 if (match(Bump, m_Neg(m_Value(NegBump)))) {
1245 // If Bump is a neg instruction, emit C = Basis - (-Bump).
1246 Reduced = Builder.CreateSub(Basis.Ins, NegBump);
1247 // We only use the negative argument of Bump, and Bump itself may be
1248 // trivially dead.
1250 } else {
1251 // It's tempting to preserve nsw on Bump and/or Reduced. However, it's
1252 // usually unsound, e.g.,
1253 //
1254 // X = (-2 +nsw 1) *nsw INT_MAX
1255 // Y = (-2 +nsw 3) *nsw INT_MAX
1256 // =>
1257 // Y = X + 2 * INT_MAX
1258 //
1259 // Neither + and * in the resultant expression are nsw.
1260 Reduced = Builder.CreateAdd(Basis.Ins, Bump);
1261 }
1262 break;
1263 }
1264 case Candidate::GEP: {
1265 bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds();
1266 // C = (char *)Basis + Bump
1267 Reduced = Builder.CreatePtrAdd(Basis.Ins, Bump, "", InBounds);
1268 break;
1269 }
1270 default:
1271 llvm_unreachable("C.CandidateKind is invalid");
1272 };
1273 Reduced->takeName(C.Ins);
1274 }
1275 C.Ins->replaceAllUsesWith(Reduced);
1276 DeadInstructions.push_back(C.Ins);
1277}
1278
1279bool StraightLineStrengthReduceLegacyPass::runOnFunction(Function &F) {
1280 if (skipFunction(F))
1281 return false;
1282
1283 auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1284 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1285 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1286 return StraightLineStrengthReduce(DL, DT, SE, TTI).runOnFunction(F);
1287}
1288
1289bool StraightLineStrengthReduce::runOnFunction(Function &F) {
1290 LLVM_DEBUG(dbgs() << "SLSR on Function: " << F.getName() << "\n");
1291 // Traverse the dominator tree in the depth-first order. This order makes sure
1292 // all bases of a candidate are in Candidates when we process it.
1293 for (const auto Node : depth_first(DT))
1294 for (auto &I : *(Node->getBlock()))
1295 allocateCandidatesAndFindBasis(&I);
1296
1297 // Build the dependency graph and sort candidate instructions from dependency
1298 // roots to leaves
1299 for (auto &C : Candidates) {
1300 DependencyGraph.try_emplace(C.Ins);
1301 addDependency(C, C.Basis);
1302 }
1303 sortCandidateInstructions();
1304
1305 // Rewrite candidates in the topological order that rewrites a Candidate
1306 // always before rewriting its Basis
1307 for (Instruction *I : reverse(SortedCandidateInsts))
1308 if (Candidate *C = pickRewriteCandidate(I))
1309 rewriteCandidate(*C);
1310
1311 for (auto *DeadIns : DeadInstructions)
1312 // A dead instruction may be another dead instruction's op,
1313 // don't delete an instruction twice
1314 if (DeadIns->getParent())
1316
1317 bool Ret = !DeadInstructions.empty();
1318 DeadInstructions.clear();
1319 DependencyGraph.clear();
1320 RewriteCandidates.clear();
1321 SortedCandidateInsts.clear();
1322 // First clear all references to candidates in the list
1323 CandidateDict.clear();
1324 // Then destroy the list
1325 Candidates.clear();
1326 return Ret;
1327}
1328
1329PreservedAnalyses
1331 const DataLayout *DL = &F.getDataLayout();
1332 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1333 auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
1334 auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
1335
1336 if (!StraightLineStrengthReduce(DL, DT, SE, TTI).runOnFunction(F))
1337 return PreservedAnalyses::all();
1338
1344 return PA;
1345}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
static bool runOnFunction(Function &F, bool PostInlining)
Hexagon Common GEP
Module.h This file contains the declarations for the Module class.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
static bool isGEPFoldable(GetElementPtrInst *GEP, const TargetTransformInfo *TTI)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore, Value *FlagsOp)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
static bool matchesOr(Value *A, Value *&B, ConstantInt *&C)
static bool isAddFoldable(const SCEV *Base, ConstantInt *Index, Value *Stride, TargetTransformInfo *TTI)
static void unifyBitWidth(APInt &A, APInt &B)
static bool matchesAdd(Value *A, Value *&B, ConstantInt *&C)
static const unsigned UnknownAddressSpace
static cl::opt< bool > EnablePoisonReuseGuard("enable-poison-reuse-guard", cl::init(true), cl::desc("Enable poison-reuse guard"))
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
unsigned logBase2() const
Definition APInt.h:1762
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:225
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:162
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
static bool shouldExecute(CounterInfo &Counter)
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
iterator end()
Definition DenseMap.h:81
Analysis pass which computes a DominatorTree.
Definition Dominators.h:283
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:321
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition IRBuilder.h:2039
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition IRBuilder.h:1784
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1420
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1492
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1403
Value * CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a SExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2118
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1437
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getGEPExpr(GEPOperator *GEP, ArrayRef< const SCEV * > IndexExprs)
Returns an expression for a GEP.
LLVM_ABI bool canReuseInstruction(const SCEV *S, Instruction *I, SmallVectorImpl< Instruction * > &DropPoisonGeneratingInsts)
Check whether it is poison-safe to represent the expression S using the instruction I.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCC_Free
Expected to fold away in lowering.
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
TypeSize getSequentialElementStride(const DataLayout &DL) const
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
bool match(Val *V, const Pattern &P)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
initializer< Ty > init(const Ty &Val)
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool haveNoCommonBitsSet(const WithCache< const Value * > &LHSCache, const WithCache< const Value * > &RHSCache, const SimplifyQuery &SQ)
Return true if LHS and RHS have no common bits set.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void initializeStraightLineStrengthReduceLegacyPassPass(PassRegistry &)
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:94
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
generic_gep_type_iterator<> gep_type_iterator
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
iterator_range< df_iterator< T > > depth_first(const T &G)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createStraightLineStrengthReducePass()