LLVM 23.0.0git
VPlanTransforms.h
Go to the documentation of this file.
1//===- VPlanTransforms.h - Utility VPlan to VPlan transforms --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides utility VPlan to VPlan transformations.
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
14#define LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
15
16#include "VPlan.h"
17#include "VPlanVerifier.h"
19#include "llvm/ADT/ScopeExit.h"
22#include "llvm/Support/Regex.h"
23
24namespace llvm {
25
27class Instruction;
28class Loop;
29class LoopVersioning;
31class PHINode;
32class ScalarEvolution;
36class VPBuilder;
37class VPRecipeBuilder;
38struct VFRange;
39
42
43#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
47#endif
48
50 /// Helper to run a VPlan pass \p Pass on \p Plan, forwarding extra arguments
51 /// to the pass. Performs verification/printing after each VPlan pass if
52 /// requested via command line options.
53 template <bool EnableVerify = true, typename PassTy, typename... ArgsTy>
54 static decltype(auto) runPass(StringRef PassName, PassTy &&Pass, VPlan &Plan,
55 ArgsTy &&...Args) {
56 scope_exit PostTransformActions{[&]() {
57#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
58 // Make sure to print before verification, so that output is more useful
59 // in case of failures:
61 (VPlanPrintAfterPasses.getNumOccurrences() > 0 &&
63 return Regex(Entry).match(PassName);
64 }))) {
65 dbgs()
66 << "VPlan for loop in '"
68 << "' after " << PassName << '\n';
71 else
72 dbgs() << Plan << '\n';
73 }
74#endif
75 if (VerifyEachVPlan && EnableVerify) {
76 if (!verifyVPlanIsValid(Plan))
77 report_fatal_error("Broken VPlan found, compilation aborted!");
78 }
79 }};
80
81 return std::forward<PassTy>(Pass)(Plan, std::forward<ArgsTy>(Args)...);
82 }
83#define RUN_VPLAN_PASS(PASS, ...) \
84 llvm::VPlanTransforms::runPass(#PASS, PASS, __VA_ARGS__)
85#define RUN_VPLAN_PASS_NO_VERIFY(PASS, ...) \
86 llvm::VPlanTransforms::runPass<false>(#PASS, PASS, __VA_ARGS__)
87
88 /// Create a base VPlan0, serving as the common starting point for all later
89 /// candidates. It consists of an initial plain CFG loop with loop blocks from
90 /// \p TheLoop being directly translated to VPBasicBlocks with VPInstruction
91 /// corresponding to the input IR.
92 ///
93 /// The created loop is wrapped in an initial skeleton to facilitate
94 /// vectorization, consisting of a vector pre-header, an exit block for the
95 /// main vector loop (middle.block) and a new block as preheader of the scalar
96 /// loop (scalar.ph). See below for an illustration. It also adds a canonical
97 /// IV and its increment, using \p InductionTy and \p IVDL, and creates a
98 /// VPValue expression for the original trip count.
99 ///
100 /// [ ] <-- Plan's entry VPIRBasicBlock, wrapping the original loop's
101 /// / \ old preheader. Will contain iteration number check and SCEV
102 /// | | expansions.
103 /// | |
104 /// / v
105 /// | [ ] <-- vector loop bypass (may consist of multiple blocks) will be
106 /// | / | added later.
107 /// | / v
108 /// || [ ] <-- vector pre header.
109 /// |/ |
110 /// | v
111 /// | [ ] \ <-- plain CFG loop wrapping original loop to be vectorized.
112 /// | [ ]_|
113 /// | |
114 /// | v
115 /// | [ ] <--- middle-block with the branch to successors
116 /// | / |
117 /// | / |
118 /// | | v
119 /// \--->[ ] <--- scalar preheader (initially a VPBasicBlock, which will be
120 /// | | replaced later by a VPIRBasicBlock wrapping the scalar
121 /// | | preheader basic block).
122 /// | |
123 /// v <-- edge from middle to exit iff epilogue is not required.
124 /// | [ ] \
125 /// | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue,
126 /// | | header wrapped in VPIRBasicBlock).
127 /// \ |
128 /// \ v
129 /// >[ ] <-- original loop exit block(s), wrapped in VPIRBasicBlocks.
130 LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan>
131 buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL,
132 PredicatedScalarEvolution &PSE, LoopVersioning *LVer = nullptr);
133
134 /// Replace VPPhi recipes in \p Plan's header with corresponding
135 /// VPHeaderPHIRecipe subclasses for inductions, reductions, and
136 /// fixed-order recurrences. This processes all header phis and creates
137 /// the appropriate widened recipe for each one.
138 static void createHeaderPhiRecipes(
139 VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop,
140 const MapVector<PHINode *, InductionDescriptor> &Inductions,
141 const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
142 const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
143 const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering);
144
145 /// Create VPReductionRecipes for in-loop reductions. This processes chains
146 /// of operations contributing to in-loop reductions and creates appropriate
147 /// VPReductionRecipe instances.
149 VPlan &Plan, const DenseSet<BasicBlock *> &BlocksNeedingPredication,
150 ElementCount MinVF);
151
152 /// Update \p Plan to account for all early exits. If \p Style is not
153 /// NoUncountableExit, handles uncountable early exits and checks that all
154 /// loads are dereferenceable. Returns false if a non-dereferenceable load is
155 /// found.
156 LLVM_ABI_FOR_TEST static bool
157 handleEarlyExits(VPlan &Plan, UncountableExitStyle Style, Loop *TheLoop,
158 PredicatedScalarEvolution &PSE, DominatorTree &DT,
159 AssumptionCache *AC);
160
161 /// If a check is needed to guard executing the scalar epilogue loop, it will
162 /// be added to the middle block.
163 LLVM_ABI_FOR_TEST static void addMiddleCheck(VPlan &Plan, bool TailFolded);
164
165 /// Add a check to \p Plan to see if the vector loop should be executed.
166 /// If \p CheckBlock is non-null, the compare and branch are placed there;
167 /// ExpandSCEV recipes are always placed in Entry.
168 static void addMinimumIterationCheck(
169 VPlan &Plan, ElementCount VF, unsigned UF,
170 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
171 bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
172 DebugLoc DL, PredicatedScalarEvolution &PSE,
173 VPBasicBlock *CheckBlock = nullptr);
174
175 /// Add a new check block before the vector preheader to \p Plan to check if
176 /// the main vector loop should be executed (TC >= VF * UF).
177 static void
178 addIterationCountCheckBlock(VPlan &Plan, ElementCount VF, unsigned UF,
179 bool RequiresScalarEpilogue, Loop *OrigLoop,
181 DebugLoc DL, PredicatedScalarEvolution &PSE);
182
183 /// Add a check to \p Plan to see if the epilogue vector loop should be
184 /// executed.
186 VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue,
187 ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep,
188 unsigned EpilogueLoopStep, ScalarEvolution &SE);
189
190 /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
191 /// flat CFG into a hierarchical CFG.
192 LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan);
193
194 /// Wrap runtime check block \p CheckBlock in a VPIRBB and \p Cond in a
195 /// VPValue and connect the block to \p Plan, using the VPValue as branch
196 /// condition.
197 static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock,
198 bool AddBranchWeights);
199
200 /// Replaces the VPInstructions in \p Plan with corresponding
201 /// widen recipes. Returns false if any VPInstructions could not be converted
202 /// to a wide recipe if needed.
203 LLVM_ABI_FOR_TEST static bool
205 const TargetLibraryInfo &TLI);
206
207 /// Try to legalize reductions with multiple in-loop uses. Currently only
208 /// strict and non-strict min/max reductions used by FindLastIV reductions are
209 /// supported, corresponding to computing the first and last argmin/argmax,
210 /// respectively. Otherwise return false.
211 static bool handleMultiUseReductions(VPlan &Plan,
212 OptimizationRemarkEmitter *ORE,
213 Loop *TheLoop);
214
215 /// Try to have all users of fixed-order recurrences appear after the recipe
216 /// defining their previous value, by either sinking users or hoisting recipes
217 /// defining their previous value (and its operands). Then introduce
218 /// FirstOrderRecurrenceSplice VPInstructions to combine the value from the
219 /// recurrence phis and previous values.
220 /// \returns true if all users of fixed-order recurrences could be re-arranged
221 /// as needed or false if it is not possible. In the latter case, \p Plan is
222 /// not valid.
223 static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
224
225 /// Check if \p Plan contains any FMaxNum or FMinNum reductions. If it does,
226 /// try to update the vector loop to exit early if any input is NaN and resume
227 /// executing in the scalar loop to handle the NaNs there. Return false if
228 /// this attempt was unsuccessful.
229 static bool handleMaxMinNumReductions(VPlan &Plan);
230
231 /// Check if \p Plan contains any FindLast reductions. If it does, try to
232 /// update the vector loop to save the appropriate state using selects
233 /// for entire vectors for both the latest mask containing at least one active
234 /// element and the corresponding data vector. Return false if this attempt
235 /// was unsuccessful.
236 static bool handleFindLastReductions(VPlan &Plan);
237
238 /// Clear NSW/NUW flags from reduction instructions if necessary.
239 static void clearReductionWrapFlags(VPlan &Plan);
240
241 /// Explicitly unroll \p Plan by \p UF.
242 static void unrollByUF(VPlan &Plan, unsigned UF);
243
244 /// Replace replicating VPReplicateRecipe, VPScalarIVStepsRecipe and
245 /// VPInstruction in \p Plan with \p VF single-scalar recipes. Replicate
246 /// regions are dissolved by replicating their blocks and their recipes \p VF
247 /// times.
248 /// TODO: Also dissolve replicate regions with live outs.
249 static void replicateByVF(VPlan &Plan, ElementCount VF);
250
251 /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
252 /// resulting plan to \p BestVF and \p BestUF.
253 static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
254 unsigned BestUF,
255 PredicatedScalarEvolution &PSE);
256
257 /// Try to simplify VPInstruction::ExplicitVectorLength recipes when the AVL
258 /// is known to be <= VF, replacing them with the AVL directly.
259 static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF,
260 PredicatedScalarEvolution &PSE);
261
262 /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
263 /// optimizations, dead recipe removal, replicate region optimizations and
264 /// block merging.
265 LLVM_ABI_FOR_TEST static void optimize(VPlan &Plan);
266
267 /// Remove redundant VPBasicBlocks by merging them into their single
268 /// predecessor if the latter has a single successor.
269 static bool mergeBlocksIntoPredecessors(VPlan &Plan);
270
271 /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
272 /// region block and remove the mask operand. Optimize the created regions by
273 /// iteratively sinking scalar operands into the region, followed by merging
274 /// regions until no improvements are remaining.
275 static void createAndOptimizeReplicateRegions(VPlan &Plan);
276
277 /// Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an
278 /// (active-lane-mask recipe, wide canonical IV, trip-count). If \p
279 /// UseActiveLaneMaskForControlFlow is true, introduce a
280 /// VPActiveLaneMaskPHIRecipe.
281 static void addActiveLaneMask(VPlan &Plan,
282 bool UseActiveLaneMaskForControlFlow);
283
284 /// Insert truncates and extends for any truncated recipe. Redundant casts
285 /// will be folded later.
286 static void
287 truncateToMinimalBitwidths(VPlan &Plan,
288 const MapVector<Instruction *, uint64_t> &MinBWs);
289
290 /// Replace symbolic strides from \p StridesMap in \p Plan with constants when
291 /// possible.
292 static void
293 replaceSymbolicStrides(VPlan &Plan, PredicatedScalarEvolution &PSE,
294 const DenseMap<Value *, const SCEV *> &StridesMap);
295
296 /// Drop poison flags from recipes that may generate a poison value that is
297 /// used after vectorization, even when their operands are not poison. Those
298 /// recipes meet the following conditions:
299 /// * Contribute to the address computation of a recipe generating a widen
300 /// memory load/store (VPWidenMemoryInstructionRecipe or
301 /// VPInterleaveRecipe).
302 /// * Such a widen memory load/store has at least one underlying Instruction
303 /// that is in a basic block that needs predication and after vectorization
304 /// the generated instruction won't be predicated.
305 /// Uses \p BlockNeedsPredication to check if a block needs predicating.
306 /// TODO: Replace BlockNeedsPredication callback with retrieving info from
307 /// VPlan directly.
308 static void dropPoisonGeneratingRecipes(
309 VPlan &Plan,
310 const std::function<bool(BasicBlock *)> &BlockNeedsPredication);
311
312 /// Add a VPCurrentIterationPHIRecipe and related recipes to \p Plan and
313 /// replace all uses of the canonical IV except for the canonical IV
314 /// increment with a VPCurrentIterationPHIRecipe. The canonical IV is only
315 /// used to control the loop after this transformation.
316 static void
317 addExplicitVectorLength(VPlan &Plan,
318 const std::optional<unsigned> &MaxEVLSafeElements);
319
320 /// Optimize recipes which use an EVL-based header mask to VP intrinsics, for
321 /// example:
322 ///
323 /// %mask = icmp ult step-vector, EVL
324 /// %load = load %ptr, %mask
325 /// -->
326 /// %load = vp.load %ptr, EVL
327 static void optimizeEVLMasks(VPlan &Plan);
328
329 /// For each interleave group in \p InterleaveGroups, replace the recipes
330 /// widening its memory instructions with a single VPInterleaveRecipe at its
331 /// insertion point.
332 static void createInterleaveGroups(
333 VPlan &Plan,
334 const SmallPtrSetImpl<const InterleaveGroup<Instruction> *>
335 &InterleaveGroups,
336 VPRecipeBuilder &RecipeBuilder, const bool &EpilogueAllowed);
337
338 /// Remove dead recipes from \p Plan.
339 static void removeDeadRecipes(VPlan &Plan);
340
341 /// Update \p Plan to account for uncountable early exits by introducing
342 /// appropriate branching logic in the latch that handles early exits and the
343 /// latch exit condition. Multiple exits are handled with a dispatch block
344 /// that determines which exit to take based on lane-by-lane semantics.
345 static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB,
346 VPBasicBlock *LatchVPBB,
347 VPBasicBlock *MiddleVPBB,
349
350 /// Replaces the exit condition from
351 /// (branch-on-cond eq CanonicalIVInc, VectorTripCount)
352 /// to
353 /// (branch-on-cond eq AVLNext, 0)
354 static void convertEVLExitCond(VPlan &Plan);
355
356 /// Replace loop regions with explicit CFG.
357 static void dissolveLoopRegions(VPlan &Plan);
358
359 /// Expand BranchOnTwoConds instructions into explicit CFG with
360 /// BranchOnCond instructions. Should be called after dissolveLoopRegions.
361 static void expandBranchOnTwoConds(VPlan &Plan);
362
363 /// Transform loops with variable-length stepping after region
364 /// dissolution.
365 ///
366 /// Once loop regions are replaced with explicit CFG, loops can step with
367 /// variable vector lengths instead of fixed lengths. This transformation:
368 /// * Makes CurrentIteration-Phi concrete.
369 /// * Removes CanonicalIV and increment.
370 static void convertToVariableLengthStep(VPlan &Plan);
371
372 /// Lower abstract recipes to concrete ones, that can be codegen'd.
373 static void convertToConcreteRecipes(VPlan &Plan);
374
375 /// This function converts initial recipes to the abstract recipes and clamps
376 /// \p Range based on cost model for following optimizations and cost
377 /// estimations. The converted abstract recipes will lower to concrete
378 /// recipes before codegen.
379 static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx,
380 VFRange &Range);
381
382 /// Perform instcombine-like simplifications on recipes in \p Plan.
383 static void simplifyRecipes(VPlan &Plan);
384
385 /// Remove BranchOnCond recipes with true or false conditions together with
386 /// removing dead edges to their successors. If \p OnlyLatches is true, only
387 /// process loop latches.
388 static void removeBranchOnConst(VPlan &Plan, bool OnlyLatches = false);
389
390 /// Perform common-subexpression-elimination on \p Plan.
391 static void cse(VPlan &Plan);
392
393 /// If there's a single exit block, optimize its phi recipes that use exiting
394 /// IV values by feeding them precomputed end values instead, possibly taken
395 /// one step backwards.
396 static void optimizeInductionLiveOutUsers(VPlan &Plan,
397 PredicatedScalarEvolution &PSE,
398 bool FoldTail);
399
400 /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
401 static void materializeBroadcasts(VPlan &Plan);
402
403 /// Hoist single-scalar loads with invariant addresses out of the vector loop
404 /// to the preheader, if they are proven not to alias with any stores in the
405 /// plan using noalias metadata.
406 static void hoistInvariantLoads(VPlan &Plan);
407
408 /// Hoist predicated loads from the same address to the loop entry block, if
409 /// they are guaranteed to execute on both paths (i.e., in replicate regions
410 /// with complementary masks P and NOT P).
411 static void hoistPredicatedLoads(VPlan &Plan, PredicatedScalarEvolution &PSE,
412 const Loop *L);
413
414 /// Sink predicated stores to the same address with complementary predicates
415 /// (P and NOT P) to an unconditional store with select recipes for the
416 /// stored values. This eliminates branching overhead when all paths
417 /// unconditionally store to the same location.
418 static void sinkPredicatedStores(VPlan &Plan, PredicatedScalarEvolution &PSE,
419 const Loop *L);
420
421 /// Materialize vector trip counts for constants early if it can simply be
422 /// computed as (Original TC / (VF * UF)) * VF * UF.
423 static void
424 materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF,
425 unsigned BestUF,
426 PredicatedScalarEvolution &PSE);
427
428 /// Materialize vector trip count computations to a set of VPInstructions.
429 /// \p Step is used as the step value for the trip count computation.
430 static void materializeVectorTripCount(VPlan &Plan,
431 VPBasicBlock *VectorPHVPBB,
432 bool TailByMasking,
433 bool RequiresScalarEpilogue,
434 VPValue *Step);
435
436 /// Materialize the backedge-taken count to be computed explicitly using
437 /// VPInstructions.
438 static void materializeBackedgeTakenCount(VPlan &Plan,
439 VPBasicBlock *VectorPH);
440
441 /// Add explicit Build[Struct]Vector recipes to Pack multiple scalar values
442 /// into vectors and Unpack recipes to extract scalars from vectors as
443 /// needed.
444 static void materializePacksAndUnpacks(VPlan &Plan);
445
446 /// Materialize UF, VF and VFxUF to be computed explicitly using
447 /// VPInstructions.
448 static void materializeFactors(VPlan &Plan, VPBasicBlock *VectorPH,
449 ElementCount VF);
450
451 /// Expand VPExpandSCEVRecipes in \p Plan's entry block. Each
452 /// VPExpandSCEVRecipe is replaced with a live-in wrapping the expanded IR
453 /// value. A mapping from SCEV expressions to their expanded IR value is
454 /// returned.
455 static DenseMap<const SCEV *, Value *> expandSCEVs(VPlan &Plan,
456 ScalarEvolution &SE);
457
458 /// Try to find a single VF among \p Plan's VFs for which all interleave
459 /// groups (with known minimum VF elements) can be replaced by wide loads and
460 /// stores processing VF elements, if all transformed interleave groups access
461 /// the full vector width (checked via the maximum vector register width). If
462 /// the transformation can be applied, the original \p Plan will be split in
463 /// 2:
464 /// 1. The original Plan with the single VF containing the optimized recipes
465 /// using wide loads instead of interleave groups.
466 /// 2. A new clone which contains all VFs of Plan except the optimized VF.
467 ///
468 /// This effectively is a very simple form of loop-aware SLP, where we use
469 /// interleave groups to identify candidates.
470 static std::unique_ptr<VPlan>
471 narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI);
472
473 /// Adapts the vector loop region for tail folding by introducing a header
474 /// mask and conditionally executing the content of the region:
475 ///
476 /// Vector loop region before:
477 /// +-------------------------------------------+
478 /// |%iv = ... |
479 /// |... |
480 /// |%iv.next = add %iv, vfxuf |
481 /// |branch-on-count %iv.next, vector-trip-count|
482 /// +-------------------------------------------+
483 ///
484 /// Vector loop region after:
485 /// +-------------------------------------------+
486 /// |%iv = ... |
487 /// |%wide.iv = widen-canonical-iv ... |
488 /// |%header-mask = icmp ule %wide.iv, BTC |
489 /// |branch-on-cond %header-mask |---+
490 /// +-------------------------------------------+ |
491 /// | |
492 /// v |
493 /// +-------------------------------------------+ |
494 /// | ... | |
495 /// +-------------------------------------------+ |
496 /// | |
497 /// v |
498 /// +-------------------------------------------+ |
499 /// |<phis> = phi [..., ...], [poison, header] |
500 /// |%iv.next = add %iv, vfxuf |<--+
501 /// |branch-on-count %iv.next, vector-trip-count|
502 /// +-------------------------------------------+
503 ///
504 /// Any VPInstruction::ExtractLastLanes are also updated to extract from the
505 /// last active lane of the header mask.
506 static void foldTailByMasking(VPlan &Plan);
507
508 /// Predicate and linearize the control-flow in the only loop region of
509 /// \p Plan.
510 static void introduceMasksAndLinearize(VPlan &Plan);
511
512 /// Add branch weight metadata, if the \p Plan's middle block is terminated by
513 /// a BranchOnCond recipe.
514 static void
515 addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
516 std::optional<unsigned> VScaleForTuning);
517
518 /// Handle users in the exit block for first order recurrences in the original
519 /// exit block. The penultimate value of recurrences is fed to their LCSSA phi
520 /// users in the original exit block using the VPIRInstruction wrapping the
521 /// LCSSA phi.
522 static void addExitUsersForFirstOrderRecurrences(VPlan &Plan, VFRange &Range);
523
524 /// Optimize FindLast reductions selecting IVs (or expressions of IVs) by
525 /// converting them to FindIV reductions, if their IV range excludes a
526 /// suitable sentinel value. For expressions of IVs, the expression is sunk
527 /// to the middle block.
528 static void optimizeFindIVReductions(VPlan &Plan,
529 PredicatedScalarEvolution &PSE, Loop &L);
530
531 /// Detect and create partial reduction recipes for scaled reductions in
532 /// \p Plan. Must be called after recipe construction. If partial reductions
533 /// are only valid for a subset of VFs in Range, Range.End is updated.
534 static void createPartialReductions(VPlan &Plan, VPCostContext &CostCtx,
535 VFRange &Range);
536
537 /// Convert load/store VPInstructions in \p Plan into widened or replicate
538 /// recipes. Non load/store input instructions are left unchanged.
539 static void makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range,
540 VPRecipeBuilder &RecipeBuilder);
541};
542
543} // namespace llvm
544
545#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
static constexpr uint32_t MinItersBypassWeights[]
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const char PassName[]
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
A struct for saving information about induction variables.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
The optimization diagnostic interface.
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
The main scalar evolution driver.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
VPlan-based builder utility analogous to IRBuilder.
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4337
Helper class to create VPRecipes from IR instructions.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition VPlan.cpp:835
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4518
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1095
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4655
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI_FOR_TEST cl::opt< bool > VerifyEachVPlan
LLVM_ABI_FOR_TEST cl::opt< bool > VPlanPrintAfterAll
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:83
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI_FOR_TEST cl::list< std::string > VPlanPrintAfterPasses
TargetTransformInfo TTI
LLVM_ABI_FOR_TEST bool verifyVPlanIsValid(const VPlan &Plan)
Verify invariants for general VPlans.
LLVM_ABI_FOR_TEST cl::opt< bool > VPlanPrintVectorRegionScope
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, VPBasicBlock *MiddleVPBB, UncountableExitStyle Style)
Update Plan to account for uncountable early exits by introducing appropriate branching logic in the ...
static LLVM_ABI_FOR_TEST bool tryToConvertVPInstructionsToVPRecipes(VPlan &Plan, const TargetLibraryInfo &TLI)
Replaces the VPInstructions in Plan with corresponding widen recipes.
static void makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range, VPRecipeBuilder &RecipeBuilder)
Convert load/store VPInstructions in Plan into widened or replicate recipes.
static void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue, bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE, VPBasicBlock *CheckBlock=nullptr)
static decltype(auto) runPass(StringRef PassName, PassTy &&Pass, VPlan &Plan, ArgsTy &&...Args)
Helper to run a VPlan pass Pass on VPlan, forwarding extra arguments to the pass.
static void materializeBroadcasts(VPlan &Plan)
Add explicit broadcasts for live-ins and VPValues defined in Plan's entry block if they are used as v...
static void materializePacksAndUnpacks(VPlan &Plan)
Add explicit Build[Struct]Vector recipes to Pack multiple scalar values into vectors and Unpack recip...
static LLVM_ABI_FOR_TEST std::unique_ptr< VPlan > buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, LoopVersioning *LVer=nullptr)
Create a base VPlan0, serving as the common starting point for all later candidates.
static void createInterleaveGroups(VPlan &Plan, const SmallPtrSetImpl< const InterleaveGroup< Instruction > * > &InterleaveGroups, VPRecipeBuilder &RecipeBuilder, const bool &EpilogueAllowed)
static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF, PredicatedScalarEvolution &PSE)
Try to simplify VPInstruction::ExplicitVectorLength recipes when the AVL is known to be <= VF,...
static void removeBranchOnConst(VPlan &Plan, bool OnlyLatches=false)
Remove BranchOnCond recipes with true or false conditions together with removing dead edges to their ...
static void introduceMasksAndLinearize(VPlan &Plan)
Predicate and linearize the control-flow in the only loop region of Plan.
static void materializeFactors(VPlan &Plan, VPBasicBlock *VectorPH, ElementCount VF)
Materialize UF, VF and VFxUF to be computed explicitly using VPInstructions.
static void createInLoopReductionRecipes(VPlan &Plan, const DenseSet< BasicBlock * > &BlocksNeedingPredication, ElementCount MinVF)
Create VPReductionRecipes for in-loop reductions.
static void foldTailByMasking(VPlan &Plan)
Adapts the vector loop region for tail folding by introducing a header mask and conditionally executi...
static void materializeBackedgeTakenCount(VPlan &Plan, VPBasicBlock *VectorPH)
Materialize the backedge-taken count to be computed explicitly using VPInstructions.
static void addMinimumVectorEpilogueIterationCheck(VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE)
Add a check to Plan to see if the epilogue vector loop should be executed.
static void hoistInvariantLoads(VPlan &Plan)
Hoist single-scalar loads with invariant addresses out of the vector loop to the preheader,...
static void addActiveLaneMask(VPlan &Plan, bool UseActiveLaneMaskForControlFlow)
Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an (active-lane-mask recipe,...
static bool handleMultiUseReductions(VPlan &Plan, OptimizationRemarkEmitter *ORE, Loop *TheLoop)
Try to legalize reductions with multiple in-loop uses.
static void dropPoisonGeneratingRecipes(VPlan &Plan, const std::function< bool(BasicBlock *)> &BlockNeedsPredication)
Drop poison flags from recipes that may generate a poison value that is used after vectorization,...
static void createAndOptimizeReplicateRegions(VPlan &Plan)
Wrap predicated VPReplicateRecipes with a mask operand in an if-then region block and remove the mask...
static void convertToVariableLengthStep(VPlan &Plan)
Transform loops with variable-length stepping after region dissolution.
static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF, std::optional< unsigned > VScaleForTuning)
Add branch weight metadata, if the Plan's middle block is terminated by a BranchOnCond recipe.
static std::unique_ptr< VPlan > narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI)
Try to find a single VF among Plan's VFs for which all interleave groups (with known minimum VF eleme...
static bool handleFindLastReductions(VPlan &Plan)
Check if Plan contains any FindLast reductions.
static void unrollByUF(VPlan &Plan, unsigned UF)
Explicitly unroll Plan by UF.
static DenseMap< const SCEV *, Value * > expandSCEVs(VPlan &Plan, ScalarEvolution &SE)
Expand VPExpandSCEVRecipes in Plan's entry block.
static void convertToConcreteRecipes(VPlan &Plan)
Lower abstract recipes to concrete ones that can be codegen'd.
static void expandBranchOnTwoConds(VPlan &Plan)
Expand BranchOnTwoConds instructions into explicit CFG with BranchOnCond instructions.
static void hoistPredicatedLoads(VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L)
Hoist predicated loads from the same address to the loop entry block, if they are guaranteed to execu...
static bool mergeBlocksIntoPredecessors(VPlan &Plan)
Remove redundant VPBasicBlocks by merging them into their single predecessor if the latter has a sing...
static void optimizeFindIVReductions(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &L)
Optimize FindLast reductions selecting IVs (or expressions of IVs) by converting them to FindIV reduc...
static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx, VFRange &Range)
This function converts initial recipes to the abstract recipes and clamps Range based on cost model f...
static void materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)
static void addExitUsersForFirstOrderRecurrences(VPlan &Plan, VFRange &Range)
Handle users of first-order recurrences in the original exit block.
static void createHeaderPhiRecipes(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, const MapVector< PHINode *, InductionDescriptor > &Inductions, const MapVector< PHINode *, RecurrenceDescriptor > &Reductions, const SmallPtrSetImpl< const PHINode * > &FixedOrderRecurrences, const SmallPtrSetImpl< PHINode * > &InLoopReductions, bool AllowReordering)
Replace VPPhi recipes in Plan's header with corresponding VPHeaderPHIRecipe subclasses for inductions...
static void addExplicitVectorLength(VPlan &Plan, const std::optional< unsigned > &MaxEVLSafeElements)
Add a VPCurrentIterationPHIRecipe and related recipes to Plan and replace all uses of the canonical ...
static void optimizeEVLMasks(VPlan &Plan)
Optimize recipes which use an EVL-based header mask to VP intrinsics, for example:
static LLVM_ABI_FOR_TEST bool handleEarlyExits(VPlan &Plan, UncountableExitStyle Style, Loop *TheLoop, PredicatedScalarEvolution &PSE, DominatorTree &DT, AssumptionCache *AC)
Update Plan to account for all early exits.
static void replaceSymbolicStrides(VPlan &Plan, PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &StridesMap)
Replace symbolic strides from StridesMap in Plan with constants when possible.
static bool handleMaxMinNumReductions(VPlan &Plan)
Check if Plan contains any FMaxNum or FMinNum reductions.
static LLVM_ABI_FOR_TEST void createLoopRegions(VPlan &Plan)
Replace loops in Plan's flat CFG with VPRegionBlocks, turning Plan's flat CFG into a hierarchical CFG...
static void removeDeadRecipes(VPlan &Plan)
Remove dead recipes from Plan.
static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock, bool AddBranchWeights)
Wrap runtime check block CheckBlock in a VPIRBB and Cond in a VPValue and connect the block to Plan,...
static void simplifyRecipes(VPlan &Plan)
Perform instcombine-like simplifications on recipes in Plan.
static void sinkPredicatedStores(VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L)
Sink predicated stores to the same address with complementary predicates (P and NOT P) to an uncondit...
static void replicateByVF(VPlan &Plan, ElementCount VF)
Replace replicating VPReplicateRecipe, VPScalarIVStepsRecipe and VPInstruction in Plan with VF single...
static void addIterationCountCheckBlock(VPlan &Plan, ElementCount VF, unsigned UF, bool RequiresScalarEpilogue, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE)
Add a new check block before the vector preheader to Plan to check if the main vector loop should be ...
static void clearReductionWrapFlags(VPlan &Plan)
Clear NSW/NUW flags from reduction instructions if necessary.
static void optimizeInductionLiveOutUsers(VPlan &Plan, PredicatedScalarEvolution &PSE, bool FoldTail)
If there's a single exit block, optimize its phi recipes that use exiting IV values by feeding them p...
static void createPartialReductions(VPlan &Plan, VPCostContext &CostCtx, VFRange &Range)
Detect and create partial reduction recipes for scaled reductions in Plan.
static void cse(VPlan &Plan)
Perform common-subexpression-elimination on Plan.
static void materializeVectorTripCount(VPlan &Plan, VPBasicBlock *VectorPHVPBB, bool TailByMasking, bool RequiresScalarEpilogue, VPValue *Step)
Materialize vector trip count computations to a set of VPInstructions.
static LLVM_ABI_FOR_TEST void optimize(VPlan &Plan)
Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...
static void dissolveLoopRegions(VPlan &Plan)
Replace loop regions with explicit CFG.
static void truncateToMinimalBitwidths(VPlan &Plan, const MapVector< Instruction *, uint64_t > &MinBWs)
Insert truncates and extends for any truncated recipe.
static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder)
Try to have all users of fixed-order recurrences appear after the recipe defining their previous valu...
static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)
Optimize Plan based on BestVF and BestUF.
static void convertEVLExitCond(VPlan &Plan)
Replaces the exit condition from (branch-on-cond eq CanonicalIVInc, VectorTripCount) to (branch-on-co...
static LLVM_ABI_FOR_TEST void addMiddleCheck(VPlan &Plan, bool TailFolded)
If a check is needed to guard executing the scalar epilogue loop, it will be added to the middle bloc...