LLVM 23.0.0git
VPlanTransforms.h
Go to the documentation of this file.
1//===- VPlanTransforms.h - Utility VPlan to VPlan transforms --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides utility VPlan to VPlan transformations.
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
14#define LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
15
16#include "VPlan.h"
17#include "VPlanVerifier.h"
19#include "llvm/ADT/ScopeExit.h"
23#include "llvm/Support/Regex.h"
24
25namespace llvm {
26
28class Instruction;
29class Loop;
30class LoopVersioning;
32class PHINode;
33class ScalarEvolution;
37class VPBuilder;
38class VPRecipeBuilder;
39struct VFRange;
40
43
44#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
48#endif
49
51 /// Helper to run a VPlan pass \p Pass on \p Plan, forwarding extra arguments
52 /// to the pass. Performs verification/printing after each VPlan pass if
53 /// requested via command line options.
 ///
 /// \tparam EnableVerify When false, the post-pass verification is skipped
 /// even if VerifyEachVPlan is set.
 /// \param PassName Name printed in the "after <pass>" header and matched
 /// against the VPlanPrintAfterPasses entries using Regex.
 /// \param Pass Callable invoked as Pass(Plan, Args...).
 /// \param Plan The plan handed to \p Pass and printed/verified afterwards.
 /// \param Args Extra arguments perfectly forwarded to \p Pass.
 /// \returns Whatever \p Pass returns; decltype(auto) keeps references intact.
54 template <bool EnableVerify = true, typename PassTy, typename... ArgsTy>
55 static decltype(auto) runPass(StringRef PassName, PassTy &&Pass, VPlan &Plan,
56 ArgsTy &&...Args) {
 // The actions below are deferred to scope exit, i.e. they run once \p Pass
 // has been invoked by the return statement at the end of this function.
57 scope_exit PostTransformActions{[&]() {
58#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
59 // Make sure to print before verification, so that output is more useful
60 // in case of failures:
62 (VPlanPrintAfterPasses.getNumOccurrences() > 0 &&
64 return Regex(Entry).match(PassName);
65 }))) {
66 dbgs()
67 << "VPlan for loop in '"
69 << "' after " << PassName << '\n';
72 else
73 dbgs() << Plan << '\n';
74 }
75#endif
 // Verification sits outside the debug/dump-only region above; it can be
 // switched off per call through the EnableVerify template argument.
76 if (VerifyEachVPlan && EnableVerify) {
77 if (!verifyVPlanIsValid(Plan))
78 report_fatal_error("Broken VPlan found, compilation aborted!");
79 }
80 }};
81
82 return std::forward<PassTy>(Pass)(Plan, std::forward<ArgsTy>(Args)...);
83 }
// Invoke VPlanTransforms::runPass for PASS, using the stringified PASS
// expression (#PASS) as the pass name. The remaining arguments are handed to
// runPass unchanged.
84#define RUN_VPLAN_PASS(PASS, ...) \
85 llvm::VPlanTransforms::runPass(#PASS, PASS, __VA_ARGS__)
// Same as RUN_VPLAN_PASS, but instantiates runPass with EnableVerify = false.
86#define RUN_VPLAN_PASS_NO_VERIFY(PASS, ...) \
87 llvm::VPlanTransforms::runPass<false>(#PASS, PASS, __VA_ARGS__)
88
89 /// Create a base VPlan0, serving as the common starting point for all later
90 /// candidates. It consists of an initial plain CFG loop with loop blocks from
91 /// \p TheLoop being directly translated to VPBasicBlocks with VPInstruction
92 /// corresponding to the input IR.
93 ///
94 /// The created loop is wrapped in an initial skeleton to facilitate
95 /// vectorization, consisting of a vector pre-header, an exit block for the
96 /// main vector loop (middle.block) and a new block as preheader of the scalar
97 /// loop (scalar.ph). See below for an illustration. It also creates a
98 /// VPValue expression for the original trip count.
99 ///
100 /// [ ] <-- Plan's entry VPIRBasicBlock, wrapping the original loop's
101 /// / \ old preheader. Will contain iteration number check and SCEV
102 /// | | expansions.
103 /// | |
104 /// / v
105 /// | [ ] <-- vector loop bypass (may consist of multiple blocks) will be
106 /// | / | added later.
107 /// | / v
108 /// || [ ] <-- vector pre header.
109 /// |/ |
110 /// | v
111 /// | [ ] \ <-- plain CFG loop wrapping original loop to be vectorized.
112 /// | [ ]_|
113 /// | |
114 /// | v
115 /// | [ ] <--- middle-block with the branch to successors
116 /// | / |
117 /// | / |
118 /// | | v
119 /// \--->[ ] <--- scalar preheader (initially a VPBasicBlock, which will be
120 /// | | replaced later by a VPIRBasicBlock wrapping the scalar
121 /// | | preheader basic block).
122 /// | |
123 /// v <-- edge from middle to exit iff epilogue is not required.
124 /// | [ ] \
125 /// | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue,
126 /// | | header wrapped in VPIRBasicBlock).
127 /// \ |
128 /// \ v
129 /// >[ ] <-- original loop exit block(s), wrapped in VPIRBasicBlocks.
130 LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan>
131 buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
132 PredicatedScalarEvolution &PSE, LoopVersioning *LVer = nullptr);
133
134 /// Replace VPPhi recipes in \p Plan's header with corresponding
135 /// VPHeaderPHIRecipe subclasses for inductions, reductions, and
136 /// fixed-order recurrences. This processes all header phis and creates
137 /// the appropriate widened recipe for each one. For fixed-order
138 /// recurrences, also creates FirstOrderRecurrenceSplice instructions and
139 /// sinks/hoists users as needed. Returns false if any fixed-order
140 /// recurrence cannot be handled.
141 static bool createHeaderPhiRecipes(
142 VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop,
143 const MapVector<PHINode *, InductionDescriptor> &Inductions,
144 const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
145 const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
146 const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering);
147
148 /// Finalize SCEV predicates by adding induction predicates from \p Plan to
149 /// \p PSE and checking constraints. Returns false if predicated IVs have
150 /// outside-loop uses via ExitingIVValue, if SCEV predicate complexity exceeds
151 /// \p SCEVCheckThreshold, or if predicates are needed but \p OptForSize is
152 /// true.
153 static bool
154 finalizeSCEVPredicates(VPlan &Plan, PredicatedScalarEvolution &PSE,
155 bool OptForSize, unsigned SCEVCheckThreshold,
156 OptimizationRemarkEmitter *ORE, Loop *TheLoop);
157
158 /// Create VPReductionRecipes for in-loop reductions. This processes chains
159 /// of operations contributing to in-loop reductions and creates appropriate
160 /// VPReductionRecipe instances.
161 static void createInLoopReductionRecipes(VPlan &Plan, ElementCount MinVF);
162
163 /// Update \p Plan to account for all early exits. If \p Style is not
164 /// NoUncountableExit, handles uncountable early exits and checks that all
165 /// loads are dereferenceable. Returns false if a non-dereferenceable load is
166 /// found.
167 LLVM_ABI_FOR_TEST static bool
168 handleEarlyExits(VPlan &Plan, UncountableExitStyle Style, Loop *TheLoop,
169 PredicatedScalarEvolution &PSE, DominatorTree &DT,
170 AssumptionCache *AC);
171
172 /// If a check is needed to guard executing the scalar epilogue loop, it will
173 /// be added to the middle block.
174 LLVM_ABI_FOR_TEST static void addMiddleCheck(VPlan &Plan, bool TailFolded);
175
176 /// Create a check in \p CheckBlock to see if the vector loop should be
177 /// executed. May create VPExpandSCEV recipes in the plan's entry block.
178 static void addMinimumIterationCheck(
179 VPlan &Plan, ElementCount VF, unsigned UF,
180 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
181 bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
182 DebugLoc DL, PredicatedScalarEvolution &PSE, VPBasicBlock *CheckBlock);
183
184 /// Add a new check block before the vector preheader to \p Plan to check if
185 /// the main vector loop should be executed (TC >= VF * UF).
186 static void
187 addIterationCountCheckBlock(VPlan &Plan, ElementCount VF, unsigned UF,
188 bool RequiresScalarEpilogue, Loop *OrigLoop,
190 DebugLoc DL, PredicatedScalarEvolution &PSE);
191
192 /// Add a check to \p Plan to see if the epilogue vector loop should be
193 /// executed.
195 VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue,
196 ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep,
197 unsigned EpilogueLoopStep, ScalarEvolution &SE);
198
199 /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
200 /// flat CFG into a hierarchical CFG. For the outermost loop, also create the
201 /// canonical IV's increment and adjust the latch terminator: replace
202 /// BranchOnCond with BranchOnCount, using \p DL for the canonical IV.
203 LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan, DebugLoc DL);
204
205 /// Wrap runtime check block \p CheckBlock in a VPIRBB and \p Cond in a
206 /// VPValue and connect the block to \p Plan, using the VPValue as branch
207 /// condition.
208 static void attachVPCheckBlock(VPlan &Plan, VPValue *Cond,
209 VPBasicBlock *CheckBlock,
210 bool AddBranchWeights);
211 static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock,
212 bool AddBranchWeights);
213
214 /// Replaces the VPInstructions in \p Plan with corresponding
215 /// widen recipes. Returns false if any VPInstructions could not be converted
216 /// to a wide recipe if needed.
217 LLVM_ABI_FOR_TEST static bool
219 const TargetLibraryInfo &TLI);
220
221 /// Try to legalize reductions with multiple in-loop uses. Currently only
222 /// strict and non-strict min/max reductions used by FindLastIV reductions are
223 /// supported, corresponding to computing the first and last argmin/argmax,
224 /// respectively. Otherwise return false.
225 static bool handleMultiUseReductions(VPlan &Plan,
226 OptimizationRemarkEmitter *ORE,
227 Loop *TheLoop);
228
229 /// Check if \p Plan contains any FMaxNum or FMinNum reductions. If it does,
230 /// try to update the vector loop to exit early if any input is NaN and resume
231 /// executing in the scalar loop to handle the NaNs there. Return false if
232 /// this attempt was unsuccessful.
233 static bool handleMaxMinNumReductions(VPlan &Plan);
234
235 /// Check if \p Plan contains any FindLast reductions. If it does, try to
236 /// update the vector loop to save the appropriate state using selects
237 /// for entire vectors for both the latest mask containing at least one active
238 /// element and the corresponding data vector. Return false if this attempt
239 /// was unsuccessful.
240 static bool handleFindLastReductions(VPlan &Plan);
241
242 /// Clear NSW/NUW flags from reduction instructions if necessary.
243 static void clearReductionWrapFlags(VPlan &Plan);
244
245 /// Explicitly unroll \p Plan by \p UF.
246 static void unrollByUF(VPlan &Plan, unsigned UF);
247
248 /// Replace replicating VPReplicateRecipe, VPScalarIVStepsRecipe and
249 /// VPInstruction in \p Plan with \p VF single-scalar recipes. Replicate
250 /// regions are dissolved by replicating their blocks and their recipes \p VF
251 /// times.
252 /// TODO: Also dissolve replicate regions with live outs.
253 static void replicateByVF(VPlan &Plan, ElementCount VF);
254
255 /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
256 /// resulting plan to \p BestVF and \p BestUF.
257 static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
258 unsigned BestUF,
259 PredicatedScalarEvolution &PSE);
260
261 /// Try to simplify VPInstruction::ExplicitVectorLength recipes when the AVL
262 /// is known to be <= VF, replacing them with the AVL directly.
263 static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF,
264 PredicatedScalarEvolution &PSE);
265
266 /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
267 /// optimizations, dead recipe removal, replicate region optimizations and
268 /// block merging.
269 LLVM_ABI_FOR_TEST static void optimize(VPlan &Plan);
270
271 /// Remove redundant VPBasicBlocks by merging them into their single
272 /// predecessor if the latter has a single successor.
273 static bool mergeBlocksIntoPredecessors(VPlan &Plan);
274
275 /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
276 /// region block and remove the mask operand. Optimize the created regions by
277 /// iteratively sinking scalar operands into the region, followed by merging
278 /// regions until no improvements are remaining.
279 static void createAndOptimizeReplicateRegions(VPlan &Plan);
280
281 /// Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an
282 /// (active-lane-mask recipe, wide canonical IV, trip-count). If \p
283 /// UseActiveLaneMaskForControlFlow is true, introduce a
284 /// VPActiveLaneMaskPHIRecipe.
285 static void addActiveLaneMask(VPlan &Plan,
286 bool UseActiveLaneMaskForControlFlow);
287
288 /// Insert truncates and extends for any truncated recipe. Redundant casts
289 /// will be folded later.
290 static void
291 truncateToMinimalBitwidths(VPlan &Plan,
292 const MapVector<Instruction *, uint64_t> &MinBWs);
293
294 /// Replace symbolic strides from \p StridesMap in \p Plan with constants when
295 /// possible.
296 static void
297 replaceSymbolicStrides(VPlan &Plan, PredicatedScalarEvolution &PSE,
298 const DenseMap<Value *, const SCEV *> &StridesMap);
299
300 /// Drop poison flags from recipes that may generate a poison value that is
301 /// used after vectorization, even when their operands are not poison. Those
302 /// recipes meet the following conditions:
303 /// * Contribute to the address computation of a recipe generating a widen
304 /// memory load/store (VPWidenMemoryInstructionRecipe or
305 /// VPInterleaveRecipe).
306 /// * Such a widen memory load/store is masked, but not with the header mask.
307 static void dropPoisonGeneratingRecipes(VPlan &Plan);
308
309 /// Add a VPCurrentIterationPHIRecipe and related recipes to \p Plan and
310 /// replace all uses of the canonical IV except for the canonical IV
311 /// increment with a VPCurrentIterationPHIRecipe. The canonical IV is only
312 /// used to control the loop after this transformation.
313 static void
314 addExplicitVectorLength(VPlan &Plan,
315 const std::optional<unsigned> &MaxEVLSafeElements);
316
317 /// Optimize recipes which use an EVL-based header mask to VP intrinsics, for
318 /// example:
319 ///
320 /// %mask = icmp ult step-vector, EVL
321 /// %load = load %ptr, %mask
322 /// -->
323 /// %load = vp.load %ptr, EVL
324 static void optimizeEVLMasks(VPlan &Plan);
325
326 /// For each Interleave Group in \p InterleaveGroups replace the Recipes
327 /// widening its memory instructions with a single VPInterleaveRecipe at its
328 /// insertion point.
329 static void createInterleaveGroups(
330 VPlan &Plan,
331 const SmallPtrSetImpl<const InterleaveGroup<Instruction> *>
332 &InterleaveGroups,
333 const bool &EpilogueAllowed);
334
335 /// Transform widen memory recipes into strided access recipes when legal
336 /// and profitable. Clamps \p Range to maintain consistency with widen
337 /// decisions of \p Plan, and uses \p Ctx to evaluate the cost.
338 static void convertToStridedAccesses(VPlan &Plan,
339 PredicatedScalarEvolution &PSE, Loop &L,
340 VPCostContext &Ctx, VFRange &Range);
341
342 /// Remove dead recipes from \p Plan.
343 static void removeDeadRecipes(VPlan &Plan);
344
345 /// Update \p Plan to account for uncountable early exits by introducing
346 /// appropriate branching logic in the latch that handles early exits and the
347 /// latch exit condition. Multiple exits are handled with a dispatch block
348 /// that determines which exit to take based on lane-by-lane semantics.
349 static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB,
350 VPBasicBlock *LatchVPBB,
351 VPBasicBlock *MiddleVPBB,
353
354 /// Replaces the exit condition from
355 /// (branch-on-cond eq CanonicalIVInc, VectorTripCount)
356 /// to
357 /// (branch-on-cond eq AVLNext, 0)
358 static void convertEVLExitCond(VPlan &Plan);
359
360 /// Replace loop regions with explicit CFG.
361 static void dissolveLoopRegions(VPlan &Plan);
362
363 /// Expand BranchOnTwoConds instructions into explicit CFG with
364 /// BranchOnCond instructions. Should be called after dissolveLoopRegions.
365 static void expandBranchOnTwoConds(VPlan &Plan);
366
367 /// Transform loops with variable-length stepping after region
368 /// dissolution.
369 ///
370 /// Once loop regions are replaced with explicit CFG, loops can step with
371 /// variable vector lengths instead of fixed lengths. This transformation:
372 /// * Makes CurrentIteration-Phi concrete.
373 /// * Removes CanonicalIV and increment.
374 static void convertToVariableLengthStep(VPlan &Plan);
375
376 /// Lower abstract recipes to concrete ones, that can be codegen'd.
377 static void convertToConcreteRecipes(VPlan &Plan);
378
379 /// This function converts initial recipes to the abstract recipes and clamps
380 /// \p Range based on cost model for following optimizations and cost
381 /// estimations. The converted abstract recipes will lower to concrete
382 /// recipes before codegen.
383 static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx,
384 VFRange &Range);
385
386 /// Perform instcombine-like simplifications on recipes in \p Plan.
387 static void simplifyRecipes(VPlan &Plan);
388
389 /// Remove BranchOnCond recipes with true or false conditions together with
390 /// removing dead edges to their successors. If \p OnlyLatches is true, only
391 /// process loop latches.
392 static void removeBranchOnConst(VPlan &Plan, bool OnlyLatches = false);
393
394 /// Perform common-subexpression-elimination on \p Plan.
395 static void cse(VPlan &Plan);
396
397 /// If there's a single exit block, optimize its phi recipes that use exiting
398 /// IV values by feeding them precomputed end values instead, possibly taken
399 /// one step backwards.
400 static void optimizeInductionLiveOutUsers(VPlan &Plan,
401 PredicatedScalarEvolution &PSE,
402 bool FoldTail);
403
404 /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's
 /// entry block if they are used as vectors.
405 static void materializeBroadcasts(VPlan &Plan);
406
407 /// Hoist predicated loads from the same address to the loop entry block, if
408 /// they are guaranteed to execute on both paths (i.e., in replicate regions
409 /// with complementary masks P and NOT P).
410 static void hoistPredicatedLoads(VPlan &Plan, PredicatedScalarEvolution &PSE,
411 const Loop *L);
412
413 /// Sink predicated stores to the same address with complementary predicates
414 /// (P and NOT P) to an unconditional store with select recipes for the
415 /// stored values. This eliminates branching overhead when all paths
416 /// unconditionally store to the same location.
417 static void sinkPredicatedStores(VPlan &Plan, PredicatedScalarEvolution &PSE,
418 const Loop *L);
419
420 /// Materialize vector trip counts for constants early if it can simply be
421 /// computed as (Original TC / VF * UF) * VF * UF.
422 static void
423 materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF,
424 unsigned BestUF,
425 PredicatedScalarEvolution &PSE);
426
427 /// Materialize vector trip count computations to a set of VPInstructions.
428 /// \p Step is used as the step value for the trip count computation.
429 /// \p MaxRuntimeStep is the maximum possible runtime value of Step, used to
430 /// prove the trip count is divisible by the step for scalable VFs.
431 static void materializeVectorTripCount(
432 VPlan &Plan, VPBasicBlock *VectorPHVPBB, bool TailByMasking,
433 bool RequiresScalarEpilogue, VPValue *Step,
434 std::optional<uint64_t> MaxRuntimeStep = std::nullopt);
435
436 /// Materialize the backedge-taken count to be computed explicitly using
437 /// VPInstructions.
438 static void materializeBackedgeTakenCount(VPlan &Plan,
439 VPBasicBlock *VectorPH);
440
441 /// Add explicit Build[Struct]Vector recipes to Pack multiple scalar values
442 /// into vectors and Unpack recipes to extract scalars from vectors as
443 /// needed.
444 static void materializePacksAndUnpacks(VPlan &Plan);
445
446 /// Materialize UF, VF and VFxUF to be computed explicitly using
447 /// VPInstructions.
448 static void materializeFactors(VPlan &Plan, VPBasicBlock *VectorPH,
449 ElementCount VF);
450
451 /// Attaches the alias-mask to the existing header-mask.
452 static void attachAliasMaskToHeaderMask(VPlan &Plan);
453
454 /// Materializes the alias mask within the \p AliasCheckVPBB block. Updates
455 /// the header mask of the loop to use the alias mask. Returns the clamped VF.
456 static VPValue *materializeAliasMask(VPlan &Plan,
457 VPBasicBlock *AliasCheckVPBB,
458 ArrayRef<PointerDiffInfo> DiffChecks);
459
460 /// Materializes the alias mask within a check block before the loop. The
461 /// vector loop will only be entered if the clamped VF from the alias mask
462 /// is not scalar.
464 VPlan &Plan, ArrayRef<PointerDiffInfo> DiffChecks, bool HasBranchWeights);
465
466 /// Try to expand VPExpandSCEVRecipes in \p Plan's entry block to
467 /// VPInstructions. Recipes that cannot be expanded (like casts, min/max) are
468 /// kept for later IR-level expansion.
469 static void expandSCEVsToVPInstructions(VPlan &Plan, ScalarEvolution &SE);
470
471 /// Expand remaining VPExpandSCEVRecipes in \p Plan's entry block using
472 /// SCEVExpander. Each VPExpandSCEVRecipe is replaced with a live-in wrapping
473 /// the expanded IR value. A mapping from SCEV expressions to their expanded
474 /// IR value is returned.
475 static DenseMap<const SCEV *, Value *> expandSCEVs(VPlan &Plan,
476 ScalarEvolution &SE);
477
478 /// Try to find a single VF among \p Plan's VFs for which all interleave
479 /// groups (with known minimum VF elements) can be replaced by wide loads and
480 /// stores processing VF elements, if all transformed interleave groups access
481 /// the full vector width (checked via the maximum vector register width). If
482 /// the transformation can be applied, the original \p Plan will be split in
483 /// 2:
484 /// 1. The original Plan with the single VF containing the optimized recipes
485 /// using wide loads instead of interleave groups.
486 /// 2. A new clone which contains all VFs of Plan except the optimized VF.
487 ///
488 /// This effectively is a very simple form of loop-aware SLP, where we use
489 /// interleave groups to identify candidates.
490 static std::unique_ptr<VPlan>
491 narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI);
492
493 /// Adapts the vector loop region for tail folding by introducing a header
494 /// mask and conditionally executing the content of the region:
495 ///
496 /// Vector loop region before:
497 /// +-------------------------------------------+
498 /// |%iv = ... |
499 /// |... |
500 /// |%iv.next = add %iv, vfxuf |
501 /// |branch-on-count %iv.next, vector-trip-count|
502 /// +-------------------------------------------+
503 ///
504 /// Vector loop region after:
505 /// +-------------------------------------------+
506 /// |%iv = ... |
507 /// |%wide.iv = widen-canonical-iv ... |
508 /// |%header-mask = icmp ule %wide.iv, BTC |
509 /// |branch-on-cond %header-mask |---+
510 /// +-------------------------------------------+ |
511 /// | |
512 /// v |
513 /// +-------------------------------------------+ |
514 /// | ... | |
515 /// +-------------------------------------------+ |
516 /// | |
517 /// v |
518 /// +-------------------------------------------+ |
519 /// |<phis> = phi [..., ...], [poison, header] |
520 /// |%iv.next = add %iv, vfxuf |<--+
521 /// |branch-on-count %iv.next, vector-trip-count|
522 /// +-------------------------------------------+
523 ///
524 /// Any VPInstruction::ExtractLastLanes are also updated to extract from the
525 /// last active lane of the header mask.
526 static void foldTailByMasking(VPlan &Plan);
527
528 /// Predicate and linearize the control-flow in the only loop region of
529 /// \p Plan.
530 static void introduceMasksAndLinearize(VPlan &Plan);
531
532 /// Replace a VPWidenCanonicalIVRecipe if it is present in \p Plan, with a
533 /// VPWidenIntOrFpInductionRecipe, provided it would not cause additional
534 /// spills for \p VF at unroll factor \p UF.
536 VPlan &Plan, ScalarEvolution &SE, const TargetTransformInfo &TTI,
538 unsigned UF, const SmallPtrSetImpl<const Value *> &ValuesToIgnore);
539
540 /// Add branch weight metadata, if the \p Plan's middle block is terminated by
541 /// a BranchOnCond recipe.
542 static void
543 addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
544 std::optional<unsigned> VScaleForTuning);
545
546 /// Adjust first-order recurrence users in the middle block: create
547 /// penultimate element extracts for LCSSA phi users, and handle penultimate
548 /// extracts of the last active lane edge.
549 static void adjustFirstOrderRecurrenceMiddleUsers(VPlan &Plan,
550 VFRange &Range);
551
552 /// Optimize FindLast reductions selecting IVs (or expressions of IVs) by
553 /// converting them to FindIV reductions, if their IV range excludes a
554 /// suitable sentinel value. For expressions of IVs, the expression is sunk
555 /// to the middle block.
556 static void optimizeFindIVReductions(VPlan &Plan,
557 PredicatedScalarEvolution &PSE, Loop &L);
558
559 /// Detect and create partial reduction recipes for scaled reductions in
560 /// \p Plan. Must be called after recipe construction. If partial reductions
561 /// are only valid for a subset of VFs in Range, Range.End is updated.
562 static void createPartialReductions(VPlan &Plan, VPCostContext &CostCtx,
563 VFRange &Range);
564
565 /// Convert load/store VPInstructions in \p Plan into widened or replicate
566 /// recipes. Non load/store input instructions are left unchanged.
567 static void makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range,
568 VPRecipeBuilder &RecipeBuilder);
569
570 /// Make VPlan-based scalarization decision prior to delegating to the ones
571 /// made by the legacy CM. Only transforms "usesFirstLaneOnly" def-use chains
572 /// enabled by prior widening of consecutive memory operations for now.
573 static void makeScalarizationDecisions(VPlan &Plan, VFRange &Range);
574
575 /// Convert call VPInstructions in \p Plan into widened call, vector
576 /// intrinsic or replicate recipes based on a cost comparison via \p CostCtx.
577 static void makeCallWideningDecisions(VPlan &Plan, VFRange &Range,
578 VPRecipeBuilder &RecipeBuilder,
579 VPCostContext &CostCtx);
580};
581
582} // namespace llvm
583
584#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static constexpr uint32_t MinItersBypassWeights[]
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This pass exposes codegen information to IR-level passes.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const char PassName[]
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
A struct for saving information about induction variables.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
The optimization diagnostic interface.
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
The main scalar evolution driver.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
VPlan-based builder utility analogous to IRBuilder.
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4515
Helper class to create VPRecipes from IR instructions.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition VPlan.cpp:812
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4696
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1068
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4841
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI_FOR_TEST cl::opt< bool > VerifyEachVPlan
LLVM_ABI_FOR_TEST cl::opt< bool > VPlanPrintAfterAll
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:79
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI_FOR_TEST cl::list< std::string > VPlanPrintAfterPasses
TargetTransformInfo TTI
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI_FOR_TEST bool verifyVPlanIsValid(const VPlan &Plan)
Verify invariants for general VPlans.
LLVM_ABI_FOR_TEST cl::opt< bool > VPlanPrintVectorRegionScope
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, VPBasicBlock *MiddleVPBB, UncountableExitStyle Style)
Update Plan to account for uncountable early exits by introducing appropriate branching logic in the ...
static VPValue * materializeAliasMask(VPlan &Plan, VPBasicBlock *AliasCheckVPBB, ArrayRef< PointerDiffInfo > DiffChecks)
Materializes within the AliasCheckVPBB block.
static LLVM_ABI_FOR_TEST bool tryToConvertVPInstructionsToVPRecipes(VPlan &Plan, const TargetLibraryInfo &TLI)
Replaces the VPInstructions in Plan with corresponding widen recipes.
static void makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range, VPRecipeBuilder &RecipeBuilder)
Convert load/store VPInstructions in Plan into widened or replicate recipes.
static decltype(auto) runPass(StringRef PassName, PassTy &&Pass, VPlan &Plan, ArgsTy &&...Args)
Helper to run a VPlan pass Pass on VPlan, forwarding extra arguments to the pass.
static void expandSCEVsToVPInstructions(VPlan &Plan, ScalarEvolution &SE)
Try to expand VPExpandSCEVRecipes in Plan's entry block to VPInstructions.
static bool createHeaderPhiRecipes(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, const MapVector< PHINode *, InductionDescriptor > &Inductions, const MapVector< PHINode *, RecurrenceDescriptor > &Reductions, const SmallPtrSetImpl< const PHINode * > &FixedOrderRecurrences, const SmallPtrSetImpl< PHINode * > &InLoopReductions, bool AllowReordering)
Replace VPPhi recipes in Plan's header with corresponding VPHeaderPHIRecipe subclasses for inductions...
static void materializeBroadcasts(VPlan &Plan)
Add explicit broadcasts for live-ins and VPValues defined in Plan's entry block if they are used as v...
static void materializePacksAndUnpacks(VPlan &Plan)
Add explicit Build[Struct]Vector recipes to Pack multiple scalar values into vectors and Unpack recip...
static void createInterleaveGroups(VPlan &Plan, const SmallPtrSetImpl< const InterleaveGroup< Instruction > * > &InterleaveGroups, const bool &EpilogueAllowed)
static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF, PredicatedScalarEvolution &PSE)
Try to simplify VPInstruction::ExplicitVectorLength recipes when the AVL is known to be <= VF,...
static void removeBranchOnConst(VPlan &Plan, bool OnlyLatches=false)
Remove BranchOnCond recipes with true or false conditions together with removing dead edges to their ...
static void introduceMasksAndLinearize(VPlan &Plan)
Predicate and linearize the control-flow in the only loop region of Plan.
static void materializeFactors(VPlan &Plan, VPBasicBlock *VectorPH, ElementCount VF)
Materialize UF, VF and VFxUF to be computed explicitly using VPInstructions.
static void foldTailByMasking(VPlan &Plan)
Adapts the vector loop region for tail folding by introducing a header mask and conditionally executi...
static void materializeBackedgeTakenCount(VPlan &Plan, VPBasicBlock *VectorPH)
Materialize the backedge-taken count to be computed explicitly using VPInstructions.
static void addMinimumVectorEpilogueIterationCheck(VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE)
Add a check to Plan to see if the epilogue vector loop should be executed.
static void addActiveLaneMask(VPlan &Plan, bool UseActiveLaneMaskForControlFlow)
Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an (active-lane-mask recipe,...
static bool handleMultiUseReductions(VPlan &Plan, OptimizationRemarkEmitter *ORE, Loop *TheLoop)
Try to legalize reductions with multiple in-loop uses.
static void replaceWideCanonicalIVWithWideIV(VPlan &Plan, ScalarEvolution &SE, const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, ElementCount VF, unsigned UF, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)
Replace a VPWidenCanonicalIVRecipe if it is present in Plan, with a VPWidenIntOrFpInductionRecipe,...
static void createAndOptimizeReplicateRegions(VPlan &Plan)
Wrap predicated VPReplicateRecipes with a mask operand in an if-then region block and remove the mask...
static void convertToVariableLengthStep(VPlan &Plan)
Transform loops with variable-length stepping after region dissolution.
static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF, std::optional< unsigned > VScaleForTuning)
Add branch weight metadata, if the Plan's middle block is terminated by a BranchOnCond recipe.
static std::unique_ptr< VPlan > narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI)
Try to find a single VF among Plan's VFs for which all interleave groups (with known minimum VF eleme...
static bool handleFindLastReductions(VPlan &Plan)
Check if Plan contains any FindLast reductions.
static void createInLoopReductionRecipes(VPlan &Plan, ElementCount MinVF)
Create VPReductionRecipes for in-loop reductions.
static void materializeAliasMaskCheckBlock(VPlan &Plan, ArrayRef< PointerDiffInfo > DiffChecks, bool HasBranchWeights)
Materializes the alias mask within a check block before the loop.
static void unrollByUF(VPlan &Plan, unsigned UF)
Explicitly unroll Plan by UF.
static DenseMap< const SCEV *, Value * > expandSCEVs(VPlan &Plan, ScalarEvolution &SE)
Expand remaining VPExpandSCEVRecipes in Plan's entry block using SCEVExpander.
static void convertToConcreteRecipes(VPlan &Plan)
Lower abstract recipes to concrete ones that can be codegen'd.
static LLVM_ABI_FOR_TEST void createLoopRegions(VPlan &Plan, DebugLoc DL)
Replace loops in Plan's flat CFG with VPRegionBlocks, turning Plan's flat CFG into a hierarchical CFG...
static LLVM_ABI_FOR_TEST std::unique_ptr< VPlan > buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, PredicatedScalarEvolution &PSE, LoopVersioning *LVer=nullptr)
Create a base VPlan0, serving as the common starting point for all later candidates.
static void expandBranchOnTwoConds(VPlan &Plan)
Expand BranchOnTwoConds instructions into explicit CFG with BranchOnCond instructions.
static void materializeVectorTripCount(VPlan &Plan, VPBasicBlock *VectorPHVPBB, bool TailByMasking, bool RequiresScalarEpilogue, VPValue *Step, std::optional< uint64_t > MaxRuntimeStep=std::nullopt)
Materialize vector trip count computations to a set of VPInstructions.
static void hoistPredicatedLoads(VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L)
Hoist predicated loads from the same address to the loop entry block, if they are guaranteed to execu...
static bool mergeBlocksIntoPredecessors(VPlan &Plan)
Remove redundant VPBasicBlocks by merging them into their single predecessor if the latter has a sing...
static void attachAliasMaskToHeaderMask(VPlan &Plan)
Attaches the alias-mask to the existing header-mask.
static void optimizeFindIVReductions(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &L)
Optimize FindLast reductions selecting IVs (or expressions of IVs) by converting them to FindIV reduc...
static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx, VFRange &Range)
This function converts initial recipes to the abstract recipes and clamps Range based on cost model f...
static void materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)
static void makeScalarizationDecisions(VPlan &Plan, VFRange &Range)
Make VPlan-based scalarization decisions prior to delegating to the ones made by the legacy CM.
static void addExplicitVectorLength(VPlan &Plan, const std::optional< unsigned > &MaxEVLSafeElements)
Add a VPCurrentIterationPHIRecipe and related recipes to Plan and replace all uses of the canonical ...
static void makeCallWideningDecisions(VPlan &Plan, VFRange &Range, VPRecipeBuilder &RecipeBuilder, VPCostContext &CostCtx)
Convert call VPInstructions in Plan into widened call, vector intrinsic or replicate recipes based on...
static void adjustFirstOrderRecurrenceMiddleUsers(VPlan &Plan, VFRange &Range)
Adjust first-order recurrence users in the middle block: create penultimate element extracts for LCSS...
static void optimizeEVLMasks(VPlan &Plan)
Optimize recipes which use an EVL-based header mask to VP intrinsics, for example:
static LLVM_ABI_FOR_TEST bool handleEarlyExits(VPlan &Plan, UncountableExitStyle Style, Loop *TheLoop, PredicatedScalarEvolution &PSE, DominatorTree &DT, AssumptionCache *AC)
Update Plan to account for all early exits.
static void replaceSymbolicStrides(VPlan &Plan, PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &StridesMap)
Replace symbolic strides from StridesMap in Plan with constants when possible.
static bool handleMaxMinNumReductions(VPlan &Plan)
Check if Plan contains any FMaxNum or FMinNum reductions.
static void removeDeadRecipes(VPlan &Plan)
Remove dead recipes from Plan.
static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock, bool AddBranchWeights)
static void simplifyRecipes(VPlan &Plan)
Perform instcombine-like simplifications on recipes in Plan.
static void sinkPredicatedStores(VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L)
Sink predicated stores to the same address with complementary predicates (P and NOT P) to an uncondit...
static bool finalizeSCEVPredicates(VPlan &Plan, PredicatedScalarEvolution &PSE, bool OptForSize, unsigned SCEVCheckThreshold, OptimizationRemarkEmitter *ORE, Loop *TheLoop)
Finalize SCEV predicates by adding induction predicates from Plan to PSE and checking constraints.
static void replicateByVF(VPlan &Plan, ElementCount VF)
Replace replicating VPReplicateRecipe, VPScalarIVStepsRecipe and VPInstruction in Plan with VF single...
static void convertToStridedAccesses(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &L, VPCostContext &Ctx, VFRange &Range)
Transform widen memory recipes into strided access recipes when legal and profitable.
static void addIterationCountCheckBlock(VPlan &Plan, ElementCount VF, unsigned UF, bool RequiresScalarEpilogue, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE)
Add a new check block before the vector preheader to Plan to check if the main vector loop should be ...
static void clearReductionWrapFlags(VPlan &Plan)
Clear NSW/NUW flags from reduction instructions if necessary.
static void optimizeInductionLiveOutUsers(VPlan &Plan, PredicatedScalarEvolution &PSE, bool FoldTail)
If there's a single exit block, optimize its phi recipes that use exiting IV values by feeding them p...
static void createPartialReductions(VPlan &Plan, VPCostContext &CostCtx, VFRange &Range)
Detect and create partial reduction recipes for scaled reductions in Plan.
static void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue, bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE, VPBasicBlock *CheckBlock)
static void cse(VPlan &Plan)
Perform common-subexpression-elimination on Plan.
static void attachVPCheckBlock(VPlan &Plan, VPValue *Cond, VPBasicBlock *CheckBlock, bool AddBranchWeights)
Wrap runtime check block CheckBlock in a VPIRBB and Cond in a VPValue and connect the block to Plan,...
static LLVM_ABI_FOR_TEST void optimize(VPlan &Plan)
Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...
static void dissolveLoopRegions(VPlan &Plan)
Replace loop regions with explicit CFG.
static void truncateToMinimalBitwidths(VPlan &Plan, const MapVector< Instruction *, uint64_t > &MinBWs)
Insert truncates and extends for any truncated recipe.
static void dropPoisonGeneratingRecipes(VPlan &Plan)
Drop poison flags from recipes that may generate a poison value that is used after vectorization,...
static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)
Optimize Plan based on BestVF and BestUF.
static void convertEVLExitCond(VPlan &Plan)
Replaces the exit condition from (branch-on-cond eq CanonicalIVInc, VectorTripCount) to (branch-on-co...
static LLVM_ABI_FOR_TEST void addMiddleCheck(VPlan &Plan, bool TailFolded)
If a check is needed to guard executing the scalar epilogue loop, it will be added to the middle bloc...