// Extracted from the doxygen rendering of LLVM 18.0.0git, Scalarizer.cpp.
1//===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass converts vector operations into scalar operations (or, optionally,
10// operations on smaller vector widths), in order to expose optimization
11// opportunities on the individual scalar operations.
12// It is mainly intended for targets that do not have vector units, but it
13// may also be useful for revectorizing code to different vector widths.
14//
15//===----------------------------------------------------------------------===//
16
#include "llvm/Transforms/Scalar/Scalarizer.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <map>
#include <optional>
#include <utility>
49
50using namespace llvm;
51
52#define DEBUG_TYPE "scalarizer"
53
55 "scalarize-variable-insert-extract", cl::init(true), cl::Hidden,
56 cl::desc("Allow the scalarizer pass to scalarize "
57 "insertelement/extractelement with variable index"));
58
59// This is disabled by default because having separate loads and stores
60// makes it more likely that the -combiner-alias-analysis limits will be
61// reached.
63 "scalarize-load-store", cl::init(false), cl::Hidden,
64 cl::desc("Allow the scalarizer pass to scalarize loads and store"));
65
66// Split vectors larger than this size into fragments, where each fragment is
67// either a vector no larger than this size or a scalar.
68//
69// Instructions with operands or results of different sizes that would be split
70// into a different number of fragments are currently left as-is.
72 "scalarize-min-bits", cl::init(0), cl::Hidden,
73 cl::desc("Instruct the scalarizer pass to attempt to keep values of a "
74 "minimum number of bits"));
75
76namespace {
77
78BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
79 BasicBlock *BB = Itr->getParent();
80 if (isa<PHINode>(Itr))
81 Itr = BB->getFirstInsertionPt();
82 if (Itr != BB->end())
83 Itr = skipDebugIntrinsics(Itr);
84 return Itr;
85}
86
87// Used to store the scattered form of a vector.
88using ValueVector = SmallVector<Value *, 8>;
89
90// Used to map a vector Value and associated type to its scattered form.
91// The associated type is only non-null for pointer values that are "scattered"
92// when used as pointer operands to load or store.
93//
94// We use std::map because we want iterators to persist across insertion and
95// because the values are relatively large.
96using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;
97
98// Lists Instructions that have been replaced with scalar implementations,
99// along with a pointer to their scattered forms.
101
// Describes how a fixed-width vector is decomposed for scalarization:
// NumFragments pieces of NumPacked elements each, with an optional narrower
// remainder piece at the end.
struct VectorSplit {
  // The type of the vector.
  FixedVectorType *VecTy = nullptr;

  // The number of elements packed in a fragment (other than the remainder).
  unsigned NumPacked = 0;

  // The number of fragments (scalars or smaller vectors) into which the vector
  // shall be split.
  unsigned NumFragments = 0;

  // The type of each complete fragment.
  Type *SplitTy = nullptr;

  // The type of the remainder (last) fragment; null if all fragments are
  // complete.
  Type *RemainderTy = nullptr;

  // Return the type of fragment I: SplitTy for complete fragments,
  // RemainderTy for the final fragment when one exists.
  Type *getFragmentType(unsigned I) const {
    return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy;
  }
};
124
125// Provides a very limited vector-like interface for lazily accessing one
126// component of a scattered vector or vector pointer.
127class Scatterer {
128public:
129 Scatterer() = default;
130
131 // Scatter V into Size components. If new instructions are needed,
132 // insert them before BBI in BB. If Cache is nonnull, use it to cache
133 // the results.
134 Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
135 const VectorSplit &VS, ValueVector *cachePtr = nullptr);
136
137 // Return component I, creating a new Value for it if necessary.
138 Value *operator[](unsigned I);
139
140 // Return the number of components.
141 unsigned size() const { return VS.NumFragments; }
142
143private:
144 BasicBlock *BB;
146 Value *V;
147 VectorSplit VS;
148 bool IsPointer;
149 ValueVector *CachePtr;
150 ValueVector Tmp;
151};
152
153// FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
154// called Name that compares X and Y in the same way as FCI.
155struct FCmpSplitter {
156 FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
157
158 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
159 const Twine &Name) const {
160 return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
161 }
162
163 FCmpInst &FCI;
164};
165
166// ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
167// called Name that compares X and Y in the same way as ICI.
168struct ICmpSplitter {
169 ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
170
171 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
172 const Twine &Name) const {
173 return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
174 }
175
176 ICmpInst &ICI;
177};
178
179// UnarySplitter(UO)(Builder, X, Name) uses Builder to create
180// a unary operator like UO called Name with operand X.
181struct UnarySplitter {
182 UnarySplitter(UnaryOperator &uo) : UO(uo) {}
183
184 Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const {
185 return Builder.CreateUnOp(UO.getOpcode(), Op, Name);
186 }
187
188 UnaryOperator &UO;
189};
190
191// BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
192// a binary operator like BO called Name with operands X and Y.
193struct BinarySplitter {
194 BinarySplitter(BinaryOperator &bo) : BO(bo) {}
195
196 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
197 const Twine &Name) const {
198 return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
199 }
200
201 BinaryOperator &BO;
202};
203
204// Information about a load or store that we're scalarizing.
205struct VectorLayout {
206 VectorLayout() = default;
207
208 // Return the alignment of fragment Frag.
209 Align getFragmentAlign(unsigned Frag) {
210 return commonAlignment(VecAlign, Frag * SplitSize);
211 }
212
213 // The split of the underlying vector type.
214 VectorSplit VS;
215
216 // The alignment of the vector.
217 Align VecAlign;
218
219 // The size of each (non-remainder) fragment in bytes.
220 uint64_t SplitSize = 0;
221};
222
223/// Concatenate the given fragments to a single vector value of the type
224/// described in @p VS.
static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
                          const VectorSplit &VS, Twine Name) {
  unsigned NumElements = VS.VecTy->getNumElements();
  // ExtendMask widens one fragment to the full vector length (-1 lanes are
  // "don't care"); InsertMask merges a widened fragment into the accumulated
  // result.
  SmallVector<int> ExtendMask;
  SmallVector<int> InsertMask;

  if (VS.NumPacked > 1) {
    // Prepare the shufflevector masks once and re-use them for all
    // fragments.
    ExtendMask.resize(NumElements, -1);
    for (unsigned I = 0; I < VS.NumPacked; ++I)
      ExtendMask[I] = I;

    InsertMask.resize(NumElements);
    for (unsigned I = 0; I < NumElements; ++I)
      InsertMask[I] = I;
  }

  Value *Res = PoisonValue::get(VS.VecTy);
  for (unsigned I = 0; I < VS.NumFragments; ++I) {
    Value *Fragment = Fragments[I];

    // The last fragment may be narrower than the others (the remainder).
    unsigned NumPacked = VS.NumPacked;
    if (I == VS.NumFragments - 1 && VS.RemainderTy) {
      if (auto *RemVecTy = dyn_cast<FixedVectorType>(VS.RemainderTy))
        NumPacked = RemVecTy->getNumElements();
      else
        NumPacked = 1;
    }

    if (NumPacked == 1) {
      // Scalar fragment: a plain insertelement suffices.
      Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
                                        Name + ".upto" + Twine(I));
    } else {
      // Vector fragment: widen it to the result length, then shuffle it into
      // position. The InsertMask lanes covered by this fragment are
      // temporarily redirected to the widened fragment (second shuffle
      // operand, hence the NumElements offset) and restored afterwards so the
      // mask can be reused for the next fragment.
      Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);
      if (I == 0) {
        Res = Fragment;
      } else {
        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = NumElements + J;
        Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
                                          Name + ".upto" + Twine(I));
        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
      }
    }
  }

  return Res;
}
275
276template <typename T>
277T getWithDefaultOverride(const cl::opt<T> &ClOption,
278 const std::optional<T> &DefaultOverride) {
279 return ClOption.getNumOccurrences() ? ClOption
280 : DefaultOverride.value_or(ClOption);
281}
282
283class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
284public:
285 ScalarizerVisitor(unsigned ParallelLoopAccessMDKind, DominatorTree *DT,
287 : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind), DT(DT),
288 ScalarizeVariableInsertExtract(
289 getWithDefaultOverride(ClScalarizeVariableInsertExtract,
290 Options.ScalarizeVariableInsertExtract)),
291 ScalarizeLoadStore(getWithDefaultOverride(ClScalarizeLoadStore,
292 Options.ScalarizeLoadStore)),
293 ScalarizeMinBits(getWithDefaultOverride(ClScalarizeMinBits,
294 Options.ScalarizeMinBits)) {}
295
296 bool visit(Function &F);
297
298 // InstVisitor methods. They return true if the instruction was scalarized,
299 // false if nothing changed.
300 bool visitInstruction(Instruction &I) { return false; }
301 bool visitSelectInst(SelectInst &SI);
302 bool visitICmpInst(ICmpInst &ICI);
303 bool visitFCmpInst(FCmpInst &FCI);
307 bool visitCastInst(CastInst &CI);
308 bool visitBitCastInst(BitCastInst &BCI);
312 bool visitPHINode(PHINode &PHI);
313 bool visitLoadInst(LoadInst &LI);
314 bool visitStoreInst(StoreInst &SI);
315 bool visitCallInst(CallInst &ICI);
316 bool visitFreezeInst(FreezeInst &FI);
317
318private:
319 Scatterer scatter(Instruction *Point, Value *V, const VectorSplit &VS);
320 void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS);
321 void replaceUses(Instruction *Op, Value *CV);
322 bool canTransferMetadata(unsigned Kind);
323 void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
324 std::optional<VectorSplit> getVectorSplit(Type *Ty);
325 std::optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment,
326 const DataLayout &DL);
327 bool finish();
328
329 template<typename T> bool splitUnary(Instruction &, const T &);
330 template<typename T> bool splitBinary(Instruction &, const T &);
331
332 bool splitCall(CallInst &CI);
333
334 ScatterMap Scattered;
335 GatherList Gathered;
336 bool Scalarized;
337
338 SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs;
339
340 unsigned ParallelLoopAccessMDKind;
341
342 DominatorTree *DT;
343
344 const bool ScalarizeVariableInsertExtract;
345 const bool ScalarizeLoadStore;
346 const unsigned ScalarizeMinBits;
347};
348
349class ScalarizerLegacyPass : public FunctionPass {
350public:
351 static char ID;
352
353 ScalarizerLegacyPass() : FunctionPass(ID) {
355 }
356
357 bool runOnFunction(Function &F) override;
358
359 void getAnalysisUsage(AnalysisUsage& AU) const override {
362 }
363};
364
365} // end anonymous namespace
366
367char ScalarizerLegacyPass::ID = 0;
368INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
369 "Scalarize vector operations", false, false)
371INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
372 "Scalarize vector operations", false, false)
373
374Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
375 const VectorSplit &VS, ValueVector *cachePtr)
376 : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
377 IsPointer = V->getType()->isPointerTy();
378 if (!CachePtr) {
379 Tmp.resize(VS.NumFragments, nullptr);
380 } else {
381 assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() ||
382 IsPointer) &&
383 "Inconsistent vector sizes");
384 if (VS.NumFragments > CachePtr->size())
385 CachePtr->resize(VS.NumFragments, nullptr);
386 }
387}
388
389// Return fragment Frag, creating a new Value for it if necessary.
390Value *Scatterer::operator[](unsigned Frag) {
391 ValueVector &CV = CachePtr ? *CachePtr : Tmp;
392 // Try to reuse a previous value.
393 if (CV[Frag])
394 return CV[Frag];
395 IRBuilder<> Builder(BB, BBI);
396 if (IsPointer) {
397 if (Frag == 0)
398 CV[Frag] = V;
399 else
400 CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, V, Frag,
401 V->getName() + ".i" + Twine(Frag));
402 return CV[Frag];
403 }
404
405 Type *FragmentTy = VS.getFragmentType(Frag);
406
407 if (auto *VecTy = dyn_cast<FixedVectorType>(FragmentTy)) {
409 for (unsigned J = 0; J < VecTy->getNumElements(); ++J)
410 Mask.push_back(Frag * VS.NumPacked + J);
411 CV[Frag] =
412 Builder.CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask,
413 V->getName() + ".i" + Twine(Frag));
414 } else {
415 // Search through a chain of InsertElementInsts looking for element Frag.
416 // Record other elements in the cache. The new V is still suitable
417 // for all uncached indices.
418 while (true) {
419 InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
420 if (!Insert)
421 break;
422 ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
423 if (!Idx)
424 break;
425 unsigned J = Idx->getZExtValue();
426 V = Insert->getOperand(0);
427 if (Frag * VS.NumPacked == J) {
428 CV[Frag] = Insert->getOperand(1);
429 return CV[Frag];
430 }
431
432 if (VS.NumPacked == 1 && !CV[J]) {
433 // Only cache the first entry we find for each index we're not actively
434 // searching for. This prevents us from going too far up the chain and
435 // caching incorrect entries.
436 CV[J] = Insert->getOperand(1);
437 }
438 }
439 CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked,
440 V->getName() + ".i" + Twine(Frag));
441 }
442
443 return CV[Frag];
444}
445
446bool ScalarizerLegacyPass::runOnFunction(Function &F) {
447 if (skipFunction(F))
448 return false;
449
450 Module &M = *F.getParent();
451 unsigned ParallelLoopAccessMDKind =
452 M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
453 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
454 ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT, ScalarizerPassOptions());
455 return Impl.visit(F);
456}
457
459 return new ScalarizerLegacyPass();
460}
461
bool ScalarizerVisitor::visit(Function &F) {
  assert(Gathered.empty() && Scattered.empty());

  Scalarized = false;

  // To ensure we replace gathered components correctly we need to do an ordered
  // traversal of the basic blocks in the function.
  ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock());
  for (BasicBlock *BB : RPOT) {
    for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
      Instruction *I = &*II;
      bool Done = InstVisitor::visit(I);
      // Advance before any erase so the iterator stays valid.
      ++II;
      // Void instructions produce no value to gather, so they can be removed
      // immediately once scalarized; value-producing ones are left for the
      // deferred cleanup in finish().
      if (Done && I->getType()->isVoidTy())
        I->eraseFromParent();
    }
  }
  return finish();
}
481
482// Return a scattered form of V that can be accessed by Point. V must be a
483// vector or a pointer to a vector.
Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
                                     const VectorSplit &VS) {
  if (Argument *VArg = dyn_cast<Argument>(V)) {
    // Put the scattered form of arguments in the entry block,
    // so that it can be used everywhere.
    Function *F = VArg->getParent();
    BasicBlock *BB = &F->getEntryBlock();
    return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]);
  }
  if (Instruction *VOp = dyn_cast<Instruction>(V)) {
    // When scalarizing PHI nodes we might try to examine/rewrite InsertElement
    // nodes in predecessors. If those predecessors are unreachable from entry,
    // then the IR in those blocks could have unexpected properties resulting in
    // infinite loops in Scatterer::operator[]. By simply treating values
    // originating from instructions in unreachable blocks as undef we do not
    // need to analyse them further.
    if (!DT->isReachableFromEntry(VOp->getParent()))
      return Scatterer(Point->getParent(), Point->getIterator(),
                       PoisonValue::get(V->getType()), VS);
    // Put the scattered form of an instruction directly after the
    // instruction, skipping over PHI nodes and debug intrinsics.
    BasicBlock *BB = VOp->getParent();
    return Scatterer(
        BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V, VS,
        &Scattered[{V, VS.SplitTy}]);
  }
  // In the fallback case (e.g. constants), just put the scattered form before
  // Point and keep the result local to Point (no shared cache entry).
  return Scatterer(Point->getParent(), Point->getIterator(), V, VS);
}
514
515// Replace Op with the gathered form of the components in CV. Defer the
516// deletion of Op and creation of the gathered form to the end of the pass,
517// so that we can avoid creating the gathered form if all uses of Op are
518// replaced with uses of CV.
void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV,
                               const VectorSplit &VS) {
  // Propagate metadata and IR flags from Op to the scalar fragments up front.
  transferMetadataAndIRFlags(Op, CV);

  // If we already have a scattered form of Op (created from ExtractElements
  // of Op itself), replace them with the new form.
  ValueVector &SV = Scattered[{Op, VS.SplitTy}];
  if (!SV.empty()) {
    for (unsigned I = 0, E = SV.size(); I != E; ++I) {
      Value *V = SV[I];
      // Skip fragments that were never materialized or are already the new
      // value.
      if (V == nullptr || SV[I] == CV[I])
        continue;

      Instruction *Old = cast<Instruction>(V);
      // Keep the old fragment's name when the replacement is an instruction.
      if (isa<Instruction>(CV[I]))
        CV[I]->takeName(Old);
      Old->replaceAllUsesWith(CV[I]);
      PotentiallyDeadInstrs.emplace_back(Old);
    }
  }
  SV = CV;
  Gathered.push_back(GatherList::value_type(Op, &SV));
}
542
543// Replace Op with CV and collect Op has a potentially dead instruction.
544void ScalarizerVisitor::replaceUses(Instruction *Op, Value *CV) {
545 if (CV != Op) {
546 Op->replaceAllUsesWith(CV);
547 PotentiallyDeadInstrs.emplace_back(Op);
548 Scalarized = true;
549 }
550}
551
552// Return true if it is safe to transfer the given metadata tag from
553// vector to scalar instructions.
554bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
555 return (Tag == LLVMContext::MD_tbaa
556 || Tag == LLVMContext::MD_fpmath
557 || Tag == LLVMContext::MD_tbaa_struct
558 || Tag == LLVMContext::MD_invariant_load
559 || Tag == LLVMContext::MD_alias_scope
560 || Tag == LLVMContext::MD_noalias
561 || Tag == ParallelLoopAccessMDKind
562 || Tag == LLVMContext::MD_access_group);
563}
564
565// Transfer metadata from Op to the instructions in CV if it is known
566// to be safe to do so.
567void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
568 const ValueVector &CV) {
570 Op->getAllMetadataOtherThanDebugLoc(MDs);
571 for (unsigned I = 0, E = CV.size(); I != E; ++I) {
572 if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
573 for (const auto &MD : MDs)
574 if (canTransferMetadata(MD.first))
575 New->setMetadata(MD.first, MD.second);
576 New->copyIRFlags(Op);
577 if (Op->getDebugLoc() && !New->getDebugLoc())
578 New->setDebugLoc(Op->getDebugLoc());
579 }
580 }
581}
582
583// Determine how Ty is split, if at all.
// Determine how Ty is split, if at all.
std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) {
  VectorSplit Split;
  Split.VecTy = dyn_cast<FixedVectorType>(Ty);
  if (!Split.VecTy)
    return {};

  unsigned NumElems = Split.VecTy->getNumElements();
  Type *ElemTy = Split.VecTy->getElementType();

  // Fully scalarize when the vector is trivially small (one element), the
  // elements are pointers (no packed fragment type), or even two elements
  // would exceed the ScalarizeMinBits budget.
  if (NumElems == 1 || ElemTy->isPointerTy() ||
      2 * ElemTy->getScalarSizeInBits() > ScalarizeMinBits) {
    Split.NumPacked = 1;
    Split.NumFragments = NumElems;
    Split.SplitTy = ElemTy;
  } else {
    // Pack as many elements as fit in ScalarizeMinBits into each fragment.
    Split.NumPacked = ScalarizeMinBits / ElemTy->getScalarSizeInBits();
    // If everything fits in one fragment there is nothing to split.
    if (Split.NumPacked >= NumElems)
      return {};

    Split.NumFragments = divideCeil(NumElems, Split.NumPacked);
    Split.SplitTy = FixedVectorType::get(ElemTy, Split.NumPacked);

    // Leftover elements form a narrower final fragment: a smaller vector for
    // more than one leftover element, otherwise a lone scalar.
    unsigned RemainderElems = NumElems % Split.NumPacked;
    if (RemainderElems > 1)
      Split.RemainderTy = FixedVectorType::get(ElemTy, RemainderElems);
    else if (RemainderElems == 1)
      Split.RemainderTy = ElemTy;
  }

  return Split;
}
615
// Try to build a VectorLayout for Ty, returning it on success and
// std::nullopt on failure. Alignment is the alignment of the vector, or the
// ABI default alignment if the access did not specify one.
619std::optional<VectorLayout>
620ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment,
621 const DataLayout &DL) {
622 std::optional<VectorSplit> VS = getVectorSplit(Ty);
623 if (!VS)
624 return {};
625
626 VectorLayout Layout;
627 Layout.VS = *VS;
628 // Check that we're dealing with full-byte fragments.
629 if (!DL.typeSizeEqualsStoreSize(VS->SplitTy) ||
630 (VS->RemainderTy && !DL.typeSizeEqualsStoreSize(VS->RemainderTy)))
631 return {};
632 Layout.VecAlign = Alignment;
633 Layout.SplitSize = DL.getTypeStoreSize(VS->SplitTy);
634 return Layout;
635}
636
637// Scalarize one-operand instruction I, using Split(Builder, X, Name)
638// to create an instruction like I with operand X and name Name.
639template<typename Splitter>
640bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
641 std::optional<VectorSplit> VS = getVectorSplit(I.getType());
642 if (!VS)
643 return false;
644
645 std::optional<VectorSplit> OpVS;
646 if (I.getOperand(0)->getType() == I.getType()) {
647 OpVS = VS;
648 } else {
649 OpVS = getVectorSplit(I.getOperand(0)->getType());
650 if (!OpVS || VS->NumPacked != OpVS->NumPacked)
651 return false;
652 }
653
655 Scatterer Op = scatter(&I, I.getOperand(0), *OpVS);
656 assert(Op.size() == VS->NumFragments && "Mismatched unary operation");
657 ValueVector Res;
658 Res.resize(VS->NumFragments);
659 for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag)
660 Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag));
661 gather(&I, Res, *VS);
662 return true;
663}
664
665// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
666// to create an instruction like I with operands X and Y and name Name.
667template<typename Splitter>
668bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
669 std::optional<VectorSplit> VS = getVectorSplit(I.getType());
670 if (!VS)
671 return false;
672
673 std::optional<VectorSplit> OpVS;
674 if (I.getOperand(0)->getType() == I.getType()) {
675 OpVS = VS;
676 } else {
677 OpVS = getVectorSplit(I.getOperand(0)->getType());
678 if (!OpVS || VS->NumPacked != OpVS->NumPacked)
679 return false;
680 }
681
683 Scatterer VOp0 = scatter(&I, I.getOperand(0), *OpVS);
684 Scatterer VOp1 = scatter(&I, I.getOperand(1), *OpVS);
685 assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation");
686 assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation");
687 ValueVector Res;
688 Res.resize(VS->NumFragments);
689 for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) {
690 Value *Op0 = VOp0[Frag];
691 Value *Op1 = VOp1[Frag];
692 Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag));
693 }
694 gather(&I, Res, *VS);
695 return true;
696}
697
700}
701
702/// If a call to a vector typed intrinsic function, split into a scalar call per
703/// element if possible for the intrinsic.
704bool ScalarizerVisitor::splitCall(CallInst &CI) {
705 std::optional<VectorSplit> VS = getVectorSplit(CI.getType());
706 if (!VS)
707 return false;
708
710 if (!F)
711 return false;
712
713 Intrinsic::ID ID = F->getIntrinsicID();
715 return false;
716
717 // unsigned NumElems = VT->getNumElements();
718 unsigned NumArgs = CI.arg_size();
719
720 ValueVector ScalarOperands(NumArgs);
721 SmallVector<Scatterer, 8> Scattered(NumArgs);
722 SmallVector<int> OverloadIdx(NumArgs, -1);
723
725 // Add return type if intrinsic is overloaded on it.
727 Tys.push_back(VS->SplitTy);
728
729 // Assumes that any vector type has the same number of elements as the return
730 // vector type, which is true for all current intrinsics.
731 for (unsigned I = 0; I != NumArgs; ++I) {
732 Value *OpI = CI.getOperand(I);
733 if ([[maybe_unused]] auto *OpVecTy =
734 dyn_cast<FixedVectorType>(OpI->getType())) {
735 assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements());
736 std::optional<VectorSplit> OpVS = getVectorSplit(OpI->getType());
737 if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
738 // The natural split of the operand doesn't match the result. This could
739 // happen if the vector elements are different and the ScalarizeMinBits
740 // option is used.
741 //
742 // We could in principle handle this case as well, at the cost of
743 // complicating the scattering machinery to support multiple scattering
744 // granularities for a single value.
745 return false;
746 }
747
748 Scattered[I] = scatter(&CI, OpI, *OpVS);
750 OverloadIdx[I] = Tys.size();
751 Tys.push_back(OpVS->SplitTy);
752 }
753 } else {
754 ScalarOperands[I] = OpI;
756 Tys.push_back(OpI->getType());
757 }
758 }
759
760 ValueVector Res(VS->NumFragments);
761 ValueVector ScalarCallOps(NumArgs);
762
763 Function *NewIntrin = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
764 IRBuilder<> Builder(&CI);
765
766 // Perform actual scalarization, taking care to preserve any scalar operands.
767 for (unsigned I = 0; I < VS->NumFragments; ++I) {
768 bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy;
769 ScalarCallOps.clear();
770
771 if (IsRemainder)
772 Tys[0] = VS->RemainderTy;
773
774 for (unsigned J = 0; J != NumArgs; ++J) {
776 ScalarCallOps.push_back(ScalarOperands[J]);
777 } else {
778 ScalarCallOps.push_back(Scattered[J][I]);
779 if (IsRemainder && OverloadIdx[J] >= 0)
780 Tys[OverloadIdx[J]] = Scattered[J][I]->getType();
781 }
782 }
783
784 if (IsRemainder)
785 NewIntrin = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
786
787 Res[I] = Builder.CreateCall(NewIntrin, ScalarCallOps,
788 CI.getName() + ".i" + Twine(I));
789 }
790
791 gather(&CI, Res, *VS);
792 return true;
793}
794
bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
  std::optional<VectorSplit> VS = getVectorSplit(SI.getType());
  if (!VS)
    return false;

  // The condition is either a vector (per-element selection) or a single
  // scalar shared by every fragment.
  std::optional<VectorSplit> CondVS;
  if (isa<FixedVectorType>(SI.getCondition()->getType())) {
    CondVS = getVectorSplit(SI.getCondition()->getType());
    if (!CondVS || CondVS->NumPacked != VS->NumPacked) {
      // This happens when ScalarizeMinBits is used.
      return false;
    }
  }

  IRBuilder<> Builder(&SI);
  Scatterer VOp1 = scatter(&SI, SI.getOperand(1), *VS);
  Scatterer VOp2 = scatter(&SI, SI.getOperand(2), *VS);
  assert(VOp1.size() == VS->NumFragments && "Mismatched select");
  assert(VOp2.size() == VS->NumFragments && "Mismatched select");
  ValueVector Res;
  Res.resize(VS->NumFragments);

  if (CondVS) {
    // Vector condition: pair each select with its condition fragment.
    Scatterer VOp0 = scatter(&SI, SI.getOperand(0), *CondVS);
    assert(VOp0.size() == CondVS->NumFragments && "Mismatched select");
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Value *Op0 = VOp0[I];
      Value *Op1 = VOp1[I];
      Value *Op2 = VOp2[I];
      Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
                                    SI.getName() + ".i" + Twine(I));
    }
  } else {
    // Scalar condition: reuse it unchanged for every fragment.
    Value *Op0 = SI.getOperand(0);
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Value *Op1 = VOp1[I];
      Value *Op2 = VOp2[I];
      Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
                                    SI.getName() + ".i" + Twine(I));
    }
  }
  gather(&SI, Res, *VS);
  return true;
}
839
840bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
841 return splitBinary(ICI, ICmpSplitter(ICI));
842}
843
844bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
845 return splitBinary(FCI, FCmpSplitter(FCI));
846}
847
848bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
849 return splitUnary(UO, UnarySplitter(UO));
850}
851
852bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
853 return splitBinary(BO, BinarySplitter(BO));
854}
855
856bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
857 std::optional<VectorSplit> VS = getVectorSplit(GEPI.getType());
858 if (!VS)
859 return false;
860
861 IRBuilder<> Builder(&GEPI);
862 unsigned NumIndices = GEPI.getNumIndices();
863
864 // The base pointer and indices might be scalar even if it's a vector GEP.
865 SmallVector<Value *, 8> ScalarOps{1 + NumIndices};
866 SmallVector<Scatterer, 8> ScatterOps{1 + NumIndices};
867
868 for (unsigned I = 0; I < 1 + NumIndices; ++I) {
869 if (auto *VecTy =
870 dyn_cast<FixedVectorType>(GEPI.getOperand(I)->getType())) {
871 std::optional<VectorSplit> OpVS = getVectorSplit(VecTy);
872 if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
873 // This can happen when ScalarizeMinBits is used.
874 return false;
875 }
876 ScatterOps[I] = scatter(&GEPI, GEPI.getOperand(I), *OpVS);
877 } else {
878 ScalarOps[I] = GEPI.getOperand(I);
879 }
880 }
881
882 ValueVector Res;
883 Res.resize(VS->NumFragments);
884 for (unsigned I = 0; I < VS->NumFragments; ++I) {
886 SplitOps.resize(1 + NumIndices);
887 for (unsigned J = 0; J < 1 + NumIndices; ++J) {
888 if (ScalarOps[J])
889 SplitOps[J] = ScalarOps[J];
890 else
891 SplitOps[J] = ScatterOps[J][I];
892 }
893 Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), SplitOps[0],
894 ArrayRef(SplitOps).drop_front(),
895 GEPI.getName() + ".i" + Twine(I));
896 if (GEPI.isInBounds())
897 if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
898 NewGEPI->setIsInBounds();
899 }
900 gather(&GEPI, Res, *VS);
901 return true;
902}
903
904bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
905 std::optional<VectorSplit> DestVS = getVectorSplit(CI.getDestTy());
906 if (!DestVS)
907 return false;
908
909 std::optional<VectorSplit> SrcVS = getVectorSplit(CI.getSrcTy());
910 if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked)
911 return false;
912
913 IRBuilder<> Builder(&CI);
914 Scatterer Op0 = scatter(&CI, CI.getOperand(0), *SrcVS);
915 assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast");
916 ValueVector Res;
917 Res.resize(DestVS->NumFragments);
918 for (unsigned I = 0; I < DestVS->NumFragments; ++I)
919 Res[I] =
920 Builder.CreateCast(CI.getOpcode(), Op0[I], DestVS->getFragmentType(I),
921 CI.getName() + ".i" + Twine(I));
922 gather(&CI, Res, *DestVS);
923 return true;
924}
925
926bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
927 std::optional<VectorSplit> DstVS = getVectorSplit(BCI.getDestTy());
928 std::optional<VectorSplit> SrcVS = getVectorSplit(BCI.getSrcTy());
929 if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy)
930 return false;
931
932 const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy();
933
934 // Vectors of pointers are always fully scalarized.
935 assert(!isPointerTy || (DstVS->NumPacked == 1 && SrcVS->NumPacked == 1));
936
937 IRBuilder<> Builder(&BCI);
938 Scatterer Op0 = scatter(&BCI, BCI.getOperand(0), *SrcVS);
939 ValueVector Res;
940 Res.resize(DstVS->NumFragments);
941
942 unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits();
943 unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits();
944
945 if (isPointerTy || DstSplitBits == SrcSplitBits) {
946 assert(DstVS->NumFragments == SrcVS->NumFragments);
947 for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
948 Res[I] = Builder.CreateBitCast(Op0[I], DstVS->getFragmentType(I),
949 BCI.getName() + ".i" + Twine(I));
950 }
951 } else if (SrcSplitBits % DstSplitBits == 0) {
952 // Convert each source fragment to the same-sized destination vector and
953 // then scatter the result to the destination.
954 VectorSplit MidVS;
955 MidVS.NumPacked = DstVS->NumPacked;
956 MidVS.NumFragments = SrcSplitBits / DstSplitBits;
957 MidVS.VecTy = FixedVectorType::get(DstVS->VecTy->getElementType(),
958 MidVS.NumPacked * MidVS.NumFragments);
959 MidVS.SplitTy = DstVS->SplitTy;
960
961 unsigned ResI = 0;
962 for (unsigned I = 0; I < SrcVS->NumFragments; ++I) {
963 Value *V = Op0[I];
964
965 // Look through any existing bitcasts before converting to <N x t2>.
966 // In the best case, the resulting conversion might be a no-op.
968 while ((VI = dyn_cast<Instruction>(V)) &&
969 VI->getOpcode() == Instruction::BitCast)
970 V = VI->getOperand(0);
971
972 V = Builder.CreateBitCast(V, MidVS.VecTy, V->getName() + ".cast");
973
974 Scatterer Mid = scatter(&BCI, V, MidVS);
975 for (unsigned J = 0; J < MidVS.NumFragments; ++J)
976 Res[ResI++] = Mid[J];
977 }
978 } else if (DstSplitBits % SrcSplitBits == 0) {
979 // Gather enough source fragments to make up a destination fragment and
980 // then convert to the destination type.
981 VectorSplit MidVS;
982 MidVS.NumFragments = DstSplitBits / SrcSplitBits;
983 MidVS.NumPacked = SrcVS->NumPacked;
984 MidVS.VecTy = FixedVectorType::get(SrcVS->VecTy->getElementType(),
985 MidVS.NumPacked * MidVS.NumFragments);
986 MidVS.SplitTy = SrcVS->SplitTy;
987
988 unsigned SrcI = 0;
989 SmallVector<Value *, 8> ConcatOps;
990 ConcatOps.resize(MidVS.NumFragments);
991 for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
992 for (unsigned J = 0; J < MidVS.NumFragments; ++J)
993 ConcatOps[J] = Op0[SrcI++];
994 Value *V = concatenate(Builder, ConcatOps, MidVS,
995 BCI.getName() + ".i" + Twine(I));
996 Res[I] = Builder.CreateBitCast(V, DstVS->getFragmentType(I),
997 BCI.getName() + ".i" + Twine(I));
998 }
999 } else {
1000 return false;
1001 }
1002
1003 gather(&BCI, Res, *DstVS);
1004 return true;
1005}
1006
1007bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
1008 std::optional<VectorSplit> VS = getVectorSplit(IEI.getType());
1009 if (!VS)
1010 return false;
1011
1012 IRBuilder<> Builder(&IEI);
1013 Scatterer Op0 = scatter(&IEI, IEI.getOperand(0), *VS);
1014 Value *NewElt = IEI.getOperand(1);
1015 Value *InsIdx = IEI.getOperand(2);
1016
1017 ValueVector Res;
1018 Res.resize(VS->NumFragments);
1019
1020 if (auto *CI = dyn_cast<ConstantInt>(InsIdx)) {
1021 unsigned Idx = CI->getZExtValue();
1022 unsigned Fragment = Idx / VS->NumPacked;
1023 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1024 if (I == Fragment) {
1025 bool IsPacked = VS->NumPacked > 1;
1026 if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
1027 !VS->RemainderTy->isVectorTy())
1028 IsPacked = false;
1029 if (IsPacked) {
1030 Res[I] =
1031 Builder.CreateInsertElement(Op0[I], NewElt, Idx % VS->NumPacked);
1032 } else {
1033 Res[I] = NewElt;
1034 }
1035 } else {
1036 Res[I] = Op0[I];
1037 }
1038 }
1039 } else {
1040 // Never split a variable insertelement that isn't fully scalarized.
1041 if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
1042 return false;
1043
1044 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1045 Value *ShouldReplace =
1046 Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
1047 InsIdx->getName() + ".is." + Twine(I));
1048 Value *OldElt = Op0[I];
1049 Res[I] = Builder.CreateSelect(ShouldReplace, NewElt, OldElt,
1050 IEI.getName() + ".i" + Twine(I));
1051 }
1052 }
1053
1054 gather(&IEI, Res, *VS);
1055 return true;
1056}
1057
1058bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
1059 std::optional<VectorSplit> VS = getVectorSplit(EEI.getOperand(0)->getType());
1060 if (!VS)
1061 return false;
1062
1063 IRBuilder<> Builder(&EEI);
1064 Scatterer Op0 = scatter(&EEI, EEI.getOperand(0), *VS);
1065 Value *ExtIdx = EEI.getOperand(1);
1066
1067 if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
1068 unsigned Idx = CI->getZExtValue();
1069 unsigned Fragment = Idx / VS->NumPacked;
1070 Value *Res = Op0[Fragment];
1071 bool IsPacked = VS->NumPacked > 1;
1072 if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
1073 !VS->RemainderTy->isVectorTy())
1074 IsPacked = false;
1075 if (IsPacked)
1076 Res = Builder.CreateExtractElement(Res, Idx % VS->NumPacked);
1077 replaceUses(&EEI, Res);
1078 return true;
1079 }
1080
1081 // Never split a variable extractelement that isn't fully scalarized.
1082 if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
1083 return false;
1084
1085 Value *Res = PoisonValue::get(VS->VecTy->getElementType());
1086 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1087 Value *ShouldExtract =
1088 Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I),
1089 ExtIdx->getName() + ".is." + Twine(I));
1090 Value *Elt = Op0[I];
1091 Res = Builder.CreateSelect(ShouldExtract, Elt, Res,
1092 EEI.getName() + ".upto" + Twine(I));
1093 }
1094 replaceUses(&EEI, Res);
1095 return true;
1096}
1097
1098bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
1099 std::optional<VectorSplit> VS = getVectorSplit(SVI.getType());
1100 std::optional<VectorSplit> VSOp =
1101 getVectorSplit(SVI.getOperand(0)->getType());
1102 if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1)
1103 return false;
1104
1105 Scatterer Op0 = scatter(&SVI, SVI.getOperand(0), *VSOp);
1106 Scatterer Op1 = scatter(&SVI, SVI.getOperand(1), *VSOp);
1107 ValueVector Res;
1108 Res.resize(VS->NumFragments);
1109
1110 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1111 int Selector = SVI.getMaskValue(I);
1112 if (Selector < 0)
1113 Res[I] = PoisonValue::get(VS->VecTy->getElementType());
1114 else if (unsigned(Selector) < Op0.size())
1115 Res[I] = Op0[Selector];
1116 else
1117 Res[I] = Op1[Selector - Op0.size()];
1118 }
1119 gather(&SVI, Res, *VS);
1120 return true;
1121}
1122
1123bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
1124 std::optional<VectorSplit> VS = getVectorSplit(PHI.getType());
1125 if (!VS)
1126 return false;
1127
1129 ValueVector Res;
1130 Res.resize(VS->NumFragments);
1131
1132 unsigned NumOps = PHI.getNumOperands();
1133 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1134 Res[I] = Builder.CreatePHI(VS->getFragmentType(I), NumOps,
1135 PHI.getName() + ".i" + Twine(I));
1136 }
1137
1138 for (unsigned I = 0; I < NumOps; ++I) {
1139 Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I), *VS);
1140 BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
1141 for (unsigned J = 0; J < VS->NumFragments; ++J)
1142 cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
1143 }
1144 gather(&PHI, Res, *VS);
1145 return true;
1146}
1147
1148bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
1149 if (!ScalarizeLoadStore)
1150 return false;
1151 if (!LI.isSimple())
1152 return false;
1153
1154 std::optional<VectorLayout> Layout = getVectorLayout(
1155 LI.getType(), LI.getAlign(), LI.getModule()->getDataLayout());
1156 if (!Layout)
1157 return false;
1158
1159 IRBuilder<> Builder(&LI);
1160 Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), Layout->VS);
1161 ValueVector Res;
1162 Res.resize(Layout->VS.NumFragments);
1163
1164 for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
1165 Res[I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(I), Ptr[I],
1166 Align(Layout->getFragmentAlign(I)),
1167 LI.getName() + ".i" + Twine(I));
1168 }
1169 gather(&LI, Res, Layout->VS);
1170 return true;
1171}
1172
1173bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
1174 if (!ScalarizeLoadStore)
1175 return false;
1176 if (!SI.isSimple())
1177 return false;
1178
1179 Value *FullValue = SI.getValueOperand();
1180 std::optional<VectorLayout> Layout = getVectorLayout(
1181 FullValue->getType(), SI.getAlign(), SI.getModule()->getDataLayout());
1182 if (!Layout)
1183 return false;
1184
1185 IRBuilder<> Builder(&SI);
1186 Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), Layout->VS);
1187 Scatterer VVal = scatter(&SI, FullValue, Layout->VS);
1188
1189 ValueVector Stores;
1190 Stores.resize(Layout->VS.NumFragments);
1191 for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
1192 Value *Val = VVal[I];
1193 Value *Ptr = VPtr[I];
1194 Stores[I] =
1195 Builder.CreateAlignedStore(Val, Ptr, Layout->getFragmentAlign(I));
1196 }
1197 transferMetadataAndIRFlags(&SI, Stores);
1198 return true;
1199}
1200
1201bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
1202 return splitCall(CI);
1203}
1204
1205bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) {
1206 return splitUnary(FI, [](IRBuilder<> &Builder, Value *Op, const Twine &Name) {
1207 return Builder.CreateFreeze(Op, Name);
1208 });
1209}
1210
1211// Delete the instructions that we scalarized. If a full vector result
1212// is still needed, recreate it using InsertElements.
1213bool ScalarizerVisitor::finish() {
1214 // The presence of data in Gathered or Scattered indicates changes
1215 // made to the Function.
1216 if (Gathered.empty() && Scattered.empty() && !Scalarized)
1217 return false;
1218 for (const auto &GMI : Gathered) {
1219 Instruction *Op = GMI.first;
1220 ValueVector &CV = *GMI.second;
1221 if (!Op->use_empty()) {
1222 // The value is still needed, so recreate it using a series of
1223 // insertelements and/or shufflevectors.
1224 Value *Res;
1225 if (auto *Ty = dyn_cast<FixedVectorType>(Op->getType())) {
1226 BasicBlock *BB = Op->getParent();
1228 if (isa<PHINode>(Op))
1229 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
1230
1231 VectorSplit VS = *getVectorSplit(Ty);
1232 assert(VS.NumFragments == CV.size());
1233
1234 Res = concatenate(Builder, CV, VS, Op->getName());
1235
1236 Res->takeName(Op);
1237 } else {
1238 assert(CV.size() == 1 && Op->getType() == CV[0]->getType());
1239 Res = CV[0];
1240 if (Op == Res)
1241 continue;
1242 }
1243 Op->replaceAllUsesWith(Res);
1244 }
1245 PotentiallyDeadInstrs.emplace_back(Op);
1246 }
1247 Gathered.clear();
1248 Scattered.clear();
1249 Scalarized = false;
1250
1252
1253 return true;
1254}
1255
1257 Module &M = *F.getParent();
1258 unsigned ParallelLoopAccessMDKind =
1259 M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
1261 ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT, Options);
1262 bool Changed = Impl.visit(F);
1265 return Changed ? PA : PreservedAnalyses::all();
1266}
aarch64 promote const
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Rewrite undef for PHI
assume Assume Builder
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Module.h This file contains the declarations for the Module class.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isTriviallyScalariable(Intrinsic::ID ID)
Definition: Scalarizer.cpp:698
Scalarize vector operations
Definition: Scalarizer.cpp:372
static cl::opt< bool > ClScalarizeVariableInsertExtract("scalarize-variable-insert-extract", cl::init(true), cl::Hidden, cl::desc("Allow the scalarizer pass to scalarize " "insertelement/extractelement with variable index"))
scalarizer
Definition: Scalarizer.cpp:371
static cl::opt< bool > ClScalarizeLoadStore("scalarize-load-store", cl::init(false), cl::Hidden, cl::desc("Allow the scalarizer pass to scalarize loads and stores"))
static cl::opt< unsigned > ClScalarizeMinBits("scalarize-min-bits", cl::init(0), cl::Hidden, cl::desc("Instruct the scalarizer pass to attempt to keep values of a " "minimum number of bits"))
This pass converts vector operations into scalar operations (or, optionally, operations on smaller ve...
This file defines the SmallVector class.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:337
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:335
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:257
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
This class represents a no-op cast from one type to another.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1412
unsigned arg_size() const
Definition: InstrTypes.h:1355
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:428
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:673
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:668
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:675
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:314
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
This instruction extracts a single (scalar) element from a VectorType value.
This instruction compares its operands according to the predicate given to the constructor.
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:536
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:693
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:940
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
Type * getSourceElementType() const
unsigned getNumIndices() const
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2625
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
Base class for instruction visitors.
Definition: InstVisitor.h:78
RetTy visitFreezeInst(FreezeInst &I)
Definition: InstVisitor.h:200
RetTy visitFCmpInst(FCmpInst &I)
Definition: InstVisitor.h:167
RetTy visitExtractElementInst(ExtractElementInst &I)
Definition: InstVisitor.h:191
RetTy visitShuffleVectorInst(ShuffleVectorInst &I)
Definition: InstVisitor.h:193
RetTy visitBitCastInst(BitCastInst &I)
Definition: InstVisitor.h:187
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
RetTy visitPHINode(PHINode &I)
Definition: InstVisitor.h:175
RetTy visitUnaryOperator(UnaryOperator &I)
Definition: InstVisitor.h:260
RetTy visitStoreInst(StoreInst &I)
Definition: InstVisitor.h:170
RetTy visitInsertElementInst(InsertElementInst &I)
Definition: InstVisitor.h:192
RetTy visitBinaryOperator(BinaryOperator &I)
Definition: InstVisitor.h:261
RetTy visitICmpInst(ICmpInst &I)
Definition: InstVisitor.h:166
RetTy visitCallInst(CallInst &I)
Definition: InstVisitor.h:220
RetTy visitCastInst(CastInst &I)
Definition: InstVisitor.h:259
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:189
RetTy visitGetElementPtrInst(GetElementPtrInst &I)
Definition: InstVisitor.h:174
void visitInstruction(Instruction &I)
Definition: InstVisitor.h:280
RetTy visitLoadInst(LoadInst &I)
Definition: InstVisitor.h:169
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:71
const BasicBlock * getParent() const
Definition: Instruction.h:90
An instruction for reading from memory.
Definition: Instructions.h:177
Value * getPointerOperand()
Definition: Instructions.h:264
bool isSimple() const
Definition: Instructions.h:256
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:220
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:254
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1743
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This class represents the LLVM 'select' instruction.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
size_t size() const
Definition: SmallVector.h:91
void resize(size_type N)
Definition: SmallVector.h:642
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
An instruction for storing to memory.
Definition: Instructions.h:301
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:535
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:384
int getNumOccurrences() const
Definition: CommandLine.h:401
self_iterator getIterator()
Definition: ilist_node.h:82
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:119
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1422
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1685
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:414
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
@ Done
Definition: Threading.h:61
BasicBlock::iterator skipDebugIntrinsics(BasicBlock::iterator It)
Advance It while it points to a debug instruction and return the result.
Definition: BasicBlock.cpp:547
FunctionPass * createScalarizerPass()
Create a legacy pass manager instance of the Scalarizer pass.
Definition: Scalarizer.cpp:458
DWARFExpression::Operation Op
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
Definition: Local.cpp:544
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Definition: VectorUtils.cpp:44
void initializeScalarizerLegacyPassPass(PassRegistry &)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39