//===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass converts vector operations into scalar operations (or, optionally,
// operations on smaller vector widths), in order to expose optimization
// opportunities on the individual scalar operations.
// It is mainly intended for targets that do not have vector units, but it
// may also be useful for revectorizing code to different vector widths.
//
//===----------------------------------------------------------------------===//
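
// As a rough illustration (value names are only indicative): with the default
// full scalarization, an instruction such as
//
//   %res = fadd <2 x float> %a, %b
//
// is replaced by per-element operations along the lines of
//
//   %a.i0 = extractelement <2 x float> %a, i64 0
//   %b.i0 = extractelement <2 x float> %b, i64 0
//   %res.i0 = fadd float %a.i0, %b.i0
//   %a.i1 = extractelement <2 x float> %a, i64 1
//   %b.i1 = extractelement <2 x float> %b, i64 1
//   %res.i1 = fadd float %a.i1, %b.i1
//
// and the original vector value is only rebuilt with insertelements at the
// end of the pass if some user still needs the full vector (see finish()).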

#include "llvm/Transforms/Scalar/Scalarizer.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <map>
#include <utility>
using namespace llvm;

#define DEBUG_TYPE "scalarizer"

static cl::opt<bool> ClScalarizeVariableInsertExtract(
    "scalarize-variable-insert-extract", cl::init(true), cl::Hidden,
    cl::desc("Allow the scalarizer pass to scalarize "
             "insertelement/extractelement with variable index"));

// This is disabled by default because having separate loads and stores
// makes it more likely that the -combiner-alias-analysis limits will be
// reached.
static cl::opt<bool> ClScalarizeLoadStore(
    "scalarize-load-store", cl::init(false), cl::Hidden,
    cl::desc("Allow the scalarizer pass to scalarize loads and stores"));

// Split vectors larger than this size into fragments, where each fragment is
// either a vector no larger than this size or a scalar.
//
// Instructions with operands or results of different sizes that would be split
// into a different number of fragments are currently left as-is.
static cl::opt<unsigned> ClScalarizeMinBits(
    "scalarize-min-bits", cl::init(0), cl::Hidden,
    cl::desc("Instruct the scalarizer pass to attempt to keep values of a "
             "minimum number of bits"));

namespace {

BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
  BasicBlock *BB = Itr->getParent();
  if (isa<PHINode>(Itr))
    Itr = BB->getFirstInsertionPt();
  if (Itr != BB->end())
    Itr = skipDebugIntrinsics(Itr);
  return Itr;
}

// Used to store the scattered form of a vector.
using ValueVector = SmallVector<Value *, 8>;

// Used to map a vector Value and associated type to its scattered form.
// The associated type is only non-null for pointer values that are "scattered"
// when used as pointer operands to load or store.
//
// We use std::map because we want iterators to persist across insertion and
// because the values are relatively large.
using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;

// Lists Instructions that have been replaced with scalar implementations,
// along with a pointer to their scattered forms.
using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>;

struct VectorSplit {
  // The type of the vector.
  FixedVectorType *VecTy = nullptr;

  // The number of elements packed in a fragment (other than the remainder).
  unsigned NumPacked = 0;

  // The number of fragments (scalars or smaller vectors) into which the vector
  // shall be split.
  unsigned NumFragments = 0;

  // The type of each complete fragment.
  Type *SplitTy = nullptr;

  // The type of the remainder (last) fragment; null if all fragments are
  // complete.
  Type *RemainderTy = nullptr;

  Type *getFragmentType(unsigned I) const {
    return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy;
  }
};
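
// Illustrative example (not exhaustive): splitting <7 x i16> with
// scalarize-min-bits=32 gives NumPacked = 2, NumFragments = 4,
// SplitTy = <2 x i16> and RemainderTy = i16, i.e. the fragments are
// <2 x i16>, <2 x i16>, <2 x i16>, i16.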

// Provides a very limited vector-like interface for lazily accessing one
// component of a scattered vector or vector pointer.
class Scatterer {
public:
  Scatterer() = default;

  // Scatter V into Size components. If new instructions are needed,
  // insert them before BBI in BB. If Cache is nonnull, use it to cache
  // the results.
  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
            const VectorSplit &VS, ValueVector *cachePtr = nullptr);

  // Return component I, creating a new Value for it if necessary.
  Value *operator[](unsigned I);

  // Return the number of components.
  unsigned size() const { return VS.NumFragments; }

private:
  BasicBlock *BB;
  BasicBlock::iterator BBI;
  Value *V;
  VectorSplit VS;
  bool IsPointer;
  ValueVector *CachePtr;
  ValueVector Tmp;
};

// FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
// called Name that compares X and Y in the same way as FCI.
struct FCmpSplitter {
  FCmpSplitter(FCmpInst &fci) : FCI(fci) {}

  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
                    const Twine &Name) const {
    return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
  }

  FCmpInst &FCI;
};

// ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
// called Name that compares X and Y in the same way as ICI.
struct ICmpSplitter {
  ICmpSplitter(ICmpInst &ici) : ICI(ici) {}

  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
                    const Twine &Name) const {
    return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
  }

  ICmpInst &ICI;
};

// UnarySplitter(UO)(Builder, X, Name) uses Builder to create
// a unary operator like UO called Name with operand X.
struct UnarySplitter {
  UnarySplitter(UnaryOperator &uo) : UO(uo) {}

  Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const {
    return Builder.CreateUnOp(UO.getOpcode(), Op, Name);
  }

  UnaryOperator &UO;
};

// BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
// a binary operator like BO called Name with operands X and Y.
struct BinarySplitter {
  BinarySplitter(BinaryOperator &bo) : BO(bo) {}

  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
                    const Twine &Name) const {
    return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
  }

  BinaryOperator &BO;
};

// Information about a load or store that we're scalarizing.
struct VectorLayout {
  VectorLayout() = default;

  // Return the alignment of fragment Frag.
  Align getFragmentAlign(unsigned Frag) {
    return commonAlignment(VecAlign, Frag * SplitSize);
  }

  // The split of the underlying vector type.
  VectorSplit VS;

  // The alignment of the vector.
  Align VecAlign;

  // The size of each (non-remainder) fragment in bytes.
  uint64_t SplitSize = 0;
};
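
// For example (illustrative only): a fully scalarized <4 x i32> access with
// 16-byte alignment has SplitSize = 4, and getFragmentAlign returns 16, 4, 8
// and 4 for fragments 0..3 (commonAlignment of 16 with byte offsets 0, 4, 8
// and 12).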

/// Concatenate the given fragments to a single vector value of the type
/// described in @p VS.
static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
                          const VectorSplit &VS, Twine Name) {
  unsigned NumElements = VS.VecTy->getNumElements();
  SmallVector<int> ExtendMask;
  SmallVector<int> InsertMask;

  if (VS.NumPacked > 1) {
    // Prepare the shufflevector masks once and re-use them for all
    // fragments.
    ExtendMask.resize(NumElements, -1);
    for (unsigned I = 0; I < VS.NumPacked; ++I)
      ExtendMask[I] = I;

    InsertMask.resize(NumElements);
    for (unsigned I = 0; I < NumElements; ++I)
      InsertMask[I] = I;
  }

  Value *Res = PoisonValue::get(VS.VecTy);
  for (unsigned I = 0; I < VS.NumFragments; ++I) {
    Value *Fragment = Fragments[I];

    unsigned NumPacked = VS.NumPacked;
    if (I == VS.NumFragments - 1 && VS.RemainderTy) {
      if (auto *RemVecTy = dyn_cast<FixedVectorType>(VS.RemainderTy))
        NumPacked = RemVecTy->getNumElements();
      else
        NumPacked = 1;
    }

    if (NumPacked == 1) {
      Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
                                        Name + ".upto" + Twine(I));
    } else {
      Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);
      if (I == 0) {
        Res = Fragment;
      } else {
        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = NumElements + J;
        Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
                                          Name + ".upto" + Twine(I));
        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
      }
    }
  }

  return Res;
}
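
// Sketch of the packed case above (illustrative): concatenating two
// <2 x float> fragments F0 and F1 into a <4 x float> first widens each
// fragment with ExtendMask <0, 1, -1, -1> (the -1 lanes are poison) and then
// merges F1 into the accumulated result with InsertMask <0, 1, 4, 5>.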

template <typename T>
T getWithDefaultOverride(const cl::opt<T> &ClOption,
                         const std::optional<T> &DefaultOverride) {
  return ClOption.getNumOccurrences() ? ClOption
                                      : DefaultOverride.value_or(ClOption);
}
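
// getWithDefaultOverride picks, in order of precedence: an explicit occurrence
// of the command-line flag, then the value supplied in ScalarizerPassOptions
// (if any), then the flag's built-in default.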

class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
  ScalarizerVisitor(DominatorTree *DT, ScalarizerPassOptions Options)
      : DT(DT), ScalarizeVariableInsertExtract(getWithDefaultOverride(
                    ClScalarizeVariableInsertExtract,
                    Options.ScalarizeVariableInsertExtract)),
        ScalarizeLoadStore(getWithDefaultOverride(ClScalarizeLoadStore,
                                                  Options.ScalarizeLoadStore)),
        ScalarizeMinBits(getWithDefaultOverride(ClScalarizeMinBits,
                                                Options.ScalarizeMinBits)) {}

  bool visit(Function &F);

  // InstVisitor methods. They return true if the instruction was scalarized,
  // false if nothing changed.
  bool visitInstruction(Instruction &I) { return false; }
  bool visitSelectInst(SelectInst &SI);
  bool visitICmpInst(ICmpInst &ICI);
  bool visitFCmpInst(FCmpInst &FCI);
  bool visitUnaryOperator(UnaryOperator &UO);
  bool visitBinaryOperator(BinaryOperator &BO);
  bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
  bool visitCastInst(CastInst &CI);
  bool visitBitCastInst(BitCastInst &BCI);
  bool visitInsertElementInst(InsertElementInst &IEI);
  bool visitExtractElementInst(ExtractElementInst &EEI);
  bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
  bool visitPHINode(PHINode &PHI);
  bool visitLoadInst(LoadInst &LI);
  bool visitStoreInst(StoreInst &SI);
  bool visitCallInst(CallInst &ICI);
  bool visitFreezeInst(FreezeInst &FI);

private:
  Scatterer scatter(Instruction *Point, Value *V, const VectorSplit &VS);
  void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS);
  void replaceUses(Instruction *Op, Value *CV);
  bool canTransferMetadata(unsigned Kind);
  void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
  std::optional<VectorSplit> getVectorSplit(Type *Ty);
  std::optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment,
                                              const DataLayout &DL);
  bool finish();

  template<typename T> bool splitUnary(Instruction &, const T &);
  template<typename T> bool splitBinary(Instruction &, const T &);

  bool splitCall(CallInst &CI);

  ScatterMap Scattered;
  GatherList Gathered;
  bool Scalarized;

  SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs;

  DominatorTree *DT;

  const bool ScalarizeVariableInsertExtract;
  const bool ScalarizeLoadStore;
  const unsigned ScalarizeMinBits;
};

} // end anonymous namespace

Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
                     const VectorSplit &VS, ValueVector *cachePtr)
    : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
  IsPointer = V->getType()->isPointerTy();
  if (!CachePtr) {
    Tmp.resize(VS.NumFragments, nullptr);
  } else {
    assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() ||
            IsPointer) &&
           "Inconsistent vector sizes");
    if (VS.NumFragments > CachePtr->size())
      CachePtr->resize(VS.NumFragments, nullptr);
  }
}

// Return fragment Frag, creating a new Value for it if necessary.
Value *Scatterer::operator[](unsigned Frag) {
  ValueVector &CV = CachePtr ? *CachePtr : Tmp;
  // Try to reuse a previous value.
  if (CV[Frag])
    return CV[Frag];
  IRBuilder<> Builder(BB, BBI);
  if (IsPointer) {
    if (Frag == 0)
      CV[Frag] = V;
    else
      CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, V, Frag,
                                            V->getName() + ".i" + Twine(Frag));
    return CV[Frag];
  }

  Type *FragmentTy = VS.getFragmentType(Frag);

  if (auto *VecTy = dyn_cast<FixedVectorType>(FragmentTy)) {
    SmallVector<int> Mask;
    for (unsigned J = 0; J < VecTy->getNumElements(); ++J)
      Mask.push_back(Frag * VS.NumPacked + J);
    CV[Frag] =
        Builder.CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask,
                                    V->getName() + ".i" + Twine(Frag));
  } else {
    // Search through a chain of InsertElementInsts looking for element Frag.
    // Record other elements in the cache. The new V is still suitable
    // for all uncached indices.
    while (true) {
      InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
      if (!Insert)
        break;
      ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
      if (!Idx)
        break;
      unsigned J = Idx->getZExtValue();
      V = Insert->getOperand(0);
      if (Frag * VS.NumPacked == J) {
        CV[Frag] = Insert->getOperand(1);
        return CV[Frag];
      }

      if (VS.NumPacked == 1 && !CV[J]) {
        // Only cache the first entry we find for each index we're not actively
        // searching for. This prevents us from going too far up the chain and
        // caching incorrect entries.
        CV[J] = Insert->getOperand(1);
      }
    }
    CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked,
                                            V->getName() + ".i" + Twine(Frag));
  }

  return CV[Frag];
}
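
// Small example of the chain search above (names indicative): if V was built as
//   %v0 = insertelement <2 x i32> poison, i32 %x, i64 0
//   %v  = insertelement <2 x i32> %v0, i32 %y, i64 1
// then, for a fully scalarized split, operator[](1) returns %y and
// operator[](0) returns %x without creating any extractelement instructions.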

bool ScalarizerVisitor::visit(Function &F) {
  assert(Gathered.empty() && Scattered.empty());

  Scalarized = false;

  // To ensure we replace gathered components correctly we need to do an ordered
  // traversal of the basic blocks in the function.
  ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock());
  for (BasicBlock *BB : RPOT) {
    for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
      Instruction *I = &*II;
      bool Done = InstVisitor::visit(I);
      ++II;
      if (Done && I->getType()->isVoidTy())
        I->eraseFromParent();
    }
  }
  return finish();
}

// Return a scattered form of V that can be accessed by Point. V must be a
// vector or a pointer to a vector.
Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
                                     const VectorSplit &VS) {
  if (Argument *VArg = dyn_cast<Argument>(V)) {
    // Put the scattered form of arguments in the entry block,
    // so that it can be used everywhere.
    Function *F = VArg->getParent();
    BasicBlock *BB = &F->getEntryBlock();
    return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]);
  }
  if (Instruction *VOp = dyn_cast<Instruction>(V)) {
    // When scalarizing PHI nodes we might try to examine/rewrite InsertElement
    // nodes in predecessors. If those predecessors are unreachable from entry,
    // then the IR in those blocks could have unexpected properties resulting in
    // infinite loops in Scatterer::operator[]. By simply treating values
    // originating from instructions in unreachable blocks as undef we do not
    // need to analyse them further.
    if (!DT->isReachableFromEntry(VOp->getParent()))
      return Scatterer(Point->getParent(), Point->getIterator(),
                       PoisonValue::get(V->getType()), VS);
    // Put the scattered form of an instruction directly after the
    // instruction, skipping over PHI nodes and debug intrinsics.
    BasicBlock *BB = VOp->getParent();
    return Scatterer(
        BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V, VS,
        &Scattered[{V, VS.SplitTy}]);
  }
  // In the fallback case, just put the scattered form before Point and
  // keep the result local to Point.
  return Scatterer(Point->getParent(), Point->getIterator(), V, VS);
}

// Replace Op with the gathered form of the components in CV. Defer the
// deletion of Op and creation of the gathered form to the end of the pass,
// so that we can avoid creating the gathered form if all uses of Op are
// replaced with uses of CV.
void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV,
                               const VectorSplit &VS) {
  transferMetadataAndIRFlags(Op, CV);

  // If we already have a scattered form of Op (created from ExtractElements
  // of Op itself), replace them with the new form.
  ValueVector &SV = Scattered[{Op, VS.SplitTy}];
  if (!SV.empty()) {
    for (unsigned I = 0, E = SV.size(); I != E; ++I) {
      Value *V = SV[I];
      if (V == nullptr || SV[I] == CV[I])
        continue;

      Instruction *Old = cast<Instruction>(V);
      if (isa<Instruction>(CV[I]))
        CV[I]->takeName(Old);
      Old->replaceAllUsesWith(CV[I]);
      PotentiallyDeadInstrs.emplace_back(Old);
    }
  }
  SV = CV;
  Gathered.push_back(GatherList::value_type(Op, &SV));
}

// Replace Op with CV and collect Op as a potentially dead instruction.
void ScalarizerVisitor::replaceUses(Instruction *Op, Value *CV) {
  if (CV != Op) {
    Op->replaceAllUsesWith(CV);
    PotentiallyDeadInstrs.emplace_back(Op);
    Scalarized = true;
  }
}

// Return true if it is safe to transfer the given metadata tag from
// vector to scalar instructions.
bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
  return (Tag == LLVMContext::MD_tbaa
          || Tag == LLVMContext::MD_fpmath
          || Tag == LLVMContext::MD_tbaa_struct
          || Tag == LLVMContext::MD_invariant_load
          || Tag == LLVMContext::MD_alias_scope
          || Tag == LLVMContext::MD_noalias
          || Tag == LLVMContext::MD_mem_parallel_loop_access
          || Tag == LLVMContext::MD_access_group);
}

// Transfer metadata from Op to the instructions in CV if it is known
// to be safe to do so.
void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
                                                   const ValueVector &CV) {
  SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
  Op->getAllMetadataOtherThanDebugLoc(MDs);
  for (unsigned I = 0, E = CV.size(); I != E; ++I) {
    if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
      for (const auto &MD : MDs)
        if (canTransferMetadata(MD.first))
          New->setMetadata(MD.first, MD.second);
      New->copyIRFlags(Op);
      if (Op->getDebugLoc() && !New->getDebugLoc())
        New->setDebugLoc(Op->getDebugLoc());
    }
  }
}

// Determine how Ty is split, if at all.
std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) {
  VectorSplit Split;
  Split.VecTy = dyn_cast<FixedVectorType>(Ty);
  if (!Split.VecTy)
    return {};

  unsigned NumElems = Split.VecTy->getNumElements();
  Type *ElemTy = Split.VecTy->getElementType();

  if (NumElems == 1 || ElemTy->isPointerTy() ||
      2 * ElemTy->getScalarSizeInBits() > ScalarizeMinBits) {
    Split.NumPacked = 1;
    Split.NumFragments = NumElems;
    Split.SplitTy = ElemTy;
  } else {
    Split.NumPacked = ScalarizeMinBits / ElemTy->getScalarSizeInBits();
    if (Split.NumPacked >= NumElems)
      return {};

    Split.NumFragments = divideCeil(NumElems, Split.NumPacked);
    Split.SplitTy = FixedVectorType::get(ElemTy, Split.NumPacked);

    unsigned RemainderElems = NumElems % Split.NumPacked;
    if (RemainderElems > 1)
      Split.RemainderTy = FixedVectorType::get(ElemTy, RemainderElems);
    else if (RemainderElems == 1)
      Split.RemainderTy = ElemTy;
  }

  return Split;
}
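
// Boundary cases (illustrative, assuming the stated option values):
//  * <4 x i32> with scalarize-min-bits=0 is fully scalarized into four i32
//    fragments (2 * 32 bits exceeds the 0-bit minimum).
//  * <2 x i16> with scalarize-min-bits=64 is not split at all: NumPacked would
//    be 4 >= 2 elements, so the whole vector already fits in one fragment and
//    getVectorSplit returns std::nullopt.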

// Try to fill in a VectorLayout for Ty, returning std::nullopt on failure.
// Alignment is the alignment of the vector access.
std::optional<VectorLayout>
ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment,
                                   const DataLayout &DL) {
  std::optional<VectorSplit> VS = getVectorSplit(Ty);
  if (!VS)
    return {};

  VectorLayout Layout;
  Layout.VS = *VS;
  // Check that we're dealing with full-byte fragments.
  if (!DL.typeSizeEqualsStoreSize(VS->SplitTy) ||
      (VS->RemainderTy && !DL.typeSizeEqualsStoreSize(VS->RemainderTy)))
    return {};
  Layout.VecAlign = Alignment;
  Layout.SplitSize = DL.getTypeStoreSize(VS->SplitTy);
  return Layout;
}

// Scalarize one-operand instruction I, using Split(Builder, X, Name)
// to create an instruction like I with operand X and name Name.
template<typename Splitter>
bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
  std::optional<VectorSplit> VS = getVectorSplit(I.getType());
  if (!VS)
    return false;

  std::optional<VectorSplit> OpVS;
  if (I.getOperand(0)->getType() == I.getType()) {
    OpVS = VS;
  } else {
    OpVS = getVectorSplit(I.getOperand(0)->getType());
    if (!OpVS || VS->NumPacked != OpVS->NumPacked)
      return false;
  }

  IRBuilder<> Builder(&I);
  Scatterer Op = scatter(&I, I.getOperand(0), *OpVS);
  assert(Op.size() == VS->NumFragments && "Mismatched unary operation");
  ValueVector Res;
  Res.resize(VS->NumFragments);
  for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag)
    Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag));
  gather(&I, Res, *VS);
  return true;
}

// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
// to create an instruction like I with operands X and Y and name Name.
template<typename Splitter>
bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
  std::optional<VectorSplit> VS = getVectorSplit(I.getType());
  if (!VS)
    return false;

  std::optional<VectorSplit> OpVS;
  if (I.getOperand(0)->getType() == I.getType()) {
    OpVS = VS;
  } else {
    OpVS = getVectorSplit(I.getOperand(0)->getType());
    if (!OpVS || VS->NumPacked != OpVS->NumPacked)
      return false;
  }

  IRBuilder<> Builder(&I);
  Scatterer VOp0 = scatter(&I, I.getOperand(0), *OpVS);
  Scatterer VOp1 = scatter(&I, I.getOperand(1), *OpVS);
  assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation");
  assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation");
  ValueVector Res;
  Res.resize(VS->NumFragments);
  for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) {
    Value *Op0 = VOp0[Frag];
    Value *Op1 = VOp1[Frag];
    Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag));
  }
  gather(&I, Res, *VS);
  return true;
}

static bool isTriviallyScalariable(Intrinsic::ID ID) {
  return isTriviallyVectorizable(ID);
}

/// If CI is a call to a vector typed intrinsic function, split it into a
/// scalar call per element if possible for the intrinsic.
bool ScalarizerVisitor::splitCall(CallInst &CI) {
  std::optional<VectorSplit> VS = getVectorSplit(CI.getType());
  if (!VS)
    return false;

  Function *F = CI.getCalledFunction();
  if (!F)
    return false;

  Intrinsic::ID ID = F->getIntrinsicID();
  if (ID == Intrinsic::not_intrinsic || !isTriviallyScalariable(ID))
    return false;

  // unsigned NumElems = VT->getNumElements();
  unsigned NumArgs = CI.arg_size();

  ValueVector ScalarOperands(NumArgs);
  SmallVector<Scatterer, 8> Scattered(NumArgs);
  SmallVector<int> OverloadIdx(NumArgs, -1);

  SmallVector<llvm::Type *, 3> Tys;
  // Add return type if intrinsic is overloaded on it.
  if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
    Tys.push_back(VS->SplitTy);

  // Assumes that any vector type has the same number of elements as the return
  // vector type, which is true for all current intrinsics.
  for (unsigned I = 0; I != NumArgs; ++I) {
    Value *OpI = CI.getOperand(I);
    if ([[maybe_unused]] auto *OpVecTy =
            dyn_cast<FixedVectorType>(OpI->getType())) {
      assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements());
      std::optional<VectorSplit> OpVS = getVectorSplit(OpI->getType());
      if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
        // The natural split of the operand doesn't match the result. This could
        // happen if the vector elements are different and the ScalarizeMinBits
        // option is used.
        //
        // We could in principle handle this case as well, at the cost of
        // complicating the scattering machinery to support multiple scattering
        // granularities for a single value.
        return false;
      }

      Scattered[I] = scatter(&CI, OpI, *OpVS);
      if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) {
        OverloadIdx[I] = Tys.size();
        Tys.push_back(OpVS->SplitTy);
      }
    } else {
      ScalarOperands[I] = OpI;
      if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
        Tys.push_back(OpI->getType());
    }
  }

  ValueVector Res(VS->NumFragments);
  ValueVector ScalarCallOps(NumArgs);

  Function *NewIntrin = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
  IRBuilder<> Builder(&CI);

  // Perform actual scalarization, taking care to preserve any scalar operands.
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy;
    ScalarCallOps.clear();

    if (IsRemainder)
      Tys[0] = VS->RemainderTy;

    for (unsigned J = 0; J != NumArgs; ++J) {
      if (isVectorIntrinsicWithScalarOpAtArg(ID, J)) {
        ScalarCallOps.push_back(ScalarOperands[J]);
      } else {
        ScalarCallOps.push_back(Scattered[J][I]);
        if (IsRemainder && OverloadIdx[J] >= 0)
          Tys[OverloadIdx[J]] = Scattered[J][I]->getType();
      }
    }

    if (IsRemainder)
      NewIntrin = Intrinsic::getDeclaration(F->getParent(), ID, Tys);

    Res[I] = Builder.CreateCall(NewIntrin, ScalarCallOps,
                                CI.getName() + ".i" + Twine(I));
  }

  gather(&CI, Res, *VS);
  return true;
}
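
// Rough illustration: with full scalarization, a call to llvm.fma.v4f32 on
// <4 x float> operands becomes four calls to llvm.fma.f32 on the scattered
// scalar operands; intrinsics that are not trivially scalarizable, or whose
// operands would split at a different granularity, are left untouched.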

bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
  std::optional<VectorSplit> VS = getVectorSplit(SI.getType());
  if (!VS)
    return false;

  std::optional<VectorSplit> CondVS;
  if (isa<FixedVectorType>(SI.getCondition()->getType())) {
    CondVS = getVectorSplit(SI.getCondition()->getType());
    if (!CondVS || CondVS->NumPacked != VS->NumPacked) {
      // This happens when ScalarizeMinBits is used.
      return false;
    }
  }

  IRBuilder<> Builder(&SI);
  Scatterer VOp1 = scatter(&SI, SI.getOperand(1), *VS);
  Scatterer VOp2 = scatter(&SI, SI.getOperand(2), *VS);
  assert(VOp1.size() == VS->NumFragments && "Mismatched select");
  assert(VOp2.size() == VS->NumFragments && "Mismatched select");
  ValueVector Res;
  Res.resize(VS->NumFragments);

  if (CondVS) {
    Scatterer VOp0 = scatter(&SI, SI.getOperand(0), *CondVS);
    assert(VOp0.size() == CondVS->NumFragments && "Mismatched select");
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Value *Op0 = VOp0[I];
      Value *Op1 = VOp1[I];
      Value *Op2 = VOp2[I];
      Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
                                    SI.getName() + ".i" + Twine(I));
    }
  } else {
    Value *Op0 = SI.getOperand(0);
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Value *Op1 = VOp1[I];
      Value *Op2 = VOp2[I];
      Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
                                    SI.getName() + ".i" + Twine(I));
    }
  }
  gather(&SI, Res, *VS);
  return true;
}

bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
  return splitBinary(ICI, ICmpSplitter(ICI));
}

bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
  return splitBinary(FCI, FCmpSplitter(FCI));
}

bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
  return splitUnary(UO, UnarySplitter(UO));
}

bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
  return splitBinary(BO, BinarySplitter(BO));
}

bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
  std::optional<VectorSplit> VS = getVectorSplit(GEPI.getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&GEPI);
  unsigned NumIndices = GEPI.getNumIndices();

  // The base pointer and indices might be scalar even if it's a vector GEP.
  SmallVector<Value *, 8> ScalarOps{1 + NumIndices};
  SmallVector<Scatterer, 8> ScatterOps{1 + NumIndices};

  for (unsigned I = 0; I < 1 + NumIndices; ++I) {
    if (auto *VecTy =
            dyn_cast<FixedVectorType>(GEPI.getOperand(I)->getType())) {
      std::optional<VectorSplit> OpVS = getVectorSplit(VecTy);
      if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
        // This can happen when ScalarizeMinBits is used.
        return false;
      }
      ScatterOps[I] = scatter(&GEPI, GEPI.getOperand(I), *OpVS);
    } else {
      ScalarOps[I] = GEPI.getOperand(I);
    }
  }

  ValueVector Res;
  Res.resize(VS->NumFragments);
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    SmallVector<Value *, 8> SplitOps;
    SplitOps.resize(1 + NumIndices);
    for (unsigned J = 0; J < 1 + NumIndices; ++J) {
      if (ScalarOps[J])
        SplitOps[J] = ScalarOps[J];
      else
        SplitOps[J] = ScatterOps[J][I];
    }
    Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), SplitOps[0],
                               ArrayRef(SplitOps).drop_front(),
                               GEPI.getName() + ".i" + Twine(I));
    if (GEPI.isInBounds())
      if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
        NewGEPI->setIsInBounds();
  }
  gather(&GEPI, Res, *VS);
  return true;
}

bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
  std::optional<VectorSplit> DestVS = getVectorSplit(CI.getDestTy());
  if (!DestVS)
    return false;

  std::optional<VectorSplit> SrcVS = getVectorSplit(CI.getSrcTy());
  if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked)
    return false;

  IRBuilder<> Builder(&CI);
  Scatterer Op0 = scatter(&CI, CI.getOperand(0), *SrcVS);
  assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast");
  ValueVector Res;
  Res.resize(DestVS->NumFragments);
  for (unsigned I = 0; I < DestVS->NumFragments; ++I)
    Res[I] =
        Builder.CreateCast(CI.getOpcode(), Op0[I], DestVS->getFragmentType(I),
                           CI.getName() + ".i" + Twine(I));
  gather(&CI, Res, *DestVS);
  return true;
}

bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
  std::optional<VectorSplit> DstVS = getVectorSplit(BCI.getDestTy());
  std::optional<VectorSplit> SrcVS = getVectorSplit(BCI.getSrcTy());
  if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy)
    return false;

  const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy();

  // Vectors of pointers are always fully scalarized.
  assert(!isPointerTy || (DstVS->NumPacked == 1 && SrcVS->NumPacked == 1));

  IRBuilder<> Builder(&BCI);
  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0), *SrcVS);
  ValueVector Res;
  Res.resize(DstVS->NumFragments);

  unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits();
  unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits();

  if (isPointerTy || DstSplitBits == SrcSplitBits) {
    assert(DstVS->NumFragments == SrcVS->NumFragments);
    for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
      Res[I] = Builder.CreateBitCast(Op0[I], DstVS->getFragmentType(I),
                                     BCI.getName() + ".i" + Twine(I));
    }
  } else if (SrcSplitBits % DstSplitBits == 0) {
    // Convert each source fragment to the same-sized destination vector and
    // then scatter the result to the destination.
    VectorSplit MidVS;
    MidVS.NumPacked = DstVS->NumPacked;
    MidVS.NumFragments = SrcSplitBits / DstSplitBits;
    MidVS.VecTy = FixedVectorType::get(DstVS->VecTy->getElementType(),
                                       MidVS.NumPacked * MidVS.NumFragments);
    MidVS.SplitTy = DstVS->SplitTy;

    unsigned ResI = 0;
    for (unsigned I = 0; I < SrcVS->NumFragments; ++I) {
      Value *V = Op0[I];

      // Look through any existing bitcasts before converting to <N x t2>.
      // In the best case, the resulting conversion might be a no-op.
      Instruction *VI;
      while ((VI = dyn_cast<Instruction>(V)) &&
             VI->getOpcode() == Instruction::BitCast)
        V = VI->getOperand(0);

      V = Builder.CreateBitCast(V, MidVS.VecTy, V->getName() + ".cast");

      Scatterer Mid = scatter(&BCI, V, MidVS);
      for (unsigned J = 0; J < MidVS.NumFragments; ++J)
        Res[ResI++] = Mid[J];
    }
  } else if (DstSplitBits % SrcSplitBits == 0) {
    // Gather enough source fragments to make up a destination fragment and
    // then convert to the destination type.
    VectorSplit MidVS;
    MidVS.NumFragments = DstSplitBits / SrcSplitBits;
    MidVS.NumPacked = SrcVS->NumPacked;
    MidVS.VecTy = FixedVectorType::get(SrcVS->VecTy->getElementType(),
                                       MidVS.NumPacked * MidVS.NumFragments);
    MidVS.SplitTy = SrcVS->SplitTy;

    unsigned SrcI = 0;
    SmallVector<Value *, 8> ConcatOps;
    ConcatOps.resize(MidVS.NumFragments);
    for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
      for (unsigned J = 0; J < MidVS.NumFragments; ++J)
        ConcatOps[J] = Op0[SrcI++];
      Value *V = concatenate(Builder, ConcatOps, MidVS,
                             BCI.getName() + ".i" + Twine(I));
      Res[I] = Builder.CreateBitCast(V, DstVS->getFragmentType(I),
                                     BCI.getName() + ".i" + Twine(I));
    }
  } else {
    return false;
  }

  gather(&BCI, Res, *DstVS);
  return true;
}
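
// Illustration (assuming scalarize-min-bits=32): bitcasting <8 x i16> to
// <4 x i32> splits the source into four <2 x i16> fragments and the
// destination into four i32 fragments; both fragment types are 32 bits wide,
// so each <2 x i16> fragment is simply bitcast to an i32 above.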

bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
  std::optional<VectorSplit> VS = getVectorSplit(IEI.getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&IEI);
  Scatterer Op0 = scatter(&IEI, IEI.getOperand(0), *VS);
  Value *NewElt = IEI.getOperand(1);
  Value *InsIdx = IEI.getOperand(2);

  ValueVector Res;
  Res.resize(VS->NumFragments);

  if (auto *CI = dyn_cast<ConstantInt>(InsIdx)) {
    unsigned Idx = CI->getZExtValue();
    unsigned Fragment = Idx / VS->NumPacked;
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      if (I == Fragment) {
        bool IsPacked = VS->NumPacked > 1;
        if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
            !VS->RemainderTy->isVectorTy())
          IsPacked = false;
        if (IsPacked) {
          Res[I] =
              Builder.CreateInsertElement(Op0[I], NewElt, Idx % VS->NumPacked);
        } else {
          Res[I] = NewElt;
        }
      } else {
        Res[I] = Op0[I];
      }
    }
  } else {
    // Never split a variable insertelement that isn't fully scalarized.
    if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
      return false;

    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Value *ShouldReplace =
          Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
                               InsIdx->getName() + ".is." + Twine(I));
      Value *OldElt = Op0[I];
      Res[I] = Builder.CreateSelect(ShouldReplace, NewElt, OldElt,
                                    IEI.getName() + ".i" + Twine(I));
    }
  }

  gather(&IEI, Res, *VS);
  return true;
}
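
// For a variable index under full scalarization, e.g.
//   %r = insertelement <2 x i32> %v, i32 %x, i32 %idx
// each lane becomes a compare-and-select, roughly (names indicative):
//   %idx.is.0 = icmp eq i32 %idx, 0
//   %r.i0 = select i1 %idx.is.0, i32 %x, i32 %v.i0
//   %idx.is.1 = icmp eq i32 %idx, 1
//   %r.i1 = select i1 %idx.is.1, i32 %x, i32 %v.i1
// where %v.i0 and %v.i1 are the scattered components of %v.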

bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
  std::optional<VectorSplit> VS = getVectorSplit(EEI.getOperand(0)->getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&EEI);
  Scatterer Op0 = scatter(&EEI, EEI.getOperand(0), *VS);
  Value *ExtIdx = EEI.getOperand(1);

  if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
    unsigned Idx = CI->getZExtValue();
    unsigned Fragment = Idx / VS->NumPacked;
    Value *Res = Op0[Fragment];
    bool IsPacked = VS->NumPacked > 1;
    if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
        !VS->RemainderTy->isVectorTy())
      IsPacked = false;
    if (IsPacked)
      Res = Builder.CreateExtractElement(Res, Idx % VS->NumPacked);
    replaceUses(&EEI, Res);
    return true;
  }

  // Never split a variable extractelement that isn't fully scalarized.
  if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
    return false;

  Value *Res = PoisonValue::get(VS->VecTy->getElementType());
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    Value *ShouldExtract =
        Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I),
                             ExtIdx->getName() + ".is." + Twine(I));
    Value *Elt = Op0[I];
    Res = Builder.CreateSelect(ShouldExtract, Elt, Res,
                               EEI.getName() + ".upto" + Twine(I));
  }
  replaceUses(&EEI, Res);
  return true;
}

bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  std::optional<VectorSplit> VS = getVectorSplit(SVI.getType());
  std::optional<VectorSplit> VSOp =
      getVectorSplit(SVI.getOperand(0)->getType());
  if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1)
    return false;

  Scatterer Op0 = scatter(&SVI, SVI.getOperand(0), *VSOp);
  Scatterer Op1 = scatter(&SVI, SVI.getOperand(1), *VSOp);
  ValueVector Res;
  Res.resize(VS->NumFragments);

  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    int Selector = SVI.getMaskValue(I);
    if (Selector < 0)
      Res[I] = PoisonValue::get(VS->VecTy->getElementType());
    else if (unsigned(Selector) < Op0.size())
      Res[I] = Op0[Selector];
    else
      Res[I] = Op1[Selector - Op0.size()];
  }
  gather(&SVI, Res, *VS);
  return true;
}

bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
  std::optional<VectorSplit> VS = getVectorSplit(PHI.getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&PHI);
  ValueVector Res;
  Res.resize(VS->NumFragments);

  unsigned NumOps = PHI.getNumOperands();
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    Res[I] = Builder.CreatePHI(VS->getFragmentType(I), NumOps,
                               PHI.getName() + ".i" + Twine(I));
  }

  for (unsigned I = 0; I < NumOps; ++I) {
    Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I), *VS);
    BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
    for (unsigned J = 0; J < VS->NumFragments; ++J)
      cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
  }
  gather(&PHI, Res, *VS);
  return true;
}

bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
  if (!ScalarizeLoadStore)
    return false;
  if (!LI.isSimple())
    return false;

  std::optional<VectorLayout> Layout = getVectorLayout(
      LI.getType(), LI.getAlign(), LI.getModule()->getDataLayout());
  if (!Layout)
    return false;

  IRBuilder<> Builder(&LI);
  Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), Layout->VS);
  ValueVector Res;
  Res.resize(Layout->VS.NumFragments);

  for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
    Res[I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(I), Ptr[I],
                                       Align(Layout->getFragmentAlign(I)),
                                       LI.getName() + ".i" + Twine(I));
  }
  gather(&LI, Res, Layout->VS);
  return true;
}
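
// Sketch (requires -scalarize-load-store): a `load <4 x i32>, ptr %p, align 16`
// becomes four i32 loads from %p and from constant GEPs at byte offsets 4, 8
// and 12, with the per-fragment alignments produced by getFragmentAlign.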

bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
  if (!ScalarizeLoadStore)
    return false;
  if (!SI.isSimple())
    return false;

  Value *FullValue = SI.getValueOperand();
  std::optional<VectorLayout> Layout = getVectorLayout(
      FullValue->getType(), SI.getAlign(), SI.getModule()->getDataLayout());
  if (!Layout)
    return false;

  IRBuilder<> Builder(&SI);
  Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), Layout->VS);
  Scatterer VVal = scatter(&SI, FullValue, Layout->VS);

  ValueVector Stores;
  Stores.resize(Layout->VS.NumFragments);
  for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
    Value *Val = VVal[I];
    Value *Ptr = VPtr[I];
    Stores[I] =
        Builder.CreateAlignedStore(Val, Ptr, Layout->getFragmentAlign(I));
  }
  transferMetadataAndIRFlags(&SI, Stores);
  return true;
}

bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
  return splitCall(CI);
}

bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) {
  return splitUnary(FI, [](IRBuilder<> &Builder, Value *Op, const Twine &Name) {
    return Builder.CreateFreeze(Op, Name);
  });
}

// Delete the instructions that we scalarized. If a full vector result
// is still needed, recreate it using InsertElements.
bool ScalarizerVisitor::finish() {
  // The presence of data in Gathered or Scattered indicates changes
  // made to the Function.
  if (Gathered.empty() && Scattered.empty() && !Scalarized)
    return false;
  for (const auto &GMI : Gathered) {
    Instruction *Op = GMI.first;
    ValueVector &CV = *GMI.second;
    if (!Op->use_empty()) {
      // The value is still needed, so recreate it using a series of
      // insertelements and/or shufflevectors.
      Value *Res;
      if (auto *Ty = dyn_cast<FixedVectorType>(Op->getType())) {
        BasicBlock *BB = Op->getParent();
        IRBuilder<> Builder(Op);
        if (isa<PHINode>(Op))
          Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());

        VectorSplit VS = *getVectorSplit(Ty);
        assert(VS.NumFragments == CV.size());

        Res = concatenate(Builder, CV, VS, Op->getName());

        Res->takeName(Op);
      } else {
        assert(CV.size() == 1 && Op->getType() == CV[0]->getType());
        Res = CV[0];
        if (Op == Res)
          continue;
      }
      Op->replaceAllUsesWith(Res);
    }
    PotentiallyDeadInstrs.emplace_back(Op);
  }
  Gathered.clear();
  Scattered.clear();
  Scalarized = false;

  RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs);

  return true;
}

PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM) {
  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  ScalarizerVisitor Impl(DT, Options);
  bool Changed = Impl.visit(F);
  PreservedAnalyses PA;
  PA.preserve<DominatorTreeAnalysis>();
  return Changed ? PA : PreservedAnalyses::all();
}