45 #define DEBUG_TYPE "vplan-slp" 53 CompletelySLP =
false;
59 return cast<VPInstruction>(V)->getUnderlyingInstr();
61 unsigned BundleSize = 0;
63 Type *
T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
67 WidestBundleBits =
std::max(WidestBundleBits, BundleSize);
70 auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);
72 "Already created a combined instruction for the operand bundle");
79 return Op && isa<VPInstruction>(
Op) &&
80 cast<VPInstruction>(Op)->getUnderlyingInstr();
82 LLVM_DEBUG(
dbgs() <<
"VPSLP: not all operands are VPInstructions\n");
91 cast<VPInstruction>(Operands[0])->getUnderlyingInstr();
92 unsigned Opcode = OriginalInstr->
getOpcode();
95 const Instruction *
I = cast<VPInstruction>(
Op)->getUnderlyingInstr();
105 return cast<VPInstruction>(
Op)->
getParent() != &this->BB;
113 LLVM_DEBUG(
dbgs() <<
"VPSLP: Some operands have multiple users.\n");
122 unsigned LoadsSeen = 0;
124 for (
auto &
I : *Parent) {
125 auto *VPI = cast<VPInstruction>(&
I);
130 if (LoadsSeen == Operands.
size())
132 if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
134 dbgs() <<
"VPSLP: instruction modifying memory between loads\n");
140 return cast<LoadInst>(cast<VPInstruction>(
Op)->getUnderlyingInstr())
143 LLVM_DEBUG(
dbgs() <<
"VPSLP: only simple loads are supported.\n");
150 return cast<StoreInst>(cast<VPInstruction>(
Op)->getUnderlyingInstr())
153 LLVM_DEBUG(
dbgs() <<
"VPSLP: only simple stores are supported.\n");
161 unsigned OperandIndex) {
164 auto *U = cast<VPUser>(V);
165 Operands.
push_back(U->getOperand(OperandIndex));
172 cast<VPInstruction>(Values[0])->
getOpcode());
178 auto *VPI = cast<VPInstruction>(Values[0]);
180 switch (VPI->getOpcode()) {
187 for (
unsigned I = 0, NumOps = VPI->getNumOperands();
I < NumOps; ++
I)
197 unsigned Opcode = cast<VPInstruction>(Values[0])->
getOpcode();
199 return cast<VPInstruction>(V)->
getOpcode() != Opcode;
218 return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);
225 if (!isa<VPInstruction>(V1) || !isa<VPInstruction>(V2))
230 cast<VPInstruction>(V2), IAI);
233 for (
unsigned I = 0, EV1 = cast<VPUser>(V1)->getNumOperands();
I < EV1; ++
I)
234 for (
unsigned J = 0, EV2 = cast<VPUser>(V2)->getNumOperands(); J < EV2; ++J)
235 Score +=
getLAScore(cast<VPUser>(V1)->getOperand(
I),
236 cast<VPUser>(V2)->getOperand(J), MaxLevel - 1, IAI);
240 std::pair<VPlanSlp::OpMode, VPValue *>
245 "Currently we only handle load and commutative opcodes");
250 << *cast<VPInstruction>(Last)->getUnderlyingInstr() <<
" ");
251 for (
auto *Candidate : Candidates) {
252 auto *LastI = cast<VPInstruction>(Last);
253 auto *CandidateI = cast<VPInstruction>(Candidate);
255 LLVM_DEBUG(
dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
262 if (BestCandidates.
empty())
263 return {OpMode::Failed,
nullptr};
265 if (BestCandidates.
size() == 1)
266 return {
Mode, BestCandidates[0]};
269 unsigned BestScore = 0;
271 unsigned PrevScore = ~0u;
275 for (
auto *Candidate : BestCandidates) {
277 if (PrevScore == ~0u)
279 if (PrevScore != Score)
283 if (Score > BestScore) {
292 << *cast<VPInstruction>(Best)->getUnderlyingInstr()
294 Candidates.erase(Best);
307 for (
auto &Operands : MultiNodeOps) {
308 FinalOrder.
push_back({Operands.first, {Operands.second[0]}});
309 if (cast<VPInstruction>(Operands.second[0])->getOpcode() ==
316 for (
unsigned Lane = 1,
E = MultiNodeOps[0].
second.size(); Lane <
E; ++Lane) {
317 LLVM_DEBUG(
dbgs() <<
" Finding best value for lane " << Lane <<
"\n");
320 for (
auto Ops : MultiNodeOps) {
322 dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
324 Candidates.
insert(Ops.second[Lane]);
328 for (
unsigned Op = 0, E = MultiNodeOps.size();
Op <
E; ++
Op) {
330 if (Mode[
Op] == OpMode::Failed)
333 VPValue *Last = FinalOrder[
Op].second[Lane - 1];
334 std::pair<OpMode, VPValue *> Res =
335 getBest(Mode[
Op], Last, Candidates, IAI);
349 for (
auto Op : Values) {
350 if (
auto *VPInstr = cast_or_null<VPInstruction>(
Op))
351 if (
auto *Instr = VPInstr->getUnderlyingInstr()) {
352 dbgs() << *Instr <<
" | ";
355 dbgs() <<
" nullptr | ";
364 auto I = BundleToCombined.find(to_vector<4>(Values));
365 if (I != BundleToCombined.end()) {
370 for (
auto *V : Values) {
371 auto UI = V->user_begin();
372 auto *FirstUser = *UI++;
373 while (UI != V->user_end()) {
374 assert(*UI == FirstUser &&
"Currently we only support SLP trees.");
384 dbgs() <<
"buildGraph: ";
388 if (!areVectorizable(Values))
396 bool MultiNodeRoot = !MultiNodeActive;
397 MultiNodeActive =
true;
400 dbgs() <<
" Visiting Commutative";
401 dumpBundle(Operands);
404 auto OperandsOpcode =
getOpcode(Operands);
405 if (OperandsOpcode && OperandsOpcode ==
getOpcode(Values)) {
420 MultiNodeActive =
false;
422 auto FinalOrder = reorderMultiNodeOps();
424 MultiNodeOps.
clear();
425 for (
auto &Ops : FinalOrder) {
427 Ops.first->replaceAllUsesWith(NewOp);
428 for (
unsigned i = 0; i < CombinedOperands.
size(); i++)
429 if (CombinedOperands[i] == Ops.first)
430 CombinedOperands[i] = NewOp;
440 CombinedOperands.
push_back(cast<VPInstruction>(V)->getOperand(0));
447 switch (ValuesOpcode) {
455 Opcode = ValuesOpcode;
462 assert(CombinedOperands.
size() > 0 &&
"Need more some operands");
464 VPI->setUnderlyingInstr(cast<VPInstruction>(Values[0])->getUnderlyingInstr());
467 cast<VPInstruction>(Values[0])->
print(
dbgs());
dbgs() <<
"\n");
468 addCombined(Values, VPI);
reference emplace_back(ArgTypes &&... Args)
This class represents lattice values for constants.
void push_back(const T &Elt)
bool hasMoreThanOneUniqueUser()
Returns true if the value has more than one unique user.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
bool isVectorTy() const
True if this is an instance of VectorType.
void reserve(size_type N)
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
static bool areCommutative(ArrayRef< VPValue *> Values)
static Optional< unsigned > getOpcode(ArrayRef< VPValue *> Values)
Returns the opcode of Values or ~0 if they do not all agree.
mir Rename Register Operands
Type * getType() const
All values are typed, get the type of this value.
const T & getValue() const LLVM_LVALUE_FUNCTION
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This file contains the declarations of the Vectorization Plan base classes:
The instances of the Type class are immutable: once they are created, they are never changed...
size_t size() const
size - Get the array size.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
VPInstruction * buildGraph(ArrayRef< VPValue *> Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands...
Align max(MaybeAlign Lhs, Align Rhs)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel, VPInterleavedAccessInfo &IAI)
Implements getLAScore from Listing 7 in the paper.
bool isCommutative() const
Return true if the instruction is commutative:
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static unsigned LookaheadMaxDepth
static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B, VPInterleavedAccessInfo &IAI)
Returns true if A and B access sequential memory if they are loads or stores or if they have identica...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
LLVM_NODISCARD bool empty() const
unsigned getOpcode() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static const Function * getParent(const Value *V)
static SmallVector< VPValue *, 4 > getOperands(ArrayRef< VPValue *> Values, unsigned OperandIndex)
This is a concrete Recipe that models a single VPlan-level instruction.
bool empty() const
empty - Check if the array is empty.