13 #ifndef LLVM_ANALYSIS_VECTORUTILS_H
14 #define LLVM_ANALYSIS_VECTORUTILS_H
22 class TargetLibraryInfo;
132 static constexpr char const *_LLVM_ = "_LLVM_";
219 static void getVFABIMappings(const CallInst &CI,
230 if (ListOfStrings.empty())
232 for (const auto &MangledName : ListOfStrings) {
241 "Vector function is missing.");
253 getVFABIMappings(CI, Ret);
262 : M(CI.getModule()), CI(CI),
272 for (const auto &Info : ScalarToVectorMappings)
273 if (Info.Shape == Shape)
274 return M->getFunction(Info.VectorName);
281 template <typename T> class ArrayRef;
283 class GetElementPtrInst;
292 namespace Intrinsic {
300 if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar())
317 unsigned ScalarOpdIdx);
327 const TargetLibraryInfo *TLI);
381 SmallVectorImpl<int> &ScaledMask);
399 SmallVectorImpl<int> &ScaledMask);
414 ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
415 unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
416 function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
417 function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction);
453 MapVector<Instruction*, uint64_t>
456 const TargetTransformInfo *TTI = nullptr);
471 const Instruction *Inst2);
495 const InterleaveGroup<Instruction> &Group);
609 template <typename InstTy> class InterleaveGroup {
612 : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
613 InsertPos(nullptr) {}
616 : Alignment(Alignment), InsertPos(Instr) {
618 assert(Factor > 1 && "Invalid interleave factor");
620 Reverse = Stride < 0;
639 int32_t Key = *MaybeKey;
650 if (Key > LargestKey) {
652 if (Index >= static_cast<int32_t>(Factor))
656 } else if (Key < SmallestKey) {
660 if (!MaybeLargestIndex)
664 if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
671 Alignment = std::min(Alignment, NewAlign);
672 Members[Key] = Instr;
687 for (auto I : Members) {
688 if (I.second == Instr)
689 return I.first - SmallestKey;
726 int32_t SmallestKey = 0;
727 int32_t LargestKey = 0;
756 : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
772 if (InterleaveGroups.empty()) {
774 !RequiresScalarEpilogue &&
775 "RequiresScalarEpilog should not be set without interleave groups");
779 InterleaveGroupMap.clear();
780 for (auto *Ptr : InterleaveGroups)
782 InterleaveGroups.clear();
783 RequiresScalarEpilogue = false;
789 return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
797 return InterleaveGroupMap.lookup(Instr);
802 return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
829 bool RequiresScalarEpilogue = false;
841 struct StrideDescriptor {
842 StrideDescriptor() = default;
843 StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
845 : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}
851 const SCEV *Scev = nullptr;
861 using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
867 InterleaveGroup<Instruction> *
868 createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
870 "Already in an interleaved access group");
871 InterleaveGroupMap[Instr] =
872 new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
873 InterleaveGroups.insert(InterleaveGroupMap[Instr]);
874 return InterleaveGroupMap[Instr];
878 void releaseGroup(InterleaveGroup<Instruction> *Group) {
879 for (unsigned i = 0; i < Group->getFactor(); i++)
880 if (Instruction *Member = Group->getMember(i))
881 InterleaveGroupMap.erase(Member);
883 InterleaveGroups.erase(Group);
888 void collectConstStrideAccesses(
889 MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
893 static bool isStrided(int Stride);
901 bool areDependencesValid() const {
910 bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
911 StrideEntry *B) const {
927 auto *Src = A->first;
928 auto SrcDes = A->second;
931 auto *Sink = B->first;
932 auto SinkDes = B->second;
936 if (!Src->mayWriteToMemory())
940 if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
945 if (!areDependencesValid())
950 return Dependences.find(Src) == Dependences.end() ||
958 void collectDependences() {
959 if (!areDependencesValid())
962 for (auto Dep : *Deps)
963 Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be ...
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign)
Try to insert a new member Instr with index Index and alignment NewAlign.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Represents a single loop in the control flow graph.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep)
Find the operand of the GEP that should be checked for consecutive stores.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, DominatorTree *DT, LoopInfo *LI, const LoopAccessInfo *LAI)
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
void getVectorVariantNames(const CallInst &CI, SmallVectorImpl< std::string > &VariantMappings)
Populates a set of strings representing the Vector Function ABI variants associated to the CallInst C...
The main scalar evolution driver.
bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be ...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
static VFShape getScalarShape(const CallInst &CI)
bool erase(const KeyT &Val)
MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
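A minimal sketch of the scaling behavior (the helper name and the concrete mask values are illustrative; each index i is expected to expand into the Scale sequential narrowed indices starting at i * Scale):

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  void narrowMaskExample() {
    SmallVector<int, 16> Scaled;
    narrowShuffleMaskElts(/*Scale=*/2, /*Mask=*/{1, 2}, Scaled);
    // Scaled now holds {2, 3, 4, 5}; negative (sentinel) mask elements
    // would simply be repeated Scale times rather than rescaled.
  }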
The instances of the Type class are immutable: once they are created, they are never changed.
Value * getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty)
If a value has only one user that is a CastInst, return it.
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx)
Identifies if the vector form of the intrinsic has an operand that has an overloaded type.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
SmallVector< VFParameter, 8 > Parameters
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
The group of interleaved loads/stores sharing the same stride and close to each other.
VFISAKind
Describes the type of Instruction Set Architecture.
constexpr bool hasValue() const
bool hasValidParameterList() const
Validation check on the Parameters in the VFShape.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
An information struct used to provide DenseMap with the various necessary components for a given valu...
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName, unsigned numArgs, ElementCount VF)
This routine mangles the given VectorName according to the LangRef specification for vector-function-...
void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
VFParamKind
Describes the type of Parameters.
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
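A short sketch of the mask this produces (the helper name is illustrative; the values follow the interleaving layout, where lane i of each source vector is grouped together):

  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  void interleaveMaskExample() {
    // VF = 4 and NumVecs = 2 should yield <0, 4, 1, 5, 2, 6, 3, 7>:
    // element i of vector 0 followed by element i of vector 1, for i = 0..3.
    SmallVector<int, 16> Mask = createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);
    (void)Mask;
  }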
std::string VectorName
Vector Function Name associated to this VFInfo.
std::enable_if_t< std::is_signed< T >::value, llvm::Optional< T > > checkedSub(T LHS, T RHS)
Subtract two signed integers LHS and RHS.
The Vector Function Database.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
DenseMap< const Value *, Value * > ValueToValueMap
Holds the VFShape for a specific scalar to vector function mapping.
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Type * ToVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
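A brief sketch of the conversion (the LLVMContext and helper name exist only to make the snippet self-contained):

  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"
  using namespace llvm;

  void toVectorTyExample() {
    LLVMContext Ctx;
    Type *I32 = Type::getInt32Ty(Ctx);
    // A fixed element count of 4 yields <4 x i32> ...
    Type *V4I32 = ToVectorTy(I32, ElementCount::getFixed(4));
    // ... while void/metadata types and scalar element counts are returned
    // unchanged, per the guard shown in the listing above.
    Type *Unchanged = ToVectorTy(I32, ElementCount::getFixed(1));
    (void)V4I32;
    (void)Unchanged;
  }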
Drive the analysis of interleaved memory accesses in the loop.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
Contains the information about the kind of vectorization available.
This struct is a compact representation of a valid (non-zero power of two) alignment.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
iterator_range< SmallPtrSetIterator< llvm::InterleaveGroup< Instruction > * > > getInterleaveGroups()
static ElementCount getFixed(ScalarTy MinVal)
bool isInterleaved(Instruction *Instr) const
Check if Instr belongs to any interleave group.
std::enable_if_t< std::is_signed< T >::value, llvm::Optional< T > > checkedAdd(T LHS, T RHS)
Add two signed integers LHS and RHS.
void setInsertPos(InstTy *Inst)
Encapsulates information needed to describe a parameter.
static constexpr const char * _LLVM_
LLVM Internal VFABI ISA token for vector functions.
InstTy * getInsertPos() const
uint32_t getIndex(const InstTy *Instr) const
Get the index for the given member.
This class represents an analyzed expression in the program.
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
uint32_t getFactor() const
VFISAKind ISA
Instruction Set Architecture.
Function * getVectorizedFunction(const VFShape &Shape) const
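Together with VFShape::get and the VFDatabase constructor documented elsewhere on this page, the query above can be combined into a sketch like the following; the fixed VF of 4, the absent global predicate, and the helper name findVectorVariant are illustrative assumptions:

  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  // Returns the 4-lane, unpredicated vector variant of CI's callee, or
  // nullptr if the call carries no matching VFABI mapping.
  Function *findVectorVariant(CallInst &CI) {
    VFDatabase DB(CI);
    VFShape Shape =
        VFShape::get(CI, ElementCount::getFixed(4), /*HasGlobalPred=*/false);
    return DB.getVectorizedFunction(Shape);
  }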
Drive the analysis of memory accesses in the loop.
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
iterator find(const_arg_type_t< KeyT > Val)
void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
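A condensed sketch of how this driver is typically exercised (the analyses are assumed to come from the surrounding pass, and the helper name is illustrative):

  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  void collectGroups(PredicatedScalarEvolution &PSE, Loop *L,
                     DominatorTree *DT, LoopInfo *LI,
                     const LoopAccessInfo *LAI, Instruction *MemAccess) {
    InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
    IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
    if (const auto *Group = IAI.getInterleaveGroup(MemAccess)) {
      unsigned Factor = Group->getFactor(); // members share this factor
      (void)Factor;
    }
  }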
constexpr const T & getValue() const &
A Module instance is used to store all the information related to an LLVM module.
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
std::string ScalarName
Scalar Function Name.
bool operator==(const VFShape &Other) const
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
StringRef - Represent a constant reference to a string, i.e.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
Common base class shared among various IRBuilders.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
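For concreteness, a sketch with small illustrative values (each source element is repeated ReplicationFactor times):

  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  void replicatedMaskExample() {
    // ReplicationFactor = 3 and VF = 4 should yield
    // <0,0,0, 1,1,1, 2,2,2, 3,3,3>.
    SmallVector<int, 16> Mask =
        createReplicatedMask(/*ReplicationFactor=*/3, /*VF=*/4);
    (void)Mask;
  }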
bool requiresScalarEpilogue() const
Returns true if this Group requires a scalar iteration to handle gaps.
VFParamKind getVFParamKindFromString(const StringRef Token)
Retrieve the VFParamKind from a string token.
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned arg_size() const
void updateParam(VFParameter P)
Update the parameter in position P.ParamPos to P.
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
bool requiresScalarEpilogue() const
Returns true if an interleaved group that may access memory out-of-bounds requires a scalar epilogue ...
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
static constexpr const char * MappingsAttrName
Value * stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
If the argument is a GEP, then returns the operand identified by getGEPInductionOperand.
static VFShape get(const CallInst &CI, ElementCount EC, bool HasGlobalPred)
A range adaptor for a pair of iterators.
Optional< VFInfo > tryDemangleForVFABI(StringRef MangledName, const Module &M)
Function to construct a VFInfo out of a mangled name in the following format:
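A hedged sketch of demangling such a name (the mangled string below is an illustrative value in the _ZGV<isa><mask><vlen><parameters>_<scalarname>(<vectorname>) style; it does not come from this file, the helper name is invented, and the llvm::VFABI namespace qualification is assumed):

  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  void demangleExample(const Module &M) {
    Optional<VFInfo> Info =
        VFABI::tryDemangleForVFABI("_ZGV_LLVM_N2v_foo(vector_foo)", M);
    if (Info) {
      // Expect ScalarName == "foo" and VectorName == "vector_foo" for a
      // 2-lane, unpredicated variant taking one vector parameter.
    }
  }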
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
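A small sketch (illustrative values; the mask selects elements starting at Start and stepping by Stride):

  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  void strideMaskExample() {
    // Start = 0, Stride = 2, VF = 4 should yield <0, 2, 4, 6>.
    SmallVector<int, 16> Mask =
        createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4);
    (void)Mask;
  }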
This class represents a function call, abstracting a target machine's calling convention.
static constexpr const char * _LLVM_Scalarize_
Prefix for internal name redirection for vector function that tells the compiler to scalarize the cal...
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
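A small sketch (illustrative values; NumUndefs trailing lanes are left undefined):

  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  void sequentialMaskExample() {
    // Start = 0, NumInts = 3, NumUndefs = 1 should yield <0, 1, 2, undef>;
    // the undef lane is encoded as -1 in the returned mask.
    SmallVector<int, 16> Mask =
        createSequentialMask(/*Start=*/0, /*NumInts=*/3, /*NumUndefs=*/1);
    (void)Mask;
  }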
bool invalidateGroups()
Invalidate groups, e.g., in case all blocks in loop will be predicated contrary to original assumptio...
LLVM Value Representation.
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
VFDatabase(CallInst &CI)
Constructor, requires a CallInst instance.
uint32_t getNumMembers() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
bool operator==(const VFParameter &Other) const
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.