#define VPINTERNAL_VPLEGAL_CASES \
  VPINTERNAL_CASE(Legal) \
  VPINTERNAL_CASE(Discard) \
  VPINTERNAL_CASE(Convert)

#define VPINTERNAL_CASE(X) "|" #X
51 ". If non-empty, ignore "
52 "TargetTransformInfo and "
53 "always use this transformation for the %evl parameter (Used in "
59 ". If non-empty, Ignore "
60 "TargetTransformInfo and "
61 "always use this transformation for the %mask parameter (Used in "
#undef VPINTERNAL_CASE
#define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)

// ... (parseOverrideOption, which consumes this second expansion, is elided)

#undef VPINTERNAL_VPLEGAL_CASES
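// The X-macro above is expanded twice: once with VPINTERNAL_CASE(X) = "|" #X
// to splice "|Legal|Discard|Convert" into the cl::desc strings, and once with
// .Case(#X, VPLegalization::X) so parseOverrideOption can map the option text
// back to a strategy value.  A minimal standalone sketch of the same pattern
// (the names CASES/CASE/parse here are illustrative, not from this file, and
// assume an enum class VPTransform { Legal, Discard, Convert }):
//
//   #define CASES  CASE(Legal) CASE(Discard) CASE(Convert)
//
//   #define CASE(X) "|" #X
//   const char *Help = "Options: <empty>" CASES;  // "Options: <empty>|Legal|Discard|Convert"
//   #undef CASE
//
//   #define CASE(X) if (Text == #X) return VPTransform::X;
//   VPTransform parse(const std::string &Text) {
//     CASES
//     return VPTransform::Convert;  // illustrative default
//   }
//   #undef CASE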
#define DEBUG_TYPE "expandvp"

STATISTIC(NumFoldedVL, "Number of folded vector length params");
STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations");
static bool isAllTrueMask(Value *MaskVal) {
  if (Value *SplattedVal = getSplatValue(MaskVal))
    if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
      return ConstValue->isAllOnesValue();
  return false;
}
static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) {
  auto *NewInst = dyn_cast<Instruction>(&NewVal);
  if (!NewInst || !isa<FPMathOperator>(NewVal))
    return;
  auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI);
  if (!OldFMOp)
    return;
  NewInst->setFastMathFlags(OldFMOp->getFastMathFlags());
}
123 if (isa<VPReductionIntrinsic>(VPI))
127 unsigned FunctionalOpc = OpcOpt.value_or((
unsigned)Instruction::Call);
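// Note: the expansion drops %mask/%evl, so every lane of the unpredicated
// replacement may execute.  maySpeculateLanes() is what the per-operation
// expanders assert on before doing that (see the repeated
// "Implicitly dropping %evl in non-speculatable operator!" asserts below).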
struct CachingVPExpander {
  // ...
  Value *expandPredicationToIntCall(IRBuilder<> &Builder, VPIntrinsic &VPI,
                                    unsigned UnpredicatedIntrinsicID);
  Value *expandPredicationToFPCall(IRBuilder<> &Builder, VPIntrinsic &VPI,
                                   unsigned UnpredicatedIntrinsicID);
  // ...
  // Whether the expandvp-override-* testing options override TTI's answer.
  bool UsingTTIOverrides;
  // ...
  bool expandVectorPredication();
};
Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy,
                                           unsigned NumElems) {
  SmallVector<Constant *, 16> ConstElems;
  for (unsigned Idx = 0; Idx < NumElems; ++Idx)
    ConstElems.push_back(ConstantInt::get(LaneTy, Idx, false));
  return ConstantVector::get(ConstElems);
}
Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
                                           Value *EVLParam,
                                           ElementCount ElemCount) {
  // Scalable vectors: use the llvm.get.active.lane.mask intrinsic.
  if (ElemCount.isScalable()) {
    auto *M = Builder.GetInsertBlock()->getModule();
    Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount);
    auto *ActiveMaskFunc = Intrinsic::getDeclaration(
        M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->getType()});
    // get_active_lane_mask(base, evl) sets lane i iff base + i < evl.
    Value *ConstZero = Builder.getInt32(0); // assumed; not shown in the excerpt
    return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam});
  }
  // Fixed-width vectors: splat %evl and compare against a step vector.
  Type *LaneTy = EVLParam->getType();
  unsigned NumElems = ElemCount.getFixedValue();
  Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam);
  Value *IdxVec = createStepVector(Builder, LaneTy, NumElems);
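  // The final lane-wise compare is not retained in this excerpt; assuming the
  // obvious completion, the mask is "lane index < %evl":
  //   return Builder.CreateICmpULT(IdxVec, VLSplat);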
Value *
CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                                     VPIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");
  // Cases from the operand-sanitizing switch over the functional opcode:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
    Value *SafeDivisor = getSafeDivisor(VPI.getType());
    Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor);
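    // Why the select: once the predicate is dropped, masked-off lanes execute
    // too, and a zero divisor in such a lane would make the unpredicated
    // udiv/sdiv/urem/srem immediate UB.  getSafeDivisor() returns a constant 1
    // of the right type, so inactive lanes divide by 1 instead.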
Value *CachingVPExpander::expandPredicationToIntCall(
    IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
  switch (UnpredicatedIntrinsicID) {
  case Intrinsic::smax:
  case Intrinsic::smin:
  case Intrinsic::umax:
  case Intrinsic::umin: {
    // ... (fetch the two operands)
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    // ... (emit the call and replace the VP intrinsic)
  }
Value *CachingVPExpander::expandPredicationToFPCall(
    IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  switch (UnpredicatedIntrinsicID) {
  case Intrinsic::fabs:
  case Intrinsic::sqrt: {
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    // ... (unary call; elided)
  }
  case Intrinsic::maxnum:
  case Intrinsic::minnum: {
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    // ... (binary call; elided)
  }
  case Intrinsic::experimental_constrained_fma:
  case Intrinsic::experimental_constrained_fmuladd: {
    // (Op0..Op2 are the three FP operands, fetched in elided lines.)
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp =
        Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
    // ... (replace the VP intrinsic; elided)
  }
/// \returns the neutral element of the reduction \p VPI for element type
/// \p EltTy (partially reconstructed; the excerpt kept only parts of this
/// switch).
static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
                                         Type *EltTy) {
  bool Negative = false;
  unsigned EltBits = EltTy->getScalarSizeInBits();
  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Expecting a VP reduction intrinsic");
  case Intrinsic::vp_reduce_add:
  case Intrinsic::vp_reduce_or:
  case Intrinsic::vp_reduce_xor:
  case Intrinsic::vp_reduce_umax:
    return Constant::getNullValue(EltTy);
  case Intrinsic::vp_reduce_mul:
    return ConstantInt::get(EltTy, 1, /*IsSigned=*/false);
  case Intrinsic::vp_reduce_and:
  case Intrinsic::vp_reduce_umin:
    return ConstantInt::getAllOnesValue(EltTy);
  case Intrinsic::vp_reduce_smin:
    return ConstantInt::get(EltTy->getContext(),
                            APInt::getSignedMaxValue(EltBits));
  case Intrinsic::vp_reduce_smax:
    return ConstantInt::get(EltTy->getContext(),
                            APInt::getSignedMinValue(EltBits));
  case Intrinsic::vp_reduce_fmax:
    Negative = true;
    [[fallthrough]];
  case Intrinsic::vp_reduce_fmin: {
    const fltSemantics &Semantics = EltTy->getFltSemantics();
    // (The NaN special-casing present in the full source is elided here.)
    return ConstantFP::get(EltTy, APFloat::getLargest(Semantics, Negative));
  }
  case Intrinsic::vp_reduce_fadd:
    return ConstantFP::getNegativeZero(EltTy);
  case Intrinsic::vp_reduce_fmul:
    return ConstantFP::get(EltTy, 1.0);
  }
}
Value *
CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
                                                VPReductionIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  Value *Mask = VPI.getMaskParam();
  Value *RedOp = VPI.getOperand(VPI.getVectorParamPos());
  Value *Start = VPI.getOperand(VPI.getStartParamPos());

  // Insert the neutral element in masked-out positions.
  auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType());
  auto *NeutralVector = Builder.CreateVectorSplat(
      cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt);
  RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector);
  Value *Reduction;
  switch (VPI.getIntrinsicID()) {
  case Intrinsic::vp_reduce_add:
  case Intrinsic::vp_reduce_mul:
  case Intrinsic::vp_reduce_and:
  case Intrinsic::vp_reduce_or:
  case Intrinsic::vp_reduce_xor:
    // Bodies elided in the excerpt: a plain llvm.vector.reduce.* call followed
    // by the matching scalar op to fold in the start value.
    break;
  // For the min/max kinds the elided vector reduce initializes Reduction,
  // which is then combined with the start value:
  case Intrinsic::vp_reduce_smax:
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::smax, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_smin:
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::smin, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_umax:
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::umax, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_umin:
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::umin, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_fmax:
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_fmin:
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_fadd:
  case Intrinsic::vp_reduce_fmul:
    // FP add/mul reductions take the start value directly (bodies elided).
    break;
  }
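  // Overall shape of the expansion, e.g. for vp.reduce.add (illustrative IR,
  // not verbatim output):
  //   %sel = select <N x i1> %mask, <N x iK> %v, <N x iK> zeroinitializer
  //   %red = call iK @llvm.vector.reduce.add(<N x iK> %sel)
  //   %res = add iK %red, %start
  // i.e. masked-off lanes are replaced by the reduction's neutral element
  // before an ordinary unpredicated reduction is used.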
Value *CachingVPExpander::expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
                                                           VPIntrinsic &VPI) {
  Value *CastOp = nullptr;
  // Each case below (bodies elided in the excerpt) emits the matching
  // unpredicated cast via IRBuilder, e.g. vp.sext -> sext, vp.fpext -> fpext.
  switch (VPI.getIntrinsicID()) {
  case Intrinsic::vp_sext:
  case Intrinsic::vp_zext:
  case Intrinsic::vp_trunc:
  case Intrinsic::vp_inttoptr:
  case Intrinsic::vp_ptrtoint:
  case Intrinsic::vp_fptosi:
  case Intrinsic::vp_fptoui:
  case Intrinsic::vp_sitofp:
  case Intrinsic::vp_uitofp:
  case Intrinsic::vp_fptrunc:
  case Intrinsic::vp_fpext:
Value *
CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                                      VPIntrinsic &VPI) {
  assert(VPI.canIgnoreVectorLengthParam());

  const auto &DL = F.getParent()->getDataLayout();

  Value *MaskParam = VPI.getMaskParam();
  Value *PtrParam = VPI.getMemoryPointerParam();
  Value *DataParam = VPI.getMemoryDataParam();
  bool IsUnmasked = isAllTrueMask(MaskParam);

  MaybeAlign AlignOpt = VPI.getPointerAlignment();

  Value *NewMemoryInst = nullptr;
  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Not a VP memory intrinsic");
  case Intrinsic::vp_store:
    if (IsUnmasked) {
      StoreInst *NewStore =
          Builder.CreateStore(DataParam, PtrParam, /*IsVolatile=*/false);
      if (AlignOpt.has_value())
        NewStore->setAlignment(*AlignOpt);
      NewMemoryInst = NewStore;
    } else
      NewMemoryInst = Builder.CreateMaskedStore(
          DataParam, PtrParam, AlignOpt.valueOrOne(), MaskParam);
    break;
  case Intrinsic::vp_load:
    if (IsUnmasked) {
      LoadInst *NewLoad =
          Builder.CreateLoad(VPI.getType(), PtrParam, /*IsVolatile=*/false);
      if (AlignOpt.has_value())
        NewLoad->setAlignment(*AlignOpt);
      NewMemoryInst = NewLoad;
    } else
      NewMemoryInst = Builder.CreateMaskedLoad(
          VPI.getType(), PtrParam, AlignOpt.valueOrOne(), MaskParam);
    break;
  case Intrinsic::vp_scatter: {
    auto *ElementType =
        cast<VectorType>(DataParam->getType())->getElementType();
    NewMemoryInst = Builder.CreateMaskedScatter(
        DataParam, PtrParam,
        AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam);
    break;
  }
  case Intrinsic::vp_gather: {
    auto *ElementType = cast<VectorType>(VPI.getType())->getElementType();
    NewMemoryInst = Builder.CreateMaskedGather(
        VPI.getType(), PtrParam,
        AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam, nullptr,
        VPI.getName());
    break;
  }
  }

  assert(NewMemoryInst);
  replaceOperation(*NewMemoryInst, VPI);
  return NewMemoryInst;
}
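// Summary of the memory lowering above: with an all-true mask, vp.load and
// vp.store become ordinary load/store (keeping the VP intrinsic's pointer
// alignment when present); otherwise they become llvm.masked.load/store with
// valueOrOne() alignment.  vp.gather/vp.scatter always lower to
// llvm.masked.gather/scatter, defaulting to the data layout's preferred
// alignment for the element type when no alignment was attached.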
Value *CachingVPExpander::expandPredicationInComparison(IRBuilder<> &Builder,
                                                        VPCmpIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  // ... (fetch the two operands and the predicate)
  auto *NewCmp = Builder.CreateCmp(Pred, Op0, Op1);
void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
  // ... (early-outs for an already ineffective %evl elided)

  ElementCount StaticElemCount = VPI.getStaticVectorLength();
  Value *MaxEVL = nullptr;
  if (StaticElemCount.isScalable()) {
    // Scalable vectors: the neutral %evl is vscale * known-min lane count.
    // (VScaleFunc, FactorConst and the local IRBuilder are set up in lines
    //  not kept in the excerpt.)
    Value *VScale = Builder.CreateCall(VScaleFunc, {}, "vscale");
    MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size",
                               /*HasNUW=*/true, /*HasNSW=*/false);
  }
bool CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) {
  // ... (early-out when %evl is already ineffective)
  IRBuilder<> Builder(&VPI);

  Value *OldMaskParam = VPI.getMaskParam();
  Value *OldEVLParam = VPI.getVectorLengthParam();
  assert(OldMaskParam && "no mask param to fold the vl param into");
  assert(OldEVLParam && "no EVL param to fold away");

  ElementCount ElemCount = VPI.getStaticVectorLength();
  Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount);
  Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam);
  VPI.setMaskParam(NewMaskParam);

  // Drop the %evl parameter now that the mask carries its effect.
  discardEVLParameter(VPI);
  assert(VPI.canIgnoreVectorLengthParam() &&
         "transformation did not render the evl param ineffective!");
Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n');

  IRBuilder<> Builder(&VPI);

  // Try lowering to an LLVM instruction first.
  auto OC = VPI.getFunctionalOpcode();
  if (OC && Instruction::isBinaryOp(*OC))
    return expandPredicationInBinaryOperator(Builder, VPI);

  if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
    return expandPredicationInReduction(Builder, *VPRI);

  if (auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI))
    return expandPredicationInComparison(Builder, *VPCmp);

  if (VPCastIntrinsic::isVPCast(VPI.getIntrinsicID()))
    return expandPredicationToCastIntrinsic(Builder, VPI);
  switch (VPI.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::vp_fneg: {
    Value *NewNegOp = Builder.CreateFNeg(VPI.getOperand(0), VPI.getName());
    replaceOperation(*NewNegOp, VPI);
    return NewNegOp;
  }
  case Intrinsic::vp_abs:
    return expandPredicationToIntCall(Builder, VPI, Intrinsic::abs);
  case Intrinsic::vp_smax:
    return expandPredicationToIntCall(Builder, VPI, Intrinsic::smax);
  case Intrinsic::vp_smin:
    return expandPredicationToIntCall(Builder, VPI, Intrinsic::smin);
  case Intrinsic::vp_umax:
    return expandPredicationToIntCall(Builder, VPI, Intrinsic::umax);
  case Intrinsic::vp_umin:
    return expandPredicationToIntCall(Builder, VPI, Intrinsic::umin);
  case Intrinsic::vp_fabs:
    return expandPredicationToFPCall(Builder, VPI, Intrinsic::fabs);
  case Intrinsic::vp_sqrt:
    return expandPredicationToFPCall(Builder, VPI, Intrinsic::sqrt);
  case Intrinsic::vp_maxnum:
    return expandPredicationToFPCall(Builder, VPI, Intrinsic::maxnum);
  case Intrinsic::vp_minnum:
    return expandPredicationToFPCall(Builder, VPI, Intrinsic::minnum);
  case Intrinsic::vp_load:
  case Intrinsic::vp_store:
  case Intrinsic::vp_gather:
  case Intrinsic::vp_scatter:
    return expandPredicationInMemoryIntrinsic(Builder, VPI);
  }
  if (auto CID = VPI.getConstrainedIntrinsicID())
    if (Value *Call = expandPredicationToFPCall(Builder, VPI, *CID))
      return Call;
  // (TransformJob pairs a VP intrinsic with its legalization strategy.)
  TransformJob(VPIntrinsic *PI, VPLegalization InitStrat)
      : PI(PI), Strategy(InitStrat) {}
VPLegalization
CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
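  // Body not retained in the excerpt.  Given UsingTTIOverrides and the two
  // cl::opt strings above, the expected behaviour is: start from
  // TTI.getVPLegalizationStrategy(VPI); if either expandvp-override-* option
  // is set (testing only), replace the %evl / %mask strategies with the
  // parseOverrideOption() result instead.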
bool CachingVPExpander::expandVectorPredication() {
  SmallVector<TransformJob, 16> Worklist;
  // Collect the VP intrinsics that need expansion, with their strategies.
  for (auto &I : instructions(F)) {
    auto *VPI = dyn_cast<VPIntrinsic>(&I);
    if (!VPI)
      continue;
    auto VPStrat = getVPLegalizationStrategy(*VPI);
    sanitizeStrategy(*VPI, VPStrat);
    if (!VPStrat.shouldDoNothing())
      Worklist.emplace_back(VPI, VPStrat);
  }
  if (Worklist.empty())
    return false;

  LLVM_DEBUG(dbgs() << "\n:::: Transforming " << Worklist.size()
                    << " instructions ::::\n");
  for (TransformJob Job : Worklist) {
    // First legalize the %evl parameter.
    switch (Job.Strategy.EVLParamStrategy) {
    case VPLegalization::Discard:
      discardEVLParameter(*Job.PI);
      break;
    case VPLegalization::Convert:
      if (foldEVLIntoMask(*Job.PI))
        ++NumFoldedVL;
      break;
    // ... (Legal: nothing to do)
    }
    // Then replace the predicated operation itself if requested.
    switch (Job.Strategy.OpStrategy) {
    case VPLegalization::Convert:
      expandPredication(*Job.PI);
      ++NumLoweredVPOps;
      break;
    // ... (Legal/Discard handling elided)
    }
    assert(Job.isDone() && "incomplete transformation");
  }
  bool runOnFunction(Function &F) override {
    const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    CachingVPExpander VPExpander(F, *TTI);
    return VPExpander.expandVectorPredication();
  }
char ExpandVectorPredication::ID;
INITIALIZE_PASS_BEGIN(ExpandVectorPredication, DEBUG_TYPE,
                      "Expand vector predication intrinsics", false, false)
// ... (pass dependencies and INITIALIZE_PASS_END elided)

FunctionPass *llvm::createExpandVectorPredicationPass() {
  return new ExpandVectorPredication();
}
PreservedAnalyses
ExpandVectorPredicationPass::run(Function &F, FunctionAnalysisManager &AM) {
  const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  CachingVPExpander VPExpander(F, TTI);
  if (!VPExpander.expandVectorPredication())
    return PreservedAnalyses::all();
  // ... (otherwise return a set that preserves only the CFG analyses)
}