Go to the documentation of this file.
36 #define DEBUG_TYPE "hexagon-subtarget"
38 #define GET_SUBTARGETINFO_CTOR
39 #define GET_SUBTARGETINFO_TARGET_DESC
40 #include "HexagonGenSubtargetInfo.inc"
50 cl::desc(
"Enable the scheduler to generate .cur"));
54 cl::desc(
"Disable Hexagon MI Scheduling"));
58 cl::desc(
"Enable subregister liveness tracking for Hexagon"));
62 cl::desc(
"If present, forces/disables the use of long calls"));
66 cl::desc(
"Consider calls to be predicable"));
76 cl::desc(
"Enable checking for cache bank conflicts"));
80 cl::desc(
"Enable the code-generation for vector float instructions on v68."));
85 OptLevel(
TM.getOptLevel()),
87 TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU,
FS)),
88 RegInfo(getHwMode()), TLInfo(
TM, *
this),
89 InstrItins(getInstrItineraryForCPU(CPUString)) {
104 UseHVX128BOps =
false;
105 UseHVX64BOps =
false;
107 UseLongCalls =
false;
116 return F ==
"+hvx-qfloat" ||
F ==
"-hvx-qfloat";
121 if (
F.startswith(
"+hvxv"))
127 if (
F.startswith(
"+hvx") ||
F ==
"-hvx")
128 return F.take_front(4);
133 bool AddQFloat =
false;
139 }
else if (HvxVer ==
"+hvx") {
148 std::string FeatureString = Features.
getString();
153 bool GreaterThanV68 =
false;
155 GreaterThanV68 =
true;
158 if (!GreaterThanV68) {
160 UseHVXFloatingPoint =
true;
162 UseHVXFloatingPoint =
true;
165 if (UseHVXQFloatOps && UseHVXIEEEFPOps && UseHVXFloatingPoint)
167 dbgs() <<
"Behavior is undefined for simultaneous qfloat and ieee hvx codegen...");
183 setFeatureBits(FeatureBits.
reset(Hexagon::FeatureDuplex));
194 if (IncludeBool && Ty ==
MVT::i1)
204 if (!IncludeBool && ElemTy ==
MVT::i1)
211 if (IncludeBool && ElemTy ==
MVT::i1) {
214 for (
MVT T : ElemTypes)
215 if (NumElems *
T.getSizeInBits() == 8 * HwLen)
221 if (VecWidth != 8 * HwLen && VecWidth != 16 * HwLen)
227 if (!VecTy->
isVectorTy() || isa<ScalableVectorType>(VecTy))
240 auto isHvxTy = [
this, IncludeBool](
MVT SimpleTy) {
253 if (SimpleTy.
isValid() && isHvxTy(SimpleTy))
267 if (
D.getKind() ==
SDep::Output &&
D.getReg() == Hexagon::USR_OVF)
269 for (
auto &
E : Erase)
282 bool IsLoadMI1 = MI1.
mayLoad();
283 if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1))
289 if (!QII->isHVXVec(MI2))
295 for (
SDep &PI :
SI.getSUnit()->Preds) {
296 if (PI.getSUnit() != &SU || PI.getKind() !=
SDep::Order)
299 SI.getSUnit()->setDepthDirty();
313 bool HexagonSubtarget::CallMutation::shouldTFRICallBind(
315 const SUnit &Inst2)
const {
327 SUnit* LastSequentialCall =
nullptr;
338 for (
unsigned su = 0,
e = DAG->
SUnits.size(); su !=
e; ++su) {
340 if (DAG->
SUnits[su].getInstr()->isCall())
341 LastSequentialCall = &DAG->
SUnits[su];
343 else if (DAG->
SUnits[su].getInstr()->isCompare() && LastSequentialCall)
347 shouldTFRICallBind(HII, DAG->
SUnits[su], DAG->
SUnits[su+1]))
368 VRegHoldingReg[
MI->getOperand(0).getReg()] =
MI->getOperand(1).getReg();
369 LastVRegUse.
erase(
MI->getOperand(1).getReg());
374 if (MO.isUse() && !
MI->isCopy() &&
375 VRegHoldingReg.
count(MO.getReg())) {
377 LastVRegUse[VRegHoldingReg[MO.getReg()]] = &DAG->
SUnits[su];
381 if (LastVRegUse.
count(*AI) &&
382 LastVRegUse[*AI] != &DAG->
SUnits[su])
385 LastVRegUse.
erase(*AI);
403 for (
unsigned i = 0,
e = DAG->
SUnits.size();
i !=
e; ++
i) {
413 if (BaseOp0 ==
nullptr || !BaseOp0->
isReg() || Size0 >= 32)
425 if (BaseOp1 ==
nullptr || !BaseOp1->
isReg() || Size1 >= 32 ||
430 if (((Offset0 ^ Offset1) & 0x18) != 0)
452 SUnit *Dst,
int DstOpIdx,
454 if (!Src->isInstr() || !Dst->isInstr())
465 isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
483 for (
const auto &DDep : Dst->Succs) {
486 for (
unsigned OpNum = 0; OpNum < DDst->
getNumOperands(); OpNum++) {
500 DLatency = (DLatency == -1) ?
Latency : DLatency;
518 isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
524 Latency = updateLatency(*SrcInst, *DstInst, IsArtificial,
Latency);
529 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
const {
530 Mutations.push_back(std::make_unique<UsrOverflowMutation>());
531 Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
532 Mutations.push_back(std::make_unique<BankConflictMutation>());
536 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
const {
537 Mutations.push_back(std::make_unique<UsrOverflowMutation>());
538 Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
542 void HexagonSubtarget::anchor() {}
554 int HexagonSubtarget::updateLatency(
MachineInstr &SrcInst,
569 void HexagonSubtarget::restoreLatency(
SUnit *Src,
SUnit *Dst)
const {
571 for (
auto &
I : Src->Succs) {
572 if (!
I.isAssignedRegDep() ||
I.getSUnit() != Dst)
576 for (
unsigned OpNum = 0; OpNum < SrcI->
getNumOperands(); OpNum++) {
578 bool IsSameOrSubReg =
false;
582 IsSameOrSubReg = (MOReg == DepR);
586 if (MO.
isDef() && IsSameOrSubReg)
590 assert(DefIdx >= 0 &&
"Def Reg not found in Src MI");
593 for (
unsigned OpNum = 0; OpNum < DstI->
getNumOperands(); OpNum++) {
597 DefIdx, *DstI, OpNum));
602 bool IsArtificial =
I.isArtificial();
610 auto F =
find(Dst->Preds,
T);
612 F->setLatency(
I.getLatency());
617 void HexagonSubtarget::changeLatency(
SUnit *Src,
SUnit *Dst,
unsigned Lat)
619 for (
auto &
I : Src->Succs) {
620 if (!
I.isAssignedRegDep() ||
I.getSUnit() != Dst)
627 auto F =
find(Dst->Preds,
T);
636 if (
I.isAssignedRegDep() &&
I.getLatency() == 0 &&
637 !
I.getSUnit()->getInstr()->isPseudo())
646 bool HexagonSubtarget::isBestZeroLatency(
SUnit *Src,
SUnit *Dst,
653 if (Dst->isBoundaryNode())
670 SUnit *Best =
nullptr;
671 SUnit *DstBest =
nullptr;
673 if (SrcBest ==
nullptr || Src->NodeNum >= SrcBest->
NodeNum) {
676 if (DstBest ==
nullptr || Dst->NodeNum <= DstBest->
NodeNum)
684 if ((Src == SrcBest && Dst == DstBest ) ||
685 (SrcBest ==
nullptr && Dst == DstBest) ||
686 (Src == SrcBest && Dst ==
nullptr))
691 if (SrcBest !=
nullptr) {
693 changeLatency(SrcBest, Dst, 1);
695 restoreLatency(SrcBest, Dst);
697 if (DstBest !=
nullptr) {
699 changeLatency(Src, DstBest, 1);
701 restoreLatency(Src, DstBest);
706 if (SrcBest && DstBest)
709 changeLatency(SrcBest, DstBest, 0);
714 for (
auto &
I : DstBest->
Preds)
715 if (ExclSrc.
count(
I.getSUnit()) == 0 &&
716 isBestZeroLatency(
I.getSUnit(), DstBest,
TII, ExclSrc, ExclDst))
717 changeLatency(
I.getSUnit(), DstBest, 0);
718 }
else if (SrcBest) {
722 for (
auto &
I : SrcBest->
Succs)
723 if (ExclDst.
count(
I.getSUnit()) == 0 &&
724 isBestZeroLatency(SrcBest,
I.getSUnit(),
TII, ExclSrc, ExclDst))
725 changeLatency(SrcBest,
I.getSUnit(), 0);
unsigned getVectorLength() const
unsigned getPosition() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
ParseSubtargetFeatures - Parses features string setting specified subtarget options.
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
MVT getVectorElementType() const
This is an optimization pass for GlobalISel generic memory operations.
static cl::opt< bool > SchedPredsCloser("sched-preds-closer", cl::Hidden, cl::ZeroOrMore, cl::init(true))
@ Artificial
Arbitrary strong DAG edge (no real dependence).
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
void AddFeature(StringRef String, bool Enable=true)
Adds Features.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM)
bool isVector() const
Return true if this is a vector value type.
static cl::opt< bool > EnablePredicatedCalls("hexagon-pred-calls", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Consider calls to be predicable"))
bool useHVXFloatingPoint() const
Triple - Helper class for working with autoconf configuration names.
void addArchSubtarget(MCSubtargetInfo const *STI, StringRef FS)
unsigned getAddrMode(const MachineInstr &MI) const
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
bool erase(const KeyT &Val)
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
The instances of the Type class are immutable: once they are created, they are never changed.
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
SmallVector< SDep, 4 > Succs
All sunit successors.
Hexagon::ArchEnum HexagonArchVersion
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool enableSubRegLiveness() const override
Container class for subtarget features.
static cl::opt< bool > SchedRetvalOptimization("sched-retval-optimization", cl::Hidden, cl::ZeroOrMore, cl::init(true))
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
unsigned const TargetRegisterInfo * TRI
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
bool isTypeForHVX(Type *VecTy, bool IncludeBool=false) const
void apply(ScheduleDAGInstrs *DAG) override
bool useHVXV69Ops() const
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
ArrayRef< MVT > getHVXElementTypes() const
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isValid() const
Return true if this is a valid simple valuetype.
unsigned getL1CacheLineSize() const
void getSMSMutations(std::vector< std::unique_ptr< ScheduleDAGMutation >> &Mutations) const override
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool UseBSBScheduling
True if the target should use Back-Skip-Back scheduling.
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
cl::opt< bool > HexagonDisableDuplex
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
CodeGenOpt::Level OptLevel
const MachineOperand & getOperand(unsigned i) const
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
unsigned NodeNum
Entry # of node in the node vector.
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
bool enableMachineScheduler() const override
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
Manages the enabling and disabling of subtarget specific features.
bool isVectorTy() const
True if this is an instance of VectorType.
static cl::opt< bool > EnableBSBSched("enable-bsb-sched", cl::Hidden, cl::ZeroOrMore, cl::init(true))
const HexagonInstrInfo * TII
MachineOperand class - Representation of each machine instruction operand.
uint64_t getType(const MachineInstr &MI) const
int getNumOccurrences() const
@ Output
A register output-dependence (aka WAW).
const InstrItinerary * Itineraries
Array of itineraries selected.
MachineOperand * getBaseAndOffset(const MachineInstr &MI, int64_t &Offset, unsigned &AccessSize) const
bool isHVXVectorType(MVT VecTy, bool IncludeBool=false) const
@ Order
Any other ordering dependency.
unsigned getL1PrefetchDistance() const
bool isIntegerTy() const
True if this is an instance of IntegerType.
const HexagonInstrInfo * getInstrInfo() const override
static cl::opt< bool > DisableHexagonMISched("disable-hexagon-misched", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon MI Scheduling"))
static cl::opt< bool > EnableV68FloatCodeGen("force-hvx-float", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Enable the code-generation for vector float instructions on v68."))
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
std::string getString() const
Returns features as a string.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
static SUnit * getZeroLatency(SUnit *N, SmallVector< SDep, 4 > &Deps)
If the SUnit has a zero latency edge, return the other SUnit.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Representation of each machine instruction.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
bool useAA() const override
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
static cl::opt< bool > EnableDotCurSched("enable-cur-sched", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("Enable the scheduler to generate .cur"))
void apply(ScheduleDAGInstrs *DAG) override
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
initializer< Ty > init(const Ty &Val)
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
unsigned getVectorNumElements() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Primary interface to the complete machine description for the target machine.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
StandardInstrumentations SI(Debug, VerifyEach)
Optional< Hexagon::ArchEnum > getCpu(StringRef CPU)
Register getReg() const
getReg - Returns the register number.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
bool usePredicatedCalls() const
static MVT getVectorVT(MVT VT, unsigned NumElements)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
StringRef - Represent a constant reference to a string, i.e.
Analysis the ScalarEvolution expression for r is this
bool isHVXElementType(MVT Ty, bool IncludeBool=false) const
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
MachineFunction & MF
Machine function.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation >> &Mutations) const override
@ Barrier
An unknown scheduling barrier.
std::enable_if_t< std::numeric_limits< T >::is_signed, bool > consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
constexpr FeatureBitset & reset(unsigned I)
Wrapper class representing virtual and physical registers.
void apply(ScheduleDAGInstrs *DAG) override
static cl::opt< bool > OverrideLongCalls("hexagon-long-calls", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("If present, forces/disables the use of long calls"))
bool isRegSequence() const
std::vector< SUnit > SUnits
The scheduling units.
LLVM_NODISCARD StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
const HexagonRegisterInfo * getRegisterInfo() const override
HexagonSubtarget & initializeSubtargetDependencies(StringRef CPU, StringRef FS)
const HexagonTargetLowering * getTargetLowering() const override
static cl::opt< bool > EnableTCLatencySched("enable-tc-latency-sched", cl::Hidden, cl::ZeroOrMore, cl::init(false))
void setHeightDirty()
Sets a flag in this node to indicate that its stored Height value will require recomputation the next...
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
void setLatency(unsigned Lat)
Sets the latency for this edge.
const TargetInstrInfo * TII
Target instruction information.
bool useBSBScheduling() const
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
StringRef selectHexagonCPU(StringRef CPU)
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned getNumOperands() const
Returns the total number of operands.
Align max(MaybeAlign Lhs, Align Rhs)
bool isToBeScheduledASAP(const MachineInstr &MI1, const MachineInstr &MI2) const
bool canExecuteInBundle(const MachineInstr &First, const MachineInstr &Second) const
Can these instructions execute at the same time in a bundle.
int getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
getOperandLatency - Compute and return the use operand latency of a given pair of def and use.
const char LLVMTargetMachineRef TM
FeatureBitset completeHVXFeatures(const FeatureBitset &FB)
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Scheduling unit. This is a node in the scheduling DAG.
A ScheduleDAG for scheduling lists of MachineInstr.
static cl::opt< bool > EnableCheckBankConflict("hexagon-check-bank-conflict", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("Enable checking for cache bank conflicts"))
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep) const override
Perform target specific adjustments to the latency of a schedule dependency.
void apply(ScheduleDAGInstrs *DAG) override
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
static cl::opt< bool > EnableSubregLiveness("hexagon-subreg-liveness", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("Enable subregister liveness tracking for Hexagon"))
MCRegAliasIterator enumerates all registers aliasing Reg.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.