27#include "llvm/IR/IntrinsicsHexagon.h"
38#define DEBUG_TYPE "hexagon-subtarget"
40#define GET_SUBTARGETINFO_CTOR
41#define GET_SUBTARGETINFO_TARGET_DESC
42#include "HexagonGenSubtargetInfo.inc"
52 cl::desc(
"Enable the scheduler to generate .cur"));
56 cl::desc(
"Disable Hexagon MI Scheduling"));
60 cl::desc(
"Enable subregister liveness tracking for Hexagon"));
64 cl::desc(
"If present, forces/disables the use of long calls"));
68 cl::desc(
"Consider calls to be predicable"));
78 cl::desc(
"Enable checking for cache bank conflicts"));
83 OptLevel(
TM.getOptLevel()),
84 CPUString(
std::
string(Hexagon_MC::selectHexagonCPU(CPU))),
85 TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
86 RegInfo(getHwMode()), TLInfo(
TM, *this),
87 InstrItins(getInstrItineraryForCPU(CPUString)) {
102 UseHVX128BOps =
false;
103 UseHVX64BOps =
false;
105 UseLongCalls =
false;
114 return F ==
"+hvx-qfloat" ||
F ==
"-hvx-qfloat";
119 if (
F.startswith(
"+hvxv"))
125 if (
F.startswith(
"+hvx") ||
F ==
"-hvx")
126 return F.take_front(4);
131 bool AddQFloat =
false;
137 }
else if (HvxVer ==
"+hvx") {
146 std::string FeatureString = Features.
getString();
150 UseHVXFloatingPoint = UseHVXIEEEFPOps || UseHVXQFloatOps;
152 if (UseHVXQFloatOps && UseHVXIEEEFPOps && UseHVXFloatingPoint)
154 dbgs() <<
"Behavior is undefined for simultaneous qfloat and ieee hvx codegen...");
170 setFeatureBits(FeatureBits.
reset(Hexagon::FeatureDuplex));
181 if (IncludeBool && Ty == MVT::i1)
193 if (!IncludeBool && ElemTy == MVT::i1)
200 if (IncludeBool && ElemTy == MVT::i1) {
203 for (
MVT T : ElemTypes)
204 if (NumElems *
T.getSizeInBits() == 8 * HwLen)
210 if (VecWidth != 8 * HwLen && VecWidth != 16 * HwLen)
216 if (!VecTy->
isVectorTy() || isa<ScalableVectorType>(VecTy))
229 auto isHvxTy = [
this, IncludeBool](
MVT SimpleTy) {
242 if (SimpleTy.
isValid() && isHvxTy(SimpleTy))
256 if (
D.getKind() ==
SDep::Output &&
D.getReg() == Hexagon::USR_OVF)
258 for (
auto &
E : Erase)
271 bool IsLoadMI1 = MI1.
mayLoad();
272 if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1))
275 if (SI.getKind() !=
SDep::Order || SI.getLatency() != 0)
278 if (!QII->isHVXVec(MI2))
284 for (
SDep &PI : SI.getSUnit()->Preds) {
285 if (PI.getSUnit() != &SU || PI.getKind() !=
SDep::Order)
288 SI.getSUnit()->setDepthDirty();
302bool HexagonSubtarget::CallMutation::shouldTFRICallBind(
304 const SUnit &Inst2)
const {
316 SUnit* LastSequentialCall =
nullptr;
327 for (
unsigned su = 0, e = DAG->
SUnits.size(); su != e; ++su) {
329 if (DAG->
SUnits[su].getInstr()->isCall())
330 LastSequentialCall = &DAG->
SUnits[su];
332 else if (DAG->
SUnits[su].getInstr()->isCompare() && LastSequentialCall)
336 shouldTFRICallBind(HII, DAG->
SUnits[su], DAG->
SUnits[su+1]))
354 if (
MI->isCopy() &&
MI->getOperand(1).getReg().isPhysical()) {
356 VRegHoldingReg[
MI->getOperand(0).getReg()] =
MI->getOperand(1).getReg();
357 LastVRegUse.
erase(
MI->getOperand(1).getReg());
362 if (MO.isUse() && !
MI->isCopy() &&
363 VRegHoldingReg.
count(MO.getReg())) {
365 LastVRegUse[VRegHoldingReg[MO.getReg()]] = &DAG->
SUnits[su];
366 }
else if (MO.isDef() && MO.getReg().isPhysical()) {
369 if (LastVRegUse.
count(*AI) &&
370 LastVRegUse[*AI] != &DAG->
SUnits[su])
373 LastVRegUse.
erase(*AI);
391 for (
unsigned i = 0, e = DAG->
SUnits.size(); i != e; ++i) {
401 if (BaseOp0 ==
nullptr || !BaseOp0->
isReg() || Size0 >= 32)
404 for (
unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) {
413 if (BaseOp1 ==
nullptr || !BaseOp1->
isReg() || Size1 >= 32 ||
418 if (((Offset0 ^ Offset1) & 0x18) != 0)
440 SUnit *Dst,
int DstOpIdx,
442 if (!Src->isInstr() || !Dst->isInstr())
453 isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
470 std::optional<unsigned> DLatency;
471 for (
const auto &DDep : Dst->Succs) {
474 for (
unsigned OpNum = 0; OpNum < DDst->
getNumOperands(); OpNum++) {
485 std::optional<unsigned>
Latency =
495 DLatency = std::nullopt;
506 isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
512 Latency = updateLatency(*SrcInst, *DstInst, IsArtificial,
Latency);
517 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
const {
518 Mutations.push_back(std::make_unique<UsrOverflowMutation>());
519 Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
520 Mutations.push_back(std::make_unique<BankConflictMutation>());
524 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
const {
525 Mutations.push_back(std::make_unique<UsrOverflowMutation>());
526 Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
530void HexagonSubtarget::anchor() {}
542int HexagonSubtarget::updateLatency(
MachineInstr &SrcInst,
557void HexagonSubtarget::restoreLatency(
SUnit *Src,
SUnit *Dst)
const {
559 for (
auto &
I : Src->Succs) {
560 if (!
I.isAssignedRegDep() ||
I.getSUnit() != Dst)
564 for (
unsigned OpNum = 0; OpNum < SrcI->
getNumOperands(); OpNum++) {
566 bool IsSameOrSubReg =
false;
570 IsSameOrSubReg = (MOReg == DepR);
574 if (MO.
isDef() && IsSameOrSubReg)
578 assert(DefIdx >= 0 &&
"Def Reg not found in Src MI");
581 for (
unsigned OpNum = 0; OpNum < DstI->
getNumOperands(); OpNum++) {
585 &InstrItins, *SrcI, DefIdx, *DstI, OpNum);
591 bool IsArtificial =
I.isArtificial();
599 auto F =
find(Dst->Preds,
T);
601 F->setLatency(
I.getLatency());
606void HexagonSubtarget::changeLatency(
SUnit *Src,
SUnit *Dst,
unsigned Lat)
608 for (
auto &
I : Src->Succs) {
609 if (!
I.isAssignedRegDep() ||
I.getSUnit() != Dst)
616 auto F =
find(Dst->Preds,
T);
625 if (
I.isAssignedRegDep() &&
I.getLatency() == 0 &&
626 !
I.getSUnit()->getInstr()->isPseudo())
635bool HexagonSubtarget::isBestZeroLatency(
SUnit *Src,
SUnit *Dst,
642 if (Dst->isBoundaryNode())
659 SUnit *Best =
nullptr;
660 SUnit *DstBest =
nullptr;
662 if (SrcBest ==
nullptr || Src->NodeNum >= SrcBest->
NodeNum) {
665 if (DstBest ==
nullptr || Dst->NodeNum <= DstBest->
NodeNum)
673 if ((Src == SrcBest && Dst == DstBest ) ||
674 (SrcBest ==
nullptr && Dst == DstBest) ||
675 (Src == SrcBest && Dst ==
nullptr))
680 if (SrcBest !=
nullptr) {
682 changeLatency(SrcBest, Dst, 1);
684 restoreLatency(SrcBest, Dst);
686 if (DstBest !=
nullptr) {
688 changeLatency(Src, DstBest, 1);
690 restoreLatency(Src, DstBest);
695 if (SrcBest && DstBest)
698 changeLatency(SrcBest, DstBest, 0);
703 for (
auto &
I : DstBest->
Preds)
704 if (ExclSrc.
count(
I.getSUnit()) == 0 &&
705 isBestZeroLatency(
I.getSUnit(), DstBest,
TII, ExclSrc, ExclDst))
706 changeLatency(
I.getSUnit(), DstBest, 0);
707 }
else if (SrcBest) {
711 for (
auto &
I : SrcBest->
Succs)
712 if (ExclDst.
count(
I.getSUnit()) == 0 &&
713 isBestZeroLatency(SrcBest,
I.getSUnit(),
TII, ExclSrc, ExclDst))
714 changeLatency(SrcBest,
I.getSUnit(), 0);
742 static Scalar ScalarInts[] = {
743#define GET_SCALAR_INTRINSICS
745#undef GET_SCALAR_INTRINSICS
748 static Hvx HvxInts[] = {
749#define GET_HVX_INTRINSICS
751#undef GET_HVX_INTRINSICS
754 const auto CmpOpcode = [](
auto A,
auto B) {
return A.Opcode <
B.Opcode; };
755 [[maybe_unused]]
static bool SortedScalar =
757 [[maybe_unused]]
static bool SortedHvx =
760 auto [BS, ES] = std::make_pair(std::begin(ScalarInts), std::end(ScalarInts));
761 auto [BH, EH] = std::make_pair(std::begin(HvxInts), std::end(HvxInts));
763 auto FoundScalar = std::lower_bound(BS, ES, Scalar{Opc, 0}, CmpOpcode);
764 if (FoundScalar != ES && FoundScalar->Opcode == Opc)
765 return FoundScalar->IntId;
767 auto FoundHvx = std::lower_bound(BH, EH, Hvx{Opc, 0, 0}, CmpOpcode);
768 if (FoundHvx != EH && FoundHvx->Opcode == Opc) {
771 return FoundHvx->Int64Id;
773 return FoundHvx->Int128Id;
776 std::string
error =
"Invalid opcode (" + std::to_string(Opc) +
")";
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const HexagonInstrInfo * TII
static cl::opt< bool > DisableHexagonMISched("disable-hexagon-misched", cl::Hidden, cl::desc("Disable Hexagon MI Scheduling"))
static cl::opt< bool > EnableDotCurSched("enable-cur-sched", cl::Hidden, cl::init(true), cl::desc("Enable the scheduler to generate .cur"))
static cl::opt< bool > EnableCheckBankConflict("hexagon-check-bank-conflict", cl::Hidden, cl::init(true), cl::desc("Enable checking for cache bank conflicts"))
static cl::opt< bool > EnableSubregLiveness("hexagon-subreg-liveness", cl::Hidden, cl::init(true), cl::desc("Enable subregister liveness tracking for Hexagon"))
static cl::opt< bool > OverrideLongCalls("hexagon-long-calls", cl::Hidden, cl::desc("If present, forces/disables the use of long calls"))
static cl::opt< bool > SchedPredsCloser("sched-preds-closer", cl::Hidden, cl::init(true))
static cl::opt< bool > SchedRetvalOptimization("sched-retval-optimization", cl::Hidden, cl::init(true))
static cl::opt< bool > EnableTCLatencySched("enable-tc-latency-sched", cl::Hidden, cl::init(false))
static cl::opt< bool > EnableBSBSched("enable-bsb-sched", cl::Hidden, cl::init(true))
static SUnit * getZeroLatency(SUnit *N, SmallVector< SDep, 4 > &Deps)
If the SUnit has a zero latency edge, return the other SUnit.
static cl::opt< bool > EnablePredicatedCalls("hexagon-pred-calls", cl::Hidden, cl::desc("Consider calls to be predicable"))
unsigned const TargetRegisterInfo * TRI
const char LLVMTargetMachineRef TM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
This file defines the SmallVector class.
static constexpr uint32_t Opcode
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool erase(const KeyT &Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Container class for subtarget features.
constexpr FeatureBitset & reset(unsigned I)
unsigned getAddrMode(const MachineInstr &MI) const
bool canExecuteInBundle(const MachineInstr &First, const MachineInstr &Second) const
Can these instructions execute at the same time in a bundle.
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
getOperandLatency - Compute and return the use operand latency of a given pair of def and use.
MachineOperand * getBaseAndOffset(const MachineInstr &MI, int64_t &Offset, unsigned &AccessSize) const
bool isToBeScheduledASAP(const MachineInstr &MI1, const MachineInstr &MI2) const
uint64_t getType(const MachineInstr &MI) const
Hexagon::ArchEnum HexagonArchVersion
bool usePredicatedCalls() const
const HexagonInstrInfo * getInstrInfo() const override
const HexagonRegisterInfo * getRegisterInfo() const override
void getSMSMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM)
bool isHVXVectorType(EVT VecTy, bool IncludeBool=false) const
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
const HexagonTargetLowering * getTargetLowering() const override
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep) const override
Perform target specific adjustments to the latency of a schedule dependency.
bool UseBSBScheduling
True if the target should use Back-Skip-Back scheduling.
unsigned getL1PrefetchDistance() const
ArrayRef< MVT > getHVXElementTypes() const
bool useHVXFloatingPoint() const
bool enableSubRegLiveness() const override
unsigned getVectorLength() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
ParseSubtargetFeatures - Parses features string setting specified subtarget options.
bool useHVXV68Ops() const
unsigned getL1CacheLineSize() const
bool isTypeForHVX(Type *VecTy, bool IncludeBool=false) const
Intrinsic::ID getIntrinsicId(unsigned Opc) const
HexagonSubtarget & initializeSubtargetDependencies(StringRef CPU, StringRef FS)
bool enableMachineScheduler() const override
bool useBSBScheduling() const
bool isHVXElementType(MVT Ty, bool IncludeBool=false) const
bool useAA() const override
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
const InstrItinerary * Itineraries
Array of itineraries selected.
MCRegAliasIterator enumerates all registers aliasing Reg.
bool isVector() const
Return true if this is a vector value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isValid() const
Return true if this is a valid simple valuetype.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
unsigned getNumOperands() const
Returns the total number of operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool isRegSequence() const
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
@ Output
A register output-dependence (aka WAW).
@ Order
Any other ordering dependency.
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Barrier
An unknown scheduling barrier.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
void setHeightDirty()
Sets a flag in this node to indicate that its stored Height value will require recomputation the next...
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
A ScheduleDAG for scheduling lists of MachineInstr.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
MachineFunction & MF
Machine function.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
bool consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
bool startswith(StringRef Prefix) const
Manages the enabling and disabling of subtarget specific features.
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
std::string getString() const
Returns features as a string.
void AddFeature(StringRef String, bool Enable=true)
Adds Features.
Primary interface to the complete machine description for the target machine.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Triple - Helper class for working with autoconf configuration names.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void addArchSubtarget(MCSubtargetInfo const *STI, StringRef FS)
FeatureBitset completeHVXFeatures(const FeatureBitset &FB)
std::optional< Hexagon::ArchEnum > getCpu(StringRef CPU)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
cl::opt< bool > HexagonDisableDuplex
Implement std::hash so that hash_code can be used in STL containers.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
void apply(ScheduleDAGInstrs *DAG) override
void apply(ScheduleDAGInstrs *DAG) override
void apply(ScheduleDAGInstrs *DAG) override
void apply(ScheduleDAGInstrs *DAG) override