//===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Hexagon specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "HexagonSubtarget.h"
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <map>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "hexagon-subtarget"

#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "HexagonGenSubtargetInfo.inc"

static cl::opt<bool> EnableBSBSched("enable-bsb-sched", cl::Hidden,
                                    cl::init(true));

static cl::opt<bool> EnableTCLatencySched("enable-tc-latency-sched", cl::Hidden,
                                          cl::init(false));

static cl::opt<bool>
    EnableDotCurSched("enable-cur-sched", cl::Hidden, cl::init(true),
                      cl::desc("Enable the scheduler to generate .cur"));

static cl::opt<bool>
    DisableHexagonMISched("disable-hexagon-misched", cl::Hidden,
                          cl::desc("Disable Hexagon MI Scheduling"));

static cl::opt<bool> EnableSubregLiveness(
    "hexagon-subreg-liveness", cl::Hidden, cl::init(true),
    cl::desc("Enable subregister liveness tracking for Hexagon"));

static cl::opt<bool> OverrideLongCalls(
    "hexagon-long-calls", cl::Hidden,
    cl::desc("If present, forces/disables the use of long calls"));

static cl::opt<bool>
    EnablePredicatedCalls("hexagon-pred-calls", cl::Hidden,
                          cl::desc("Consider calls to be predicable"));

static cl::opt<bool> SchedPredsCloser("sched-preds-closer", cl::Hidden,
                                      cl::init(true));

static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
                                             cl::Hidden, cl::init(true));

static cl::opt<bool> EnableCheckBankConflict(
    "hexagon-check-bank-conflict", cl::Hidden, cl::init(true),
    cl::desc("Enable checking for cache bank conflicts"));

HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
                                   StringRef FS, const TargetMachine &TM)
    : HexagonGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
      OptLevel(TM.getOptLevel()),
      CPUString(std::string(Hexagon_MC::selectHexagonCPU(CPU))),
      TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
      RegInfo(getHwMode()), TLInfo(TM, *this),
      InstrItins(getInstrItineraryForCPU(CPUString)) {
  Hexagon_MC::addArchSubtarget(this, FS);
  // Beware of the default constructor of InstrItineraryData: it will
  // reset all members to 0.
  assert(InstrItins.Itineraries != nullptr && "InstrItins not initialized");
}

HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
  std::optional<Hexagon::ArchEnum> ArchVer = Hexagon::getCpu(CPUString);
  if (ArchVer)
    HexagonArchVersion = *ArchVer;
  else
    llvm_unreachable("Unrecognized Hexagon processor version");

  UseHVX128BOps = false;
  UseHVX64BOps = false;
  UseAudioOps = false;
  UseLongCalls = false;

  SubtargetFeatures Features(FS);

  // Turn on QFloat if the HVX version is v68+.
  // The function ParseSubtargetFeatures will set feature bits and initialize
  // subtarget's variables all in one, so there isn't a good way to preprocess
  // the feature string, other than by tinkering with it directly.
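  // For example, FS = "+hvxv68,+hvx-length128b" names no explicit qfloat
  // feature, so "+hvx-qfloat" is appended below; an explicit "+hvx-qfloat"
  // or "-hvx-qfloat" anywhere in FS is respected and left untouched.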
  auto IsQFloatFS = [](StringRef F) {
    return F == "+hvx-qfloat" || F == "-hvx-qfloat";
  };
  if (!llvm::count_if(Features.getFeatures(), IsQFloatFS)) {
    auto getHvxVersion = [&Features](StringRef FS) -> StringRef {
      for (StringRef F : llvm::reverse(Features.getFeatures())) {
        if (F.startswith("+hvxv"))
          return F;
      }
      for (StringRef F : llvm::reverse(Features.getFeatures())) {
        if (F == "-hvx")
          return StringRef();
        if (F.startswith("+hvx") || F == "-hvx")
          return F.take_front(4); // Return "+hvx" or "-hvx".
      }
      return StringRef();
    };

    bool AddQFloat = false;
    StringRef HvxVer = getHvxVersion(FS);
    if (HvxVer.startswith("+hvxv")) {
      int Ver = 0;
      if (!HvxVer.drop_front(5).consumeInteger(10, Ver) && Ver >= 68)
        AddQFloat = true;
    } else if (HvxVer == "+hvx") {
      if (hasV68Ops())
        AddQFloat = true;
    }

    if (AddQFloat)
      Features.AddFeature("+hvx-qfloat");
  }

  std::string FeatureString = Features.getString();
  ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FeatureString);

  if (useHVXV68Ops())
    UseHVXFloatingPoint = UseHVXIEEEFPOps || UseHVXQFloatOps;

  if (UseHVXQFloatOps && UseHVXIEEEFPOps && UseHVXFloatingPoint)
    LLVM_DEBUG(
        dbgs() << "Behavior is undefined for simultaneous qfloat and ieee hvx codegen...");

  if (OverrideLongCalls.getPosition())
    UseLongCalls = OverrideLongCalls;

  UseBSBScheduling = hasV60Ops() && EnableBSBSched;

  if (isTinyCore()) {
    // Tiny core has a single thread, so back-to-back scheduling is enabled by
    // default.
    if (!EnableBSBSched.getPosition())
      UseBSBScheduling = false;
  }

  FeatureBitset FeatureBits = getFeatureBits();
  if (HexagonDisableDuplex)
    setFeatureBits(FeatureBits.reset(Hexagon::FeatureDuplex));
  setFeatureBits(Hexagon_MC::completeHVXFeatures(FeatureBits));

  return *this;
}

bool HexagonSubtarget::isHVXElementType(MVT Ty, bool IncludeBool) const {
  if (!useHVXOps())
    return false;
  if (Ty.isVector())
    Ty = Ty.getVectorElementType();
  if (IncludeBool && Ty == MVT::i1)
    return true;
  ArrayRef<MVT> ElemTypes = getHVXElementTypes();
  return llvm::is_contained(ElemTypes, Ty);
}

bool HexagonSubtarget::isHVXVectorType(EVT VecTy, bool IncludeBool) const {
  if (!VecTy.isSimple())
    return false;
  if (!VecTy.isVector() || !useHVXOps() || VecTy.isScalableVector())
    return false;
  MVT ElemTy = VecTy.getSimpleVT().getVectorElementType();
  if (!IncludeBool && ElemTy == MVT::i1)
    return false;

  unsigned HwLen = getVectorLength();
  unsigned NumElems = VecTy.getVectorNumElements();
  ArrayRef<MVT> ElemTypes = getHVXElementTypes();

  if (IncludeBool && ElemTy == MVT::i1) {
    // Boolean HVX vector types are formed from regular HVX vector types
    // by replacing the element type with i1.
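    // E.g. with HwLen = 128 (1024 bits per vector), v128i1 qualifies
    // because a 128-element vector of i8 fills 128 * 8 == 8 * HwLen bits.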
    for (MVT T : ElemTypes)
      if (NumElems * T.getSizeInBits() == 8 * HwLen)
        return true;
    return false;
  }

  unsigned VecWidth = VecTy.getSizeInBits();
  if (VecWidth != 8 * HwLen && VecWidth != 16 * HwLen)
    return false;
  return llvm::is_contained(ElemTypes, ElemTy);
}

bool HexagonSubtarget::isTypeForHVX(Type *VecTy, bool IncludeBool) const {
  if (!VecTy->isVectorTy() || isa<ScalableVectorType>(VecTy))
    return false;
  // Avoid types like <2 x i32*>.
  Type *ScalTy = VecTy->getScalarType();
  if (!ScalTy->isIntegerTy() &&
      !(ScalTy->isFloatingPointTy() && useHVXFloatingPoint()))
    return false;
  // The given type may be something like <17 x i32>, which is not MVT,
  // but can be represented as (non-simple) EVT.
  EVT Ty = EVT::getEVT(VecTy, /*HandleUnknown*/false);
  if (!Ty.getVectorElementType().isSimple())
    return false;

  auto isHvxTy = [this, IncludeBool](MVT SimpleTy) {
    if (isHVXVectorType(SimpleTy, IncludeBool))
      return true;
    auto Action = getTargetLowering()->getPreferredVectorAction(SimpleTy);
    return Action == TargetLoweringBase::TypeWidenVector;
  };

  // Round up EVT to have power-of-2 elements, and keep checking if it
  // qualifies for HVX, dividing it in half after each step.
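  // E.g. <17 x i32> is first rounded up to v32i32; then v32i32, v16i32,
  // v8i32, v4i32 and v2i32 are each checked in turn.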
  MVT ElemTy = Ty.getVectorElementType().getSimpleVT();
  unsigned VecLen = PowerOf2Ceil(Ty.getVectorNumElements());
  while (VecLen > 1) {
    MVT SimpleTy = MVT::getVectorVT(ElemTy, VecLen);
    if (SimpleTy.isValid() && isHvxTy(SimpleTy))
      return true;
    VecLen /= 2;
  }

  return false;
}
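
// Remove output dependences on the overflow bit USR_OVF: the bit is sticky,
// so the order in which instructions set it does not affect the result.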
void HexagonSubtarget::UsrOverflowMutation::apply(ScheduleDAGInstrs *DAG) {
  for (SUnit &SU : DAG->SUnits) {
    if (!SU.isInstr())
      continue;
    SmallVector<SDep, 4> Erase;
    for (auto &D : SU.Preds)
      if (D.getKind() == SDep::Output && D.getReg() == Hexagon::USR_OVF)
        Erase.push_back(D);
    for (auto &E : Erase)
      SU.removePred(E);
  }
}

void HexagonSubtarget::HVXMemLatencyMutation::apply(ScheduleDAGInstrs *DAG) {
  for (SUnit &SU : DAG->SUnits) {
    // Update the latency of chain edges between v60 vector load or store
    // instructions to be 1. These instructions cannot be scheduled in the
    // same packet.
    MachineInstr &MI1 = *SU.getInstr();
    auto *QII = static_cast<const HexagonInstrInfo*>(DAG->TII);
    bool IsStoreMI1 = MI1.mayStore();
    bool IsLoadMI1 = MI1.mayLoad();
    if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1))
      continue;
    for (SDep &SI : SU.Succs) {
      if (SI.getKind() != SDep::Order || SI.getLatency() != 0)
        continue;
      MachineInstr &MI2 = *SI.getSUnit()->getInstr();
      if (!QII->isHVXVec(MI2))
        continue;
      if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) {
        SI.setLatency(1);
        SU.setHeightDirty();
        // Change the dependence in the opposite direction too.
        for (SDep &PI : SI.getSUnit()->Preds) {
          if (PI.getSUnit() != &SU || PI.getKind() != SDep::Order)
            continue;
          PI.setLatency(1);
          SI.getSUnit()->setDepthDirty();
        }
      }
    }
  }
}

// Check if a call and subsequent A2_tfrpi instructions should maintain
// scheduling affinity. We are looking for the TFRI to be consumed in
// the next instruction. This should help reduce the instances of
// double register pairs being allocated and scheduled before a call
// when not used until after the call. This situation is exacerbated
// by the fact that we allocate the pair from the callee saves list,
// leading to excess spills and restores.
bool HexagonSubtarget::CallMutation::shouldTFRICallBind(
    const HexagonInstrInfo &HII, const SUnit &Inst1,
    const SUnit &Inst2) const {
  if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
    return false;

  // TypeXTYPE are 64 bit operations.
  unsigned Type = HII.getType(*Inst2.getInstr());
  return Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op ||
         Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM;
}

void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
  SUnit* LastSequentialCall = nullptr;
  // Map from virtual register to physical register from the copy.
  DenseMap<unsigned, unsigned> VRegHoldingReg;
  // Map from the physical register to the instruction that uses virtual
  // register. This is used to create the barrier edge.
  DenseMap<unsigned, SUnit *> LastVRegUse;
  auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
  auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();

  // Currently we only catch the situation when a compare gets scheduled
  // before the preceding call.
  for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
    // Remember the call.
    if (DAG->SUnits[su].getInstr()->isCall())
      LastSequentialCall = &DAG->SUnits[su];
    // Look for a compare that defines a predicate.
    else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
      DAG->addEdge(&DAG->SUnits[su], SDep(LastSequentialCall, SDep::Barrier));
    // Look for call and tfri* instructions.
    else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
             shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
      DAG->addEdge(&DAG->SUnits[su], SDep(&DAG->SUnits[su-1], SDep::Barrier));
    // Prevent redundant register copies due to reads and writes of physical
    // registers. The original motivation for this was the code generated
    // between two calls, which is caused by both the return value and the
    // argument for the next call being in %r0.
    // Example:
    //   1: <call1>
    //   2: %vreg = COPY %r0
    //   3: <use of %vreg>
    //   4: %r0 = ...
    //   5: <call2>
    // The scheduler would often swap 3 and 4, so an additional register is
    // needed. This code inserts a Barrier dependence between 3 & 4 to prevent
    // this.
    // The code below checks for all the physical registers, not just R0/D0/V0.
    else if (SchedRetvalOptimization) {
      const MachineInstr *MI = DAG->SUnits[su].getInstr();
      if (MI->isCopy() && MI->getOperand(1).getReg().isPhysical()) {
        // %vregX = COPY %r0
        VRegHoldingReg[MI->getOperand(0).getReg()] = MI->getOperand(1).getReg();
        LastVRegUse.erase(MI->getOperand(1).getReg());
      } else {
        for (const MachineOperand &MO : MI->operands()) {
          if (!MO.isReg())
            continue;
          if (MO.isUse() && !MI->isCopy() &&
              VRegHoldingReg.count(MO.getReg())) {
            // <use of %vregX>
            LastVRegUse[VRegHoldingReg[MO.getReg()]] = &DAG->SUnits[su];
          } else if (MO.isDef() && MO.getReg().isPhysical()) {
            for (MCRegAliasIterator AI(MO.getReg(), &TRI, true); AI.isValid();
                 ++AI) {
              if (LastVRegUse.count(*AI) &&
                  LastVRegUse[*AI] != &DAG->SUnits[su])
                // %r0 = ...
                DAG->addEdge(&DAG->SUnits[su],
                             SDep(LastVRegUse[*AI], SDep::Barrier));
              LastVRegUse.erase(*AI);
            }
          }
        }
      }
    }
  }
}

void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) {
  if (!EnableCheckBankConflict)
    return;

  const auto &HII = static_cast<const HexagonInstrInfo&>(*DAG->TII);

  // Create artificial edges between loads that could likely cause a bank
  // conflict. Since such loads would normally not have any dependency
  // between them, we cannot rely on existing edges.
  for (unsigned i = 0, e = DAG->SUnits.size(); i != e; ++i) {
    SUnit &S0 = DAG->SUnits[i];
    MachineInstr &L0 = *S0.getInstr();
    if (!L0.mayLoad() || L0.mayStore() ||
        HII.getAddrMode(L0) != HexagonII::BaseImmOffset)
      continue;
    int64_t Offset0;
    unsigned Size0;
    MachineOperand *BaseOp0 = HII.getBaseAndOffset(L0, Offset0, Size0);
    // If the access size is longer than the L1 cache line, skip the check.
    if (BaseOp0 == nullptr || !BaseOp0->isReg() || Size0 >= 32)
      continue;
    // Scan only up to 32 instructions ahead (to avoid n^2 complexity).
    for (unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) {
      SUnit &S1 = DAG->SUnits[j];
      MachineInstr &L1 = *S1.getInstr();
      if (!L1.mayLoad() || L1.mayStore() ||
          HII.getAddrMode(L1) != HexagonII::BaseImmOffset)
        continue;
      int64_t Offset1;
      unsigned Size1;
      MachineOperand *BaseOp1 = HII.getBaseAndOffset(L1, Offset1, Size1);
      if (BaseOp1 == nullptr || !BaseOp1->isReg() || Size1 >= 32 ||
          BaseOp0->getReg() != BaseOp1->getReg())
        continue;
      // Check bits 3 and 4 of the offset: if they differ, a bank conflict
      // is unlikely.
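      // E.g. offsets 0 and 8 differ in bit 3, so that pair is skipped below,
      // while offsets 0 and 32 agree in bits 3-4 and get the artificial edge.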
      if (((Offset0 ^ Offset1) & 0x18) != 0)
        continue;
      // Bits 3 and 4 are the same, add an artificial edge and set extra
      // latency.
      SDep A(&S0, SDep::Artificial);
      A.setLatency(1);
      S1.addPred(A, true);
    }
  }
}

/// Enable use of alias analysis during code generation (during MI
/// scheduling, DAGCombine, etc.).
bool HexagonSubtarget::useAA() const {
  if (OptLevel != CodeGenOptLevel::None)
    return true;
  return false;
}

/// Perform target specific adjustments to the latency of a schedule
/// dependency.
void HexagonSubtarget::adjustSchedDependency(SUnit *Src, int SrcOpIdx,
                                             SUnit *Dst, int DstOpIdx,
                                             SDep &Dep) const {
  if (!Src->isInstr() || !Dst->isInstr())
    return;

  MachineInstr *SrcInst = Src->getInstr();
  MachineInstr *DstInst = Dst->getInstr();
  const HexagonInstrInfo *QII = getInstrInfo();

  // Instructions with .new operands have zero latency.
  SmallSet<SUnit *, 4> ExclSrc;
  SmallSet<SUnit *, 4> ExclDst;
  if (QII->canExecuteInBundle(*SrcInst, *DstInst) &&
      isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
    Dep.setLatency(0);
    return;
  }

  // Set the latency for a copy to zero since we hope that it will get
  // removed.
  if (DstInst->isCopy())
    Dep.setLatency(0);

  // If it's a REG_SEQUENCE/COPY, use its destination instruction to determine
  // the correct latency.
  // If there are multiple uses of the def of COPY/REG_SEQUENCE, set the latency
  // only if the latencies on all the uses are equal, otherwise set it to
  // default.
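  // E.g. if the COPY's destination feeds two uses whose operand latencies
  // are both 2, the edge gets latency 2; if the uses disagree, it falls
  // back to 0.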
  if ((DstInst->isRegSequence() || DstInst->isCopy())) {
    Register DReg = DstInst->getOperand(0).getReg();
    std::optional<unsigned> DLatency;
    for (const auto &DDep : Dst->Succs) {
      MachineInstr *DDst = DDep.getSUnit()->getInstr();
      int UseIdx = -1;
      for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) {
        const MachineOperand &MO = DDst->getOperand(OpNum);
        if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == DReg) {
          UseIdx = OpNum;
          break;
        }
      }

      if (UseIdx == -1)
        continue;

      std::optional<unsigned> Latency =
          InstrInfo.getOperandLatency(&InstrItins, *SrcInst, 0, *DDst, UseIdx);

      // Set DLatency for the first time.
      if (!DLatency)
        DLatency = Latency;

      // For multiple uses, if the Latency is different across uses, reset
      // DLatency.
      if (DLatency != Latency) {
        DLatency = std::nullopt;
        break;
      }
    }
    Dep.setLatency(DLatency ? *DLatency : 0);
  }

  // Try to schedule uses near definitions to generate .cur.
  ExclSrc.clear();
  ExclDst.clear();
  if (EnableDotCurSched && QII->isToBeScheduledASAP(*SrcInst, *DstInst) &&
      isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
    Dep.setLatency(0);
    return;
  }
  int Latency = Dep.getLatency();
  bool IsArtificial = Dep.isArtificial();
  Latency = updateLatency(*SrcInst, *DstInst, IsArtificial, Latency);
  Dep.setLatency(Latency);
}

void HexagonSubtarget::getPostRAMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(std::make_unique<UsrOverflowMutation>());
  Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
  Mutations.push_back(std::make_unique<BankConflictMutation>());
}

void HexagonSubtarget::getSMSMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(std::make_unique<UsrOverflowMutation>());
  Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
}

// Pin the vtable to this file.
void HexagonSubtarget::anchor() {}

bool HexagonSubtarget::enableMachineScheduler() const {
  if (DisableHexagonMISched.getNumOccurrences())
    return !DisableHexagonMISched;
  return true;
}

bool HexagonSubtarget::usePredicatedCalls() const {
  return EnablePredicatedCalls;
}

int HexagonSubtarget::updateLatency(MachineInstr &SrcInst,
                                    MachineInstr &DstInst, bool IsArtificial,
                                    int Latency) const {
  if (IsArtificial)
    return 1;
  if (!hasV60Ops())
    return Latency;

  auto &QII = static_cast<const HexagonInstrInfo &>(*getInstrInfo());
  // BSB scheduling.
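  // Halving rounds up: e.g. a latency of 3 becomes 2, and 1 stays 1.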
  if (QII.isHVXVec(SrcInst) || useBSBScheduling())
    Latency = (Latency + 1) >> 1;
  return Latency;
}

void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
  MachineInstr *SrcI = Src->getInstr();
  for (auto &I : Src->Succs) {
    if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
      continue;
    Register DepR = I.getReg();
    int DefIdx = -1;
    for (unsigned OpNum = 0; OpNum < SrcI->getNumOperands(); OpNum++) {
      const MachineOperand &MO = SrcI->getOperand(OpNum);
      bool IsSameOrSubReg = false;
      if (MO.isReg()) {
        Register MOReg = MO.getReg();
        if (DepR.isVirtual()) {
          IsSameOrSubReg = (MOReg == DepR);
        } else {
          IsSameOrSubReg = getRegisterInfo()->isSubRegisterEq(DepR, MOReg);
        }
        if (MO.isDef() && IsSameOrSubReg)
          DefIdx = OpNum;
      }
    }
    assert(DefIdx >= 0 && "Def Reg not found in Src MI");
    MachineInstr *DstI = Dst->getInstr();
    SDep T = I;
    for (unsigned OpNum = 0; OpNum < DstI->getNumOperands(); OpNum++) {
      const MachineOperand &MO = DstI->getOperand(OpNum);
      if (MO.isReg() && MO.isUse() && MO.getReg() == DepR) {
        std::optional<unsigned> Latency = InstrInfo.getOperandLatency(
            &InstrItins, *SrcI, DefIdx, *DstI, OpNum);

        // For some instructions (ex: COPY), we might end up with < 0 latency
        // as they don't have any Itinerary class associated with them.
        if (!Latency)
          Latency = 0;
        bool IsArtificial = I.isArtificial();
        Latency = updateLatency(*SrcI, *DstI, IsArtificial, *Latency);
        I.setLatency(*Latency);
      }
    }

    // Update the latency of opposite edge too.
    T.setSUnit(Src);
    auto F = find(Dst->Preds, T);
    assert(F != Dst->Preds.end());
    F->setLatency(I.getLatency());
  }
}

/// Change the latency between the two SUnits.
void HexagonSubtarget::changeLatency(SUnit *Src, SUnit *Dst,
                                     unsigned Lat) const {
  for (auto &I : Src->Succs) {
    if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
      continue;
    SDep T = I;
    I.setLatency(Lat);

    // Update the latency of opposite edge too.
    T.setSUnit(Src);
    auto F = find(Dst->Preds, T);
    assert(F != Dst->Preds.end());
    F->setLatency(Lat);
  }
}

/// If the SUnit has a zero latency edge, return the other SUnit.
static SUnit *getZeroLatency(SUnit *N, SmallVector<SDep, 4> &Deps) {
  for (auto &I : Deps)
    if (I.isAssignedRegDep() && I.getLatency() == 0 &&
        !I.getSUnit()->getInstr()->isPseudo())
      return I.getSUnit();
  return nullptr;
}

// Return true if these are the best two instructions to schedule
// together with a zero latency. Only one dependence should have a zero
// latency. If there are multiple choices, choose the best, and change
// the others, if needed.
bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst,
                                         const HexagonInstrInfo *TII,
                                         SmallSet<SUnit*, 4> &ExclSrc,
                                         SmallSet<SUnit*, 4> &ExclDst) const {
  MachineInstr &SrcInst = *Src->getInstr();
  MachineInstr &DstInst = *Dst->getInstr();

  // Ignore Boundary SU nodes as these have null instructions.
  if (Dst->isBoundaryNode())
    return false;

  if (SrcInst.isPHI() || DstInst.isPHI())
    return false;

  if (!TII->isToBeScheduledASAP(SrcInst, DstInst) &&
      !TII->canExecuteInBundle(SrcInst, DstInst))
    return false;

  // The architecture doesn't allow three dependent instructions in the same
  // packet. So, if the destination has a zero latency successor, then it's
  // not a candidate for a zero latency predecessor.
  if (getZeroLatency(Dst, Dst->Succs) != nullptr)
    return false;

  // Check if the Dst instruction is the best candidate first.
  SUnit *Best = nullptr;
  SUnit *DstBest = nullptr;
  SUnit *SrcBest = getZeroLatency(Dst, Dst->Preds);
  if (SrcBest == nullptr || Src->NodeNum >= SrcBest->NodeNum) {
    // Check that Src doesn't have a better candidate.
    DstBest = getZeroLatency(Src, Src->Succs);
    if (DstBest == nullptr || Dst->NodeNum <= DstBest->NodeNum)
      Best = Dst;
  }
  if (Best != Dst)
    return false;

  // The caller frequently adds the same dependence twice. If so, then
  // return true for this case too.
  if ((Src == SrcBest && Dst == DstBest) ||
      (SrcBest == nullptr && Dst == DstBest) ||
      (Src == SrcBest && Dst == nullptr))
    return true;

  // Reassign the latency for the previous bests, which requires setting
  // the dependence edge in both directions.
  if (SrcBest != nullptr) {
    if (!hasV60Ops())
      changeLatency(SrcBest, Dst, 1);
    else
      restoreLatency(SrcBest, Dst);
  }
  if (DstBest != nullptr) {
    if (!hasV60Ops())
      changeLatency(Src, DstBest, 1);
    else
      restoreLatency(Src, DstBest);
  }

  // Attempt to find another opportunity for zero latency in a different
  // dependence.
  if (SrcBest && DstBest)
    // If there is an edge from SrcBest to DstBest, then try to change that
    // to 0 now.
    changeLatency(SrcBest, DstBest, 0);
  else if (DstBest) {
    // Check if the previous best destination instruction has a new zero
    // latency dependence opportunity.
    ExclSrc.insert(Src);
    for (auto &I : DstBest->Preds)
      if (ExclSrc.count(I.getSUnit()) == 0 &&
          isBestZeroLatency(I.getSUnit(), DstBest, TII, ExclSrc, ExclDst))
        changeLatency(I.getSUnit(), DstBest, 0);
  } else if (SrcBest) {
    // Check if previous best source instruction has a new zero latency
    // dependence opportunity.
    ExclDst.insert(Dst);
    for (auto &I : SrcBest->Succs)
      if (ExclDst.count(I.getSUnit()) == 0 &&
          isBestZeroLatency(SrcBest, I.getSUnit(), TII, ExclSrc, ExclDst))
        changeLatency(SrcBest, I.getSUnit(), 0);
  }

  return true;
}

unsigned HexagonSubtarget::getL1CacheLineSize() const {
  return 32;
}

unsigned HexagonSubtarget::getL1PrefetchDistance() const {
  return 32;
}

bool HexagonSubtarget::enableSubRegLiveness() const {
  return EnableSubregLiveness;
}

Intrinsic::ID HexagonSubtarget::getIntrinsicId(unsigned Opc) const {
  struct Scalar {
    unsigned Opcode;
    Intrinsic::ID IntId;
  };
  struct Hvx {
    unsigned Opcode;
    Intrinsic::ID Int64Id, Int128Id;
  };

  static Scalar ScalarInts[] = {
#define GET_SCALAR_INTRINSICS
#include "HexagonDepInstrIntrinsics.inc"
#undef GET_SCALAR_INTRINSICS
  };

  static Hvx HvxInts[] = {
#define GET_HVX_INTRINSICS
#include "HexagonDepInstrIntrinsics.inc"
#undef GET_HVX_INTRINSICS
  };
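
  // Sort both tables by opcode on first call only: the comma-expression
  // initializers of these function-local statics run exactly once, which
  // keeps the std::lower_bound binary searches below valid.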
  const auto CmpOpcode = [](auto A, auto B) { return A.Opcode < B.Opcode; };
  [[maybe_unused]] static bool SortedScalar =
      (llvm::sort(ScalarInts, CmpOpcode), true);
  [[maybe_unused]] static bool SortedHvx =
      (llvm::sort(HvxInts, CmpOpcode), true);

  auto [BS, ES] = std::make_pair(std::begin(ScalarInts), std::end(ScalarInts));
  auto [BH, EH] = std::make_pair(std::begin(HvxInts), std::end(HvxInts));

  auto FoundScalar = std::lower_bound(BS, ES, Scalar{Opc, 0}, CmpOpcode);
  if (FoundScalar != ES && FoundScalar->Opcode == Opc)
    return FoundScalar->IntId;

  auto FoundHvx = std::lower_bound(BH, EH, Hvx{Opc, 0, 0}, CmpOpcode);
  if (FoundHvx != EH && FoundHvx->Opcode == Opc) {
    unsigned HwLen = getVectorLength();
    if (HwLen == 64)
      return FoundHvx->Int64Id;
    if (HwLen == 128)
      return FoundHvx->Int128Id;
  }

  std::string error = "Invalid opcode (" + std::to_string(Opc) + ")";
  llvm_unreachable(error.c_str());
  return 0;
}