LLVM 22.0.0git
AMDGPUBarrierLatency.cpp
Go to the documentation of this file.
1//===--- AMDGPUBarrierLatency.cpp - AMDGPU Barrier Latency ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains a DAG scheduling mutation to add latency to
10/// barrier edges between ATOMIC_FENCE instructions and preceding
11/// memory accesses potentially affected by the fence.
12/// This encourages the scheduling of more instructions before
13/// ATOMIC_FENCE instructions. ATOMIC_FENCE instructions may
14/// introduce wait counting or indicate an impending S_BARRIER
15/// wait. Having more instructions in-flight across these
16/// constructs improves latency hiding.
17//
18//===----------------------------------------------------------------------===//
19
22#include "SIInstrInfo.h"
24
25using namespace llvm;
26
27namespace {
28
29class BarrierLatency : public ScheduleDAGMutation {
30public:
31 BarrierLatency() = default;
32 void apply(ScheduleDAGInstrs *DAG) override;
33};
34
35void BarrierLatency::apply(ScheduleDAGInstrs *DAG) {
36 constexpr unsigned SyntheticLatency = 2000;
37 for (SUnit &SU : DAG->SUnits) {
38 const MachineInstr *MI = SU.getInstr();
39 if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
40 continue;
41
42 // Update latency on barrier edges of ATOMIC_FENCE.
43 // We don't consider the scope of the fence or type of instruction
44 // involved in the barrier edge.
45 for (SDep &PredDep : SU.Preds) {
46 if (!PredDep.isBarrier())
47 continue;
48 SUnit *PredSU = PredDep.getSUnit();
49 MachineInstr *MI = PredSU->getInstr();
50 // Only consider memory loads
51 if (!MI->mayLoad() || MI->mayStore())
52 continue;
53 SDep ForwardD = PredDep;
54 ForwardD.setSUnit(&SU);
55 for (SDep &SuccDep : PredSU->Succs) {
56 if (SuccDep == ForwardD) {
57 SuccDep.setLatency(SuccDep.getLatency() + SyntheticLatency);
58 break;
59 }
60 }
61 PredDep.setLatency(PredDep.getLatency() + SyntheticLatency);
62 PredSU->setDepthDirty();
63 SU.setDepthDirty();
64 }
65 }
66}
67
68} // end namespace
69
70std::unique_ptr<ScheduleDAGMutation>
72 return std::make_unique<BarrierLatency>();
73}
Provides AMDGPU specific target descriptions.
IRTranslator LLVM IR MI
Interface definition for SIInstrInfo.
SUnit * getSUnit() const
void setLatency(unsigned Lat)
Sets the latency for this edge.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must elapse between the predecessor and the successor, given that they have this edge between them.
void setSUnit(SUnit *SU)
bool isBarrier() const
Tests if this is an Order dependence that is marked as a barrier.
SmallVector< SDep, 4 > Succs
All sunit successors.
LLVM_ABI void setDepthDirty()
Sets a flag in this node to indicate that its stored Depth value will require recomputation the next time getDepth() is called.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
A ScheduleDAG for scheduling lists of MachineInstr.
Mutate the DAG as a postpass after normal DAG building.
std::vector< SUnit > SUnits
The scheduling units.
void apply(Opt *O, const Mod &M, const Mods &... Ms)
This is an optimization pass for GlobalISel generic memory operations.
std::unique_ptr< ScheduleDAGMutation > createAMDGPUBarrierLatencyDAGMutation()