LLVM 23.0.0git
DXILShaderFlags.cpp
Go to the documentation of this file.
1//===- DXILShaderFlags.cpp - DXIL Shader Flags helper objects -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file This file contains helper objects and APIs for working with DXIL
10/// Shader Flags.
11///
12//===----------------------------------------------------------------------===//
13
14#include "DXILShaderFlags.h"
15#include "DirectX.h"
20#include "llvm/IR/Attributes.h"
22#include "llvm/IR/Instruction.h"
25#include "llvm/IR/Intrinsics.h"
26#include "llvm/IR/IntrinsicsDirectX.h"
27#include "llvm/IR/Module.h"
31
32using namespace llvm;
33using namespace llvm::dxil;
34
63
64static bool checkWaveOps(Intrinsic::ID IID) {
65 // Currently unsupported intrinsics
66 // case Intrinsic::dx_wave_getlanecount:
67 // case Intrinsic::dx_wave_readfirst:
68 // case Intrinsic::dx_wave_reduce.and:
69 // case Intrinsic::dx_wave_reduce.or:
70 // case Intrinsic::dx_wave_reduce.xor:
71 // case Intrinsic::dx_wave_prefixop:
72 // case Intrinsic::dx_quad.readat:
73 // case Intrinsic::dx_quad.readacrossy:
74 // case Intrinsic::dx_quad.readacrossdiagonal:
75 // case Intrinsic::dx_wave_prefixballot:
76 // case Intrinsic::dx_wave_match:
77 // case Intrinsic::dx_wavemulti.*:
78 // case Intrinsic::dx_wavemulti.ballot:
79 // case Intrinsic::dx_quad.vote:
80 switch (IID) {
81 default:
82 return false;
83 case Intrinsic::dx_wave_is_first_lane:
84 case Intrinsic::dx_wave_getlaneindex:
85 case Intrinsic::dx_wave_any:
86 case Intrinsic::dx_wave_all_equal:
87 case Intrinsic::dx_wave_all:
88 case Intrinsic::dx_wave_readlane:
89 case Intrinsic::dx_wave_active_countbits:
90 case Intrinsic::dx_wave_ballot:
91 case Intrinsic::dx_wave_prefix_bit_count:
92 // Wave Active Op Variants
93 case Intrinsic::dx_wave_reduce_or:
94 case Intrinsic::dx_wave_reduce_xor:
95 case Intrinsic::dx_wave_reduce_and:
96 case Intrinsic::dx_wave_reduce_sum:
97 case Intrinsic::dx_wave_reduce_usum:
98 case Intrinsic::dx_wave_product:
99 case Intrinsic::dx_wave_uproduct:
100 case Intrinsic::dx_wave_reduce_max:
101 case Intrinsic::dx_wave_reduce_umax:
102 case Intrinsic::dx_wave_reduce_min:
103 case Intrinsic::dx_wave_reduce_umin:
104 // Wave Prefix Op Variants
105 case Intrinsic::dx_wave_prefix_sum:
106 case Intrinsic::dx_wave_prefix_usum:
107 case Intrinsic::dx_wave_prefix_product:
108 case Intrinsic::dx_wave_prefix_uproduct:
109 // Quad Op Variants
110 case Intrinsic::dx_quad_read_across_x:
111 case Intrinsic::dx_quad_read_across_y:
112 case Intrinsic::dx_quad_read_across_diagonal:
113 return true;
114 }
115}
116
118 switch (IID) {
119 default:
120 return false;
121 case Intrinsic::fma:
122 return true;
123 }
124}
125
126static bool isOptimizationDisabled(const Module &M) {
127 const StringRef Key = "dx.disable_optimizations";
128 if (auto *Flag = mdconst::extract_or_null<ConstantInt>(M.getModuleFlag(Key)))
129 return Flag->getValue().getBoolValue();
130 return false;
131}
132
133// Checks to see if the status bit from a load with status
134// instruction is ever extracted. If it is, the module needs
135// to have the TiledResources shader flag set.
137 [[maybe_unused]] Intrinsic::ID IID = II.getIntrinsicID();
138 assert(IID == Intrinsic::dx_resource_load_typedbuffer ||
139 IID == Intrinsic::dx_resource_load_rawbuffer &&
140 "unexpected intrinsic ID");
141 for (const User *U : II.users()) {
142 if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U)) {
143 // Resource load operations return a {result, status} pair.
144 // Check if we extract the status
145 if (EVI->getNumIndices() == 1 && EVI->getIndices()[0] == 1)
146 return true;
147 }
148 }
149
150 return false;
151}
152
153/// Update the shader flags mask based on the given instruction.
154/// \param CSF Shader flags mask to update.
155/// \param I Instruction to check.
156void ModuleShaderFlags::updateFunctionFlags(ComputedShaderFlags &CSF,
157 const Instruction &I,
159 const ModuleMetadataInfo &MMDI) {
160 if (!CSF.Doubles)
161 CSF.Doubles = I.getType()->getScalarType()->isDoubleTy();
162
163 if (!CSF.Doubles) {
164 for (const Value *Op : I.operands()) {
165 if (Op->getType()->getScalarType()->isDoubleTy()) {
166 CSF.Doubles = true;
167 break;
168 }
169 }
170 }
171
172 if (CSF.Doubles) {
173 switch (I.getOpcode()) {
174 case Instruction::FDiv:
175 case Instruction::UIToFP:
176 case Instruction::SIToFP:
177 case Instruction::FPToUI:
178 case Instruction::FPToSI:
179 CSF.DX11_1_DoubleExtensions = true;
180 break;
181 }
182 }
183
184 if (!CSF.LowPrecisionPresent)
185 CSF.LowPrecisionPresent = I.getType()->getScalarType()->isIntegerTy(16) ||
186 I.getType()->getScalarType()->isHalfTy();
187
188 if (!CSF.LowPrecisionPresent) {
189 for (const Value *Op : I.operands()) {
190 if (Op->getType()->getScalarType()->isIntegerTy(16) ||
191 Op->getType()->getScalarType()->isHalfTy()) {
192 CSF.LowPrecisionPresent = true;
193 break;
194 }
195 }
196 }
197
198 if (CSF.LowPrecisionPresent) {
199 if (CSF.NativeLowPrecisionMode)
200 CSF.NativeLowPrecision = true;
201 else
202 CSF.MinimumPrecision = true;
203 }
204
205 if (!CSF.Int64Ops)
206 CSF.Int64Ops = I.getType()->getScalarType()->isIntegerTy(64);
207
208 if (!CSF.Int64Ops && !isa<LifetimeIntrinsic>(&I)) {
209 for (const Value *Op : I.operands()) {
210 if (Op->getType()->getScalarType()->isIntegerTy(64)) {
211 CSF.Int64Ops = true;
212 break;
213 }
214 }
215 }
216
217 if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
218 switch (II->getIntrinsicID()) {
219 default:
220 break;
221 case Intrinsic::dx_resource_handlefrombinding: {
222 dxil::ResourceTypeInfo &RTI = DRTM[cast<TargetExtType>(II->getType())];
223
224 // Set ResMayNotAlias if DXIL validator version >= 1.8 and the function
225 // uses UAVs
226 if (!CSF.ResMayNotAlias && CanSetResMayNotAlias &&
227 MMDI.ValidatorVersion >= VersionTuple(1, 8) && RTI.isUAV())
228 CSF.ResMayNotAlias = true;
229
230 switch (RTI.getResourceKind()) {
233 CSF.EnableRawAndStructuredBuffers = true;
234 break;
235 default:
236 break;
237 }
238 break;
239 }
240 case Intrinsic::dx_resource_load_typedbuffer: {
241 dxil::ResourceTypeInfo &RTI =
242 DRTM[cast<TargetExtType>(II->getArgOperand(0)->getType())];
243 if (RTI.isTyped() && RTI.isUAV())
244 CSF.TypedUAVLoadAdditionalFormats |= RTI.getTyped().ElementCount > 1;
245 if (!CSF.TiledResources && checkIfStatusIsExtracted(*II))
246 CSF.TiledResources = true;
247 break;
248 }
249 case Intrinsic::dx_resource_load_rawbuffer: {
250 if (!CSF.TiledResources && checkIfStatusIsExtracted(*II))
251 CSF.TiledResources = true;
252 break;
253 }
254 }
255 }
256 // Handle call instructions
257 if (auto *CI = dyn_cast<CallInst>(&I)) {
258 const Function *CF = CI->getCalledFunction();
259 // Merge-in shader flags mask of the called function in the current module
260 if (FunctionFlags.contains(CF))
261 CSF.merge(FunctionFlags[CF]);
262
263 CSF.DX11_1_DoubleExtensions |=
264 checkDoubleExtensionOps(CI->getIntrinsicID());
265 CSF.WaveOps |= checkWaveOps(CI->getIntrinsicID());
266 }
267}
268
269/// Set shader flags that apply to all functions within the module
271ModuleShaderFlags::gatherGlobalModuleFlags(const Module &M,
272 const DXILResourceMap &DRM,
273 const ModuleMetadataInfo &MMDI) {
274
275 ComputedShaderFlags CSF;
276
277 CSF.DisableOptimizations = isOptimizationDisabled(M);
278
279 CSF.UAVsAtEveryStage = hasUAVsAtEveryStage(DRM, MMDI);
280
281 // Set the Max64UAVs flag if the number of UAVs is > 8
282 uint32_t NumUAVs = 0;
283 for (auto &UAV : DRM.uavs())
284 if (MMDI.ValidatorVersion < VersionTuple(1, 6)) {
285 NumUAVs++;
286 } else { // MMDI.ValidatorVersion >= VersionTuple(1, 6)
287 uint32_t Size = UAV.getBinding().Size;
288 uint32_t NewNum = NumUAVs + (Size == 0 ? ~0U : Size);
289 if (NewNum < NumUAVs)
290 NewNum = ~0U;
291 NumUAVs = NewNum;
292 }
293 if (NumUAVs > 8)
294 CSF.Max64UAVs = true;
295
296 // Set the module flag that enables native low-precision execution mode.
297 // NativeLowPrecisionMode can only be set when the command line option
298 // -enable-16bit-types is provided. This is indicated by the dx.nativelowprec
299 // module flag being set
300 // This flag is needed even if the module does not use 16-bit types because a
301 // corresponding debug module may include 16-bit types, and tools that use the
302 // debug module may expect it to have the same flags as the original
303 if (auto *NativeLowPrec = mdconst::extract_or_null<ConstantInt>(
304 M.getModuleFlag("dx.nativelowprec")))
305 if (MMDI.ShaderModelVersion >= VersionTuple(6, 2))
306 CSF.NativeLowPrecisionMode = NativeLowPrec->getValue().getBoolValue();
307
308 // Set ResMayNotAlias to true if DXIL validator version < 1.8 and there
309 // are UAVs present globally.
310 if (CanSetResMayNotAlias && MMDI.ValidatorVersion < VersionTuple(1, 8))
311 CSF.ResMayNotAlias = !DRM.uavs().empty();
312
313 // The command line option -all-resources-bound will set the
314 // dx.allresourcesbound module flag to 1
315 if (auto *AllResourcesBound = mdconst::extract_or_null<ConstantInt>(
316 M.getModuleFlag("dx.allresourcesbound")))
317 if (AllResourcesBound->getValue().getBoolValue())
318 CSF.AllResourcesBound = true;
319
320 return CSF;
321}
322
323/// Construct ModuleShaderFlags for module Module M
325 const DXILResourceMap &DRM,
326 const ModuleMetadataInfo &MMDI) {
327
328 CanSetResMayNotAlias = MMDI.DXILVersion >= VersionTuple(1, 7);
329 // The command line option -res-may-alias will set the dx.resmayalias module
330 // flag to 1, thereby disabling the ability to set the ResMayNotAlias flag
331 if (auto *ResMayAlias = mdconst::extract_or_null<ConstantInt>(
332 M.getModuleFlag("dx.resmayalias")))
333 if (ResMayAlias->getValue().getBoolValue())
334 CanSetResMayNotAlias = false;
335
336 ComputedShaderFlags GlobalSFMask = gatherGlobalModuleFlags(M, DRM, MMDI);
337
338 CallGraph CG(M);
339
340 // Compute Shader Flags Mask for all functions using post-order visit of SCC
341 // of the call graph.
342 for (scc_iterator<CallGraph *> SCCI = scc_begin(&CG); !SCCI.isAtEnd();
343 ++SCCI) {
344 const std::vector<CallGraphNode *> &CurSCC = *SCCI;
345
346 // Union of shader masks of all functions in CurSCC
348 // List of functions in CurSCC that are neither external nor declarations
349 // and hence whose flags are collected
350 SmallVector<Function *> CurSCCFuncs;
351 for (CallGraphNode *CGN : CurSCC) {
352 Function *F = CGN->getFunction();
353 if (!F)
354 continue;
355
356 if (F->isDeclaration()) {
357 assert(!F->getName().starts_with("dx.op.") &&
358 "DXIL Shader Flag analysis should not be run post-lowering.");
359 continue;
360 }
361
362 ComputedShaderFlags CSF = GlobalSFMask;
363 for (const auto &BB : *F)
364 for (const auto &I : BB)
365 updateFunctionFlags(CSF, I, DRTM, MMDI);
366 // Update combined shader flags mask for all functions in this SCC
367 SCCSF.merge(CSF);
368
369 CurSCCFuncs.push_back(F);
370 }
371
372 // Update combined shader flags mask for all functions of the module
373 CombinedSFMask.merge(SCCSF);
374
375 // Shader flags mask of each of the functions in an SCC of the call graph is
376 // the union of all functions in the SCC. Update shader flags masks of
377 // functions in CurSCC accordingly. This is trivially true if SCC contains
378 // one function.
379 for (Function *F : CurSCCFuncs)
380 // Merge SCCSF with that of F
381 FunctionFlags[F].merge(SCCSF);
382 }
383}
384
386 uint64_t FlagVal = (uint64_t) * this;
387 OS << formatv("; Shader Flags Value: {0:x8}\n;\n", FlagVal);
388 if (FlagVal == 0)
389 return;
390 OS << "; Note: shader requires additional functionality:\n";
391#define SHADER_FEATURE_FLAG(FeatureBit, DxilModuleNum, FlagName, Str) \
392 if (FlagName) \
393 (OS << ";").indent(7) << Str << "\n";
394#include "llvm/BinaryFormat/DXContainerConstants.def"
395 OS << "; Note: extra DXIL module flags:\n";
396#define DXIL_MODULE_FLAG(DxilModuleBit, FlagName, Str) \
397 if (FlagName) \
398 (OS << ";").indent(7) << Str << "\n";
399#include "llvm/BinaryFormat/DXContainerConstants.def"
400 OS << ";\n";
401}
402
403/// Return the shader flags mask of the specified function Func.
406 auto Iter = FunctionFlags.find(Func);
407 assert((Iter != FunctionFlags.end() && Iter->first == Func) &&
408 "Get Shader Flags : No Shader Flags Mask exists for function");
409 return Iter->second;
410}
411
412//===----------------------------------------------------------------------===//
413// ShaderFlagsAnalysis and ShaderFlagsAnalysisPrinterPass
414
415// Provide an explicit template instantiation for the static ID.
416AnalysisKey ShaderFlagsAnalysis::Key;
417
423
425 MSFI.initialize(M, DRTM, DRM, MMDI);
426
427 return MSFI;
428}
429
432 const ModuleShaderFlags &FlagsInfo = AM.getResult<ShaderFlagsAnalysis>(M);
433 // Print description of combined shader flags for all module functions
434 OS << "; Combined Shader Flags for Module\n";
435 FlagsInfo.getCombinedFlags().print(OS);
436 // Print shader flags mask for each of the module functions
437 OS << "; Shader Flags for Module Functions\n";
438 for (const auto &F : M.getFunctionList()) {
439 if (F.isDeclaration())
440 continue;
441 const ComputedShaderFlags &SFMask = FlagsInfo.getFunctionFlags(&F);
442 OS << formatv("; Function {0} : {1:x8}\n;\n", F.getName(),
443 (uint64_t)(SFMask));
444 }
445
446 return PreservedAnalyses::all();
447}
448
449//===----------------------------------------------------------------------===//
// Legacy pass manager wrapper for the shader flags analysis
451
453 DXILResourceTypeMap &DRTM =
454 getAnalysis<DXILResourceTypeWrapperPass>().getResourceTypeMap();
455 DXILResourceMap &DRM =
456 getAnalysis<DXILResourceWrapperPass>().getResourceMap();
457 const ModuleMetadataInfo MMDI =
459
460 MSFI.initialize(M, DRTM, DRM, MMDI);
461 return false;
462}
463
470
472
474 "DXIL Shader Flag Analysis", true, true)
478 "DXIL Shader Flag Analysis", true, true)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file contains the simple types necessary to represent the attributes associated with functions a...
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
bool checkIfStatusIsExtracted(const IntrinsicInst &II)
static bool isOptimizationDisabled(const Module &M)
static bool hasUAVsAtEveryStage(const DXILResourceMap &DRM, const ModuleMetadataInfo &MMDI)
static bool checkDoubleExtensionOps(Intrinsic::ID IID)
static bool checkWaveOps(Intrinsic::ID IID)
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
This file defines the SmallVector class.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
AnalysisUsage & addRequiredTransitive()
A node in the call graph for a module.
Definition CallGraph.h:162
The basic data container for the call graph of a Module of IR.
Definition CallGraph.h:72
iterator_range< iterator > uavs()
This instruction extracts a struct member or array element value from an aggregate value.
A wrapper class for inspecting calls to intrinsic functions.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
@ RayGeneration
Definition Triple.h:315
@ Amplification
Definition Triple.h:322
Represents a version number in the form major[.minor[.subminor[.build]]].
LLVM_ABI bool isUAV() const
LLVM_ABI bool isTyped() const
LLVM_ABI TypedInfo getTyped() const
dxil::ResourceKind getResourceKind() const
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Wrapper pass for the legacy pass manager.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
bool runOnModule(Module &M) override
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
ModuleShaderFlags run(Module &M, ModuleAnalysisManager &AM)
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition SCCIterator.h:48
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract_or_null(Y &&MD)
Extract a Value from Metadata, allowing null.
Definition Metadata.h:683
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
void merge(const ComputedShaderFlags CSF)
void print(raw_ostream &OS=dbgs()) const
Triple::EnvironmentType ShaderProfile
const ComputedShaderFlags & getFunctionFlags(const Function *) const
Return the shader flags mask of the specified function Func.
void initialize(Module &, DXILResourceTypeMap &DRTM, const DXILResourceMap &DRM, const ModuleMetadataInfo &MMDI)
Construct ModuleShaderFlags for module Module M.
const ComputedShaderFlags & getCombinedFlags() const