LLVM  14.0.0git
Macros | Functions | Variables
OpenMPOpt.cpp File Reference
#include "llvm/Transforms/IPO/OpenMPOpt.h"
#include "llvm/ADT/EnumeratedArray.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Assumptions.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
#include "llvm/Frontend/OpenMP/OMPKinds.def"
Include dependency graph for OpenMPOpt.cpp:

Go to the source code of this file.

Macros

#define DEBUG_TYPE   "openmp-opt"
 
#define ICV_RT_SET(_Name, RTL)
 
#define ICV_RT_GET(Name, RTL)
 
#define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init)
 
#define OMP_TYPE(VarName, ...)
 
#define OMP_ARRAY_TYPE(VarName, ...)
 
#define OMP_FUNCTION_TYPE(VarName, ...)
 
#define OMP_STRUCT_TYPE(VarName, ...)
 
#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)
 

Functions

 STATISTIC (NumOpenMPRuntimeCallsDeduplicated, "Number of OpenMP runtime calls deduplicated")
 
 STATISTIC (NumOpenMPParallelRegionsDeleted, "Number of OpenMP parallel regions deleted")
 
 STATISTIC (NumOpenMPRuntimeFunctionsIdentified, "Number of OpenMP runtime functions identified")
 
 STATISTIC (NumOpenMPRuntimeFunctionUsesIdentified, "Number of OpenMP runtime function uses identified")
 
 STATISTIC (NumOpenMPTargetRegionKernels, "Number of OpenMP target region entry points (=kernels) identified")
 
 STATISTIC (NumOpenMPTargetRegionKernelsSPMD, "Number of OpenMP target region entry points (=kernels) executed in " "SPMD-mode instead of generic-mode")
 
 STATISTIC (NumOpenMPTargetRegionKernelsWithoutStateMachine, "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode without a state machines")
 
 STATISTIC (NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback, "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines with fallback")
 
 STATISTIC (NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback, "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines without fallback")
 
 STATISTIC (NumOpenMPParallelRegionsReplacedInGPUStateMachine, "Number of OpenMP parallel regions replaced with ID in GPU state machines")
 
 STATISTIC (NumOpenMPParallelRegionsMerged, "Number of OpenMP parallel regions merged")
 
 STATISTIC (NumBytesMovedToSharedMemory, "Amount of memory pushed to shared memory")
 
 INITIALIZE_PASS_BEGIN (OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc", "OpenMP specific optimizations", false, false) INITIALIZE_PASS_END(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc", "OpenMP specific optimizations", false, false)
 

Variables

static cl::opt< bool > DisableOpenMPOptimizations ("openmp-opt-disable", cl::ZeroOrMore, cl::desc("Disable OpenMP specific optimizations."), cl::Hidden, cl::init(false))
 
static cl::opt< bool > EnableParallelRegionMerging ("openmp-opt-enable-merging", cl::ZeroOrMore, cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden, cl::init(false))
 
static cl::opt< bool > DisableInternalization ("openmp-opt-disable-internalization", cl::ZeroOrMore, cl::desc("Disable function internalization."), cl::Hidden, cl::init(false))
 
static cl::opt< bool > PrintICVValues ("openmp-print-icv-values", cl::init(false), cl::Hidden)
 
static cl::opt< bool > PrintOpenMPKernels ("openmp-print-gpu-kernels", cl::init(false), cl::Hidden)
 
static cl::opt< bool > HideMemoryTransferLatency ("openmp-hide-memory-transfer-latency", cl::desc("[WIP] Tries to hide the latency of host to device memory" " transfers"), cl::Hidden, cl::init(false))
 
static cl::opt< bool > DisableOpenMPOptDeglobalization ("openmp-opt-disable-deglobalization", cl::ZeroOrMore, cl::desc("Disable OpenMP optimizations involving deglobalization."), cl::Hidden, cl::init(false))
 
static cl::opt< bool > DisableOpenMPOptSPMDization ("openmp-opt-disable-spmdization", cl::ZeroOrMore, cl::desc("Disable OpenMP optimizations involving SPMD-ization."), cl::Hidden, cl::init(false))
 
static cl::opt< bool > DisableOpenMPOptFolding ("openmp-opt-disable-folding", cl::ZeroOrMore, cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden, cl::init(false))
 
static cl::opt< bool > DisableOpenMPOptStateMachineRewrite ("openmp-opt-disable-state-machine-rewrite", cl::ZeroOrMore, cl::desc("Disable OpenMP optimizations that replace the state machine."), cl::Hidden, cl::init(false))
 
static cl::opt< bool > PrintModuleAfterOptimizations ("openmp-opt-print-module", cl::ZeroOrMore, cl::desc("Print the current module after OpenMP optimizations."), cl::Hidden, cl::init(false))
 
static cl::opt< bool > AlwaysInlineDeviceFunctions ("openmp-opt-inline-device", cl::ZeroOrMore, cl::desc("Inline all applicible functions on the device."), cl::Hidden, cl::init(false))
 
static cl::opt< bool > EnableVerboseRemarks ("openmp-opt-verbose-remarks", cl::ZeroOrMore, cl::desc("Enables more verbose remarks."), cl::Hidden, cl::init(false))
 
static cl::opt< unsigned > SetFixpointIterations ("openmp-opt-max-iterations", cl::Hidden, cl::desc("Maximal number of attributor iterations."), cl::init(256))
 
static constexpr auto TAG = "[" DEBUG_TYPE "]"
 
openmp opt cgscc
 
openmp opt OpenMP specific optimizations
 
openmp opt OpenMP specific false
 

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "openmp-opt"

Definition at line 49 of file OpenMPOpt.cpp.

◆ ICV_DATA_ENV

#define ICV_DATA_ENV (   Enum,
  _Name,
  _EnvVarName,
  Init 
)
Value:
{ \
auto &ICV = ICVs[Enum]; \
ICV.Name = _Name; \
ICV.Kind = Enum; \
ICV.InitKind = Init; \
ICV.EnvVarName = _EnvVarName; \
switch (ICV.InitKind) { \
case ICV_IMPLEMENTATION_DEFINED: \
ICV.InitValue = nullptr; \
break; \
case ICV_ZERO: \
ICV.InitValue = ConstantInt::get( \
Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \
break; \
case ICV_FALSE: \
ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \
break; \
case ICV_LAST: \
break; \
} \
}

◆ ICV_RT_GET

#define ICV_RT_GET (   Name,
  RTL 
)
Value:
{ \
auto &ICV = ICVs[Name]; \
ICV.Getter = RTL; \
}

◆ ICV_RT_SET

#define ICV_RT_SET (   _Name,
  RTL 
)
Value:
{ \
auto &ICV = ICVs[_Name]; \
ICV.Setter = RTL; \
}

◆ OMP_ARRAY_TYPE

#define OMP_ARRAY_TYPE (   VarName,
  ... 
)
Value:
ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \
(void)VarName##Ty; \
PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \
(void)VarName##PtrTy;

◆ OMP_FUNCTION_TYPE

#define OMP_FUNCTION_TYPE (   VarName,
  ... 
)
Value:
FunctionType *VarName = OMPBuilder.VarName; \
(void)VarName; \
PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \
(void)VarName##Ptr;

◆ OMP_RTL

#define OMP_RTL (   _Enum,
  _Name,
  _IsVarArg,
  _ReturnType,
  ... 
)
Value:
{ \
SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \
Function *F = M.getFunction(_Name); \
RTLFunctions.insert(F); \
if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \
RuntimeFunctionIDMap[F] = _Enum; \
F->removeFnAttr(Attribute::NoInline); \
auto &RFI = RFIs[_Enum]; \
RFI.Kind = _Enum; \
RFI.Name = _Name; \
RFI.IsVarArg = _IsVarArg; \
RFI.ReturnType = OMPBuilder._ReturnType; \
RFI.ArgumentTypes = std::move(ArgsTypes); \
RFI.Declaration = F; \
unsigned NumUses = collectUses(RFI); \
(void)NumUses; \
LLVM_DEBUG({ \
dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \
<< " found\n"; \
if (RFI.Declaration) \
dbgs() << TAG << "-> got " << NumUses << " uses in " \
<< RFI.getNumFunctionsWithUses() \
<< " different functions.\n"; \
}); \
} \
}

◆ OMP_STRUCT_TYPE

#define OMP_STRUCT_TYPE (   VarName,
  ... 
)
Value:
StructType *VarName = OMPBuilder.VarName; \
(void)VarName; \
PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \
(void)VarName##Ptr;

◆ OMP_TYPE

#define OMP_TYPE (   VarName,
  ... 
)
Value:
Type *VarName = OMPBuilder.VarName; \
(void)VarName;

Function Documentation

◆ INITIALIZE_PASS_BEGIN()

INITIALIZE_PASS_BEGIN ( OpenMPOptCGSCCLegacyPass  ,
"openmp-opt-cgscc"  ,
"OpenMP specific optimizations"  ,
false  ,
false   
)

◆ STATISTIC() [1/12]

STATISTIC ( NumBytesMovedToSharedMemory  ,
"Amount of memory pushed to shared memory"   
)

◆ STATISTIC() [2/12]

STATISTIC ( NumOpenMPParallelRegionsDeleted  ,
"Number of OpenMP parallel regions deleted"   
)

◆ STATISTIC() [3/12]

STATISTIC ( NumOpenMPParallelRegionsMerged  ,
"Number of OpenMP parallel regions merged"   
)

◆ STATISTIC() [4/12]

STATISTIC ( NumOpenMPParallelRegionsReplacedInGPUStateMachine  ,
"Number of OpenMP parallel regions replaced with ID in GPU state machines"   
)

◆ STATISTIC() [5/12]

STATISTIC ( NumOpenMPRuntimeCallsDeduplicated  ,
"Number of OpenMP runtime calls deduplicated"   
)

◆ STATISTIC() [6/12]

STATISTIC ( NumOpenMPRuntimeFunctionsIdentified  ,
"Number of OpenMP runtime functions identified"   
)

◆ STATISTIC() [7/12]

STATISTIC ( NumOpenMPRuntimeFunctionUsesIdentified  ,
"Number of OpenMP runtime function uses identified"   
)

◆ STATISTIC() [8/12]

STATISTIC ( NumOpenMPTargetRegionKernels  ,
"Number of OpenMP target region entry points (=kernels) identified"   
)

◆ STATISTIC() [9/12]

STATISTIC ( NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback  ,
"Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines with fallback"   
)

◆ STATISTIC() [10/12]

STATISTIC ( NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback  ,
"Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines without fallback"   
)

◆ STATISTIC() [11/12]

STATISTIC ( NumOpenMPTargetRegionKernelsSPMD  ,
"Number of OpenMP target region entry points (=kernels) executed in " "SPMD-mode instead of generic-mode"   
)

◆ STATISTIC() [12/12]

STATISTIC ( NumOpenMPTargetRegionKernelsWithoutStateMachine  ,
"Number of OpenMP target region entry points (=kernels) executed in " "generic-mode without a state machines"   
)

Variable Documentation

◆ AlwaysInlineDeviceFunctions

cl::opt<bool> AlwaysInlineDeviceFunctions("openmp-opt-inline-device", cl::ZeroOrMore, cl::desc("Inline all applicible functions on the device."), cl::Hidden, cl::init(false))
static

◆ cgscc

openmp opt cgscc

Definition at line 4795 of file OpenMPOpt.cpp.

◆ DisableInternalization

cl::opt<bool> DisableInternalization("openmp-opt-disable-internalization", cl::ZeroOrMore, cl::desc("Disable function internalization."), cl::Hidden, cl::init(false))
static

◆ DisableOpenMPOptDeglobalization

cl::opt<bool> DisableOpenMPOptDeglobalization("openmp-opt-disable-deglobalization", cl::ZeroOrMore, cl::desc("Disable OpenMP optimizations involving deglobalization."), cl::Hidden, cl::init(false))
static

◆ DisableOpenMPOptFolding

cl::opt<bool> DisableOpenMPOptFolding("openmp-opt-disable-folding", cl::ZeroOrMore, cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden, cl::init(false))
static

◆ DisableOpenMPOptimizations

cl::opt<bool> DisableOpenMPOptimizations("openmp-opt-disable", cl::ZeroOrMore, cl::desc("Disable OpenMP specific optimizations."), cl::Hidden, cl::init(false))
static

◆ DisableOpenMPOptSPMDization

cl::opt<bool> DisableOpenMPOptSPMDization("openmp-opt-disable-spmdization", cl::ZeroOrMore, cl::desc("Disable OpenMP optimizations involving SPMD-ization."), cl::Hidden, cl::init(false))
static

◆ DisableOpenMPOptStateMachineRewrite

cl::opt<bool> DisableOpenMPOptStateMachineRewrite("openmp-opt-disable-state-machine-rewrite", cl::ZeroOrMore, cl::desc("Disable OpenMP optimizations that replace the state machine."), cl::Hidden, cl::init(false))
static

◆ EnableParallelRegionMerging

cl::opt<bool> EnableParallelRegionMerging("openmp-opt-enable-merging", cl::ZeroOrMore, cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden, cl::init(false))
static

◆ EnableVerboseRemarks

cl::opt<bool> EnableVerboseRemarks("openmp-opt-verbose-remarks", cl::ZeroOrMore, cl::desc("Enables more verbose remarks."), cl::Hidden, cl::init(false))
static

◆ false

openmp opt OpenMP specific false

Definition at line 4796 of file OpenMPOpt.cpp.

◆ HideMemoryTransferLatency

cl::opt<bool> HideMemoryTransferLatency("openmp-hide-memory-transfer-latency", cl::desc("[WIP] Tries to hide the latency of host to device memory" " transfers"), cl::Hidden, cl::init(false))
static

◆ optimizations

openmp opt OpenMP specific optimizations

Definition at line 4796 of file OpenMPOpt.cpp.

◆ PrintICVValues

cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false), cl::Hidden)
static

◆ PrintModuleAfterOptimizations

cl::opt<bool> PrintModuleAfterOptimizations("openmp-opt-print-module", cl::ZeroOrMore, cl::desc("Print the current module after OpenMP optimizations."), cl::Hidden, cl::init(false))
static

◆ PrintOpenMPKernels

cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels", cl::init(false), cl::Hidden)
static

◆ SetFixpointIterations

cl::opt<unsigned> SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden, cl::desc("Maximal number of attributor iterations."), cl::init(256))
static

◆ TAG

constexpr auto TAG = "[" DEBUG_TYPE "]"
static constexpr
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
TAG
static constexpr auto TAG
Definition: OpenMPOpt.cpp:148
llvm::tgtok::VarName
@ VarName
Definition: TGLexer.h:71
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::ArrayType
Class to represent array types.
Definition: DerivedTypes.h:357
F
#define F(x, y, z)
Definition: MD5.cpp:56
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
getFalse
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
Definition: InstructionSimplify.cpp:122
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
llvm::pdb::PDB_SymType::Enum
@ Enum
llvm::Init
Definition: Record.h:271
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
get
Should compile to something r4 addze r3 instead we get
Definition: README.txt:24
llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:103