20 using namespace clang;
21 using namespace CodeGen;
26 return Bld.CreateCall(
27 llvm::Intrinsic::getDeclaration(
28 &
CGM.
getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
35 return Bld.CreateCall(
36 llvm::Intrinsic::getDeclaration(
37 &
CGM.
getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
44 return Bld.CreateCall(
45 llvm::Intrinsic::getDeclaration(
46 &
CGM.
getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
53 Bld.CreateCall(llvm::Intrinsic::getDeclaration(
59 getNVPTXCTABarrier(CGF);
74 llvm::Value *Mask = Bld.CreateSub(getNVPTXWarpSize(CGF), Bld.getInt32(1));
76 return Bld.CreateAnd(Bld.CreateSub(NumThreads, Bld.getInt32(1)),
77 Bld.CreateNot(Mask),
"master_tid");
84 OMPRTL_NVPTX__kmpc_kernel_init,
89 ADDRESS_SPACE_SHARED = 3,
95 : WorkerFn(nullptr), CGFI(nullptr) {
96 createWorkerFunction(CGM);
99 void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction(
109 WorkerFn->addFnAttr(llvm::Attribute::NoInline);
112 void CGOpenMPRuntimeNVPTX::initializeEnvironment() {
118 ActiveWorkers =
new llvm::GlobalVariable(
120 llvm::GlobalValue::CommonLinkage,
121 llvm::Constant::getNullValue(CGM.
Int32Ty),
"__omp_num_threads", 0,
122 llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED);
123 ActiveWorkers->setAlignment(DL.getPrefTypeAlignment(CGM.
Int32Ty));
125 WorkID =
new llvm::GlobalVariable(
127 llvm::GlobalValue::CommonLinkage,
128 llvm::Constant::getNullValue(CGM.
Int64Ty),
"__tgt_work_id", 0,
129 llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED);
130 WorkID->setAlignment(DL.getPrefTypeAlignment(CGM.
Int64Ty));
133 void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) {
138 emitWorkerLoop(CGF, WST);
143 WorkerFunctionState &WST) {
156 llvm::BasicBlock *SelectWorkersBB = CGF.
createBasicBlock(
".select.workers");
158 llvm::BasicBlock *TerminateBB = CGF.
createBasicBlock(
".terminate.parallel");
171 llvm::Constant::getNullValue(WorkID->getType()->getElementType()),
173 Bld.CreateCondBr(ShouldTerminate, ExitBB, SelectWorkersBB);
182 Bld.CreateCondBr(ActiveThread, ExecuteBB, BarrierBB);
214 llvm::BasicBlock *WorkerCheckBB = CGF.
createBasicBlock(
".check.for.worker");
222 Bld.CreateICmpUGT(ThreadID, MasterID,
"excess_in_master_warp");
223 Bld.CreateCondBr(ShouldDie, EST.
ExitBB, WorkerCheckBB);
227 llvm::Value *IsWorker = Bld.CreateICmpULT(ThreadID, MasterID,
"is_worker");
228 Bld.CreateCondBr(IsWorker, WorkerBB, MasterBB);
240 llvm::Value *Args[] = {Bld.getInt32(0), getNVPTXThreadID(CGF)};
241 CGF.
EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init),
248 llvm::BasicBlock *TerminateBB = CGF.
createBasicBlock(
".termination.notifier");
254 llvm::Constant::getNullValue(WorkID->getType()->getElementType()), WorkID,
255 WorkID->getAlignment());
269 CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(
unsigned Function) {
270 llvm::Constant *RTLFn =
nullptr;
271 switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) {
272 case OMPRTL_NVPTX__kmpc_kernel_init: {
276 llvm::FunctionType *FnTy =
277 llvm::FunctionType::get(CGM.
VoidTy, TypeParams,
false);
285 void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *
ID,
286 llvm::Constant *Addr,
288 auto *F = dyn_cast<llvm::Function>(Addr);
293 llvm::Module *M = F->getParent();
294 llvm::LLVMContext &Ctx = M->getContext();
297 llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata(
"nvvm.annotations");
299 llvm::Metadata *MDVals[] = {
300 llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx,
"kernel"),
301 llvm::ConstantAsMetadata::get(
302 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
304 MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
307 void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
309 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
314 assert(!ParentName.empty() &&
"Invalid target region parent name!");
316 EntryFunctionState EST;
317 WorkerFunctionState WST(CGM);
329 : RT(RT), EST(EST), WST(WST) {}
331 RT.emitEntryHeader(CGF, EST, WST);
333 void Exit(
CodeGenFunction &CGF)
override { RT.emitEntryFooter(CGF, EST); }
334 }
Action(*
this, EST, WST);
337 IsOffloadEntry, CodeGen);
340 emitWorkerFunction(WST);
344 WST.WorkerFn->setName(OutlinedFn->getName() +
"_worker");
350 llvm_unreachable(
"OpenMP NVPTX can only handle device code.");
353 initializeEnvironment();
357 const Expr *NumTeams,
358 const Expr *ThreadLimit,
365 llvm::Function *OutlinedFun =
nullptr;
366 if (isa<OMPTeamsDirective>(D)) {
369 D, ThreadIDVar, InnermostKind, CodeGen);
370 OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
371 OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
373 llvm_unreachable(
"parallel directive is not yet supported for nvptx "
392 OutlinedFnArgs.push_back(ZeroAddr.
getPointer());
393 OutlinedFnArgs.push_back(ZeroAddr.
getPointer());
394 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST)
Signal termination of OMP execution.
CodeGenTypes & getTypes()
llvm::Module & getModule() const
llvm::AllocaInst * CreateTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateTempAlloca - This creates an alloca and inserts it into the entry block.
const llvm::DataLayout & getDataLayout() const
VarDecl - An instance of this class is created to represent a variable declaration or definition...
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::IntegerType * Int64Ty
llvm::CallInst * EmitRuntimeCall(llvm::Value *callee, const Twine &name="")
llvm::CallSite EmitCallOrInvoke(llvm::Value *Callee, ArrayRef< llvm::Value * > Args, const Twine &Name="")
Emits a call or invoke instruction to the given function, depending on the current state of the EH st...
void InitTempAlloca(Address Alloca, llvm::Value *Value)
InitTempAlloca - Provide an initial value for the given alloca which will be observable at all locati...
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
This function ought to emit, in the general case, a call to.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
llvm::Value * getPointer() const
Expr - This represents one expression.
const CGFunctionInfo & arrangeNullaryFunction()
A nullary function is a freestanding function of type 'void ()'.
void SetInternalFunctionAttributes(const Decl *D, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
ASTContext & getContext() const
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
llvm::IntegerType * Int32Ty
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
GlobalDecl - represents a global declaration.
The l-value was considered opaque, so the alignment was determined from a type.
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
llvm::Constant * CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeSet ExtraAttrs=llvm::AttributeSet())
Create a new runtime function with the specified type and name.
Encodes a location in the source.
This is a basic class for representing single OpenMP executable directive.
static OMPLinearClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, OpenMPLinearClauseKind Modifier, SourceLocation ModifierLoc, SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef< Expr * > VL, ArrayRef< Expr * > PL, ArrayRef< Expr * > IL, Expr *Step, Expr *CalcStep, Stmt *PreInit, Expr *PostUpdate)
Creates clause with a list of variables VL and a linear step Step.
OpenMPDirectiveKind
OpenMP directives.
This file defines OpenMP nodes for declarative directives.
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
const LangOptions & getLangOpts() const
void setAction(PrePostActionTy &Action) const
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
This class organizes the cross-function state that is used while generating LLVM code.
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
llvm::LoadInst * CreateAlignedLoad(llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
llvm::Value * emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits inlined function for the specified OpenMP parallel.
llvm::BasicBlock * ExitBB
This file defines OpenMP AST classes for executable directives and clauses.
virtual llvm::Value * emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
WorkerFunctionState(CodeGenModule &CGM)
Internal linkage, which indicates that the entity can be referred to from within the translation unit...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block, taking care to avoid creation of branches from dummy blocks.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
llvm::Function * WorkerFn
void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, WorkerFunctionState &WST)
Helper for target entry function.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.