LLVM 14.0.0git
OMPIRBuilder.cpp
1 //===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements the OpenMPIRBuilder class, which is used as a
11 /// convenient way to create LLVM instructions for OpenMP directives.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/Triple.h"
23 #include "llvm/IR/CFG.h"
24 #include "llvm/IR/DebugInfo.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/MDBuilder.h"
27 #include "llvm/IR/PassManager.h"
28 #include "llvm/IR/Value.h"
30 #include "llvm/Support/Error.h"
39 
40 #include <sstream>
41 
42 #define DEBUG_TYPE "openmp-ir-builder"
43 
44 using namespace llvm;
45 using namespace omp;
46 
47 static cl::opt<bool>
48  OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
49  cl::desc("Use optimistic attributes describing "
50  "'as-if' properties of runtime calls."),
51  cl::init(false));
52 
54  "openmp-ir-builder-unroll-threshold-factor", cl::Hidden,
55  cl::desc("Factor for the unroll threshold to account for code "
56  "simplifications still taking place"),
57  cl::init(1.5));
58 
59 void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
60  LLVMContext &Ctx = Fn.getContext();
61 
62  // Get the function's current attributes.
63  auto Attrs = Fn.getAttributes();
64  auto FnAttrs = Attrs.getFnAttrs();
65  auto RetAttrs = Attrs.getRetAttrs();
66  SmallVector<AttributeSet, 4> ArgAttrs;
67  for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)
68  ArgAttrs.emplace_back(Attrs.getParamAttrs(ArgNo));
69 
70 #define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
71 #include "llvm/Frontend/OpenMP/OMPKinds.def"
72 
73  // Add attributes to the function declaration.
74  switch (FnID) {
75 #define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
76  case Enum: \
77  FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
78  RetAttrs = RetAttrs.addAttributes(Ctx, RetAttrSet); \
79  for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
80  ArgAttrs[ArgNo] = \
81  ArgAttrs[ArgNo].addAttributes(Ctx, ArgAttrSets[ArgNo]); \
82  Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
83  break;
84 #include "llvm/Frontend/OpenMP/OMPKinds.def"
85  default:
86  // Attributes are optional.
87  break;
88  }
89 }
90 
91 FunctionCallee
92 OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, RuntimeFunction FnID) {
93  FunctionType *FnTy = nullptr;
94  Function *Fn = nullptr;
95 
96  // Try to find the declaration in the module first.
97  switch (FnID) {
98 #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
99  case Enum: \
100  FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
101  IsVarArg); \
102  Fn = M.getFunction(Str); \
103  break;
104 #include "llvm/Frontend/OpenMP/OMPKinds.def"
105  }
106 
107  if (!Fn) {
108  // Create a new declaration if we need one.
109  switch (FnID) {
110 #define OMP_RTL(Enum, Str, ...) \
111  case Enum: \
112  Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
113  break;
114 #include "llvm/Frontend/OpenMP/OMPKinds.def"
115  }
116 
117  // Add information if the runtime function takes a callback function
118  if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
119  if (!Fn->hasMetadata(LLVMContext::MD_callback)) {
120  LLVMContext &Ctx = Fn->getContext();
121  MDBuilder MDB(Ctx);
122  // Annotate the callback behavior of the runtime function:
123  // - The callback callee is argument number 2 (microtask).
124  // - The first two arguments of the callback callee are unknown (-1).
125  // - All variadic arguments to the runtime function are passed to the
126  // callback callee.
127  Fn->addMetadata(
128  LLVMContext::MD_callback,
129  *MDNode::get(Ctx, {MDB.createCallbackEncoding(
130  2, {-1, -1}, /* VarArgsArePassed */ true)}));
131  }
132  }
133 
134  LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName()
135  << " with type " << *Fn->getFunctionType() << "\n");
136  addAttributes(FnID, *Fn);
137 
138  } else {
139  LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName()
140  << " with type " << *Fn->getFunctionType() << "\n");
141  }
142 
143  assert(Fn && "Failed to create OpenMP runtime function");
144 
145  // Cast the function to the expected type if necessary
146  Constant *C = ConstantExpr::getBitCast(Fn, FnTy->getPointerTo());
147  return {FnTy, C};
148 }
149 
150 Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
151  FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID);
152  auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee());
153  assert(Fn && "Failed to create OpenMP runtime function pointer");
154  return Fn;
155 }
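Editorial sketch, not part of this file: how a caller could pair getOrCreateRuntimeFunctionPtr with an IRBuilder to emit a runtime call. The names OMPBuilder, Builder, Ident, and ThreadID are assumed to exist in the caller (Ident/ThreadID would be built as shown further below).

    // Sketch only: assumes OpenMPIRBuilder OMPBuilder and IRBuilder<> Builder
    // positioned at the desired insertion point.
    Function *BarrierFn =
        OMPBuilder.getOrCreateRuntimeFunctionPtr(omp::OMPRTL___kmpc_barrier);
    // The declaration carries the attributes attached by addAttributes() above.
    Value *BarrierArgs[] = {Ident, ThreadID};
    Builder.CreateCall(BarrierFn, BarrierArgs);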
156 
157 void OpenMPIRBuilder::initialize() { initializeTypes(M); }
158 
159 void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) {
160  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
161  SmallVector<BasicBlock *, 32> Blocks;
162  SmallVector<OutlineInfo, 16> DeferredOutlines;
163  for (OutlineInfo &OI : OutlineInfos) {
164  // Skip functions that have not finalized yet; may happen with nested
165  // function generation.
166  if (Fn && OI.getFunction() != Fn) {
167  DeferredOutlines.push_back(OI);
168  continue;
169  }
170 
171  ParallelRegionBlockSet.clear();
172  Blocks.clear();
173  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
174 
175  Function *OuterFn = OI.getFunction();
176  CodeExtractorAnalysisCache CEAC(*OuterFn);
177  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
178  /* AggregateArgs */ false,
179  /* BlockFrequencyInfo */ nullptr,
180  /* BranchProbabilityInfo */ nullptr,
181  /* AssumptionCache */ nullptr,
182  /* AllowVarArgs */ true,
183  /* AllowAlloca */ true,
184  /* Suffix */ ".omp_par");
185 
186  LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
187  LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
188  << " Exit: " << OI.ExitBB->getName() << "\n");
189  assert(Extractor.isEligible() &&
190  "Expected OpenMP outlining to be possible!");
191 
192  Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
193 
194  LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n");
195  LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
196  assert(OutlinedFn->getReturnType()->isVoidTy() &&
197  "OpenMP outlined functions should not return a value!");
198 
199  // For compatibility with the clang CG we move the outlined function after the
200  // one with the parallel region.
201  OutlinedFn->removeFromParent();
202  M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
203 
204  // Remove the artificial entry introduced by the extractor right away; we
205  // made our own entry block after all.
206  {
207  BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
208  assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
209  assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
210  if (AllowExtractorSinking) {
211  // Move instructions from the to-be-deleted ArtificialEntry to the entry
212  // basic block of the parallel region. CodeExtractor may have sunk
213  // allocas/bitcasts for values that are solely used in the outlined
214  // region and do not escape.
215  assert(!ArtificialEntry.empty() &&
216  "Expected instructions to sink in the outlined region");
217  for (BasicBlock::iterator It = ArtificialEntry.begin(),
218  End = ArtificialEntry.end();
219  It != End;) {
220  Instruction &I = *It;
221  It++;
222 
223  if (I.isTerminator())
224  continue;
225 
226  I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
227  }
228  }
229  OI.EntryBB->moveBefore(&ArtificialEntry);
230  ArtificialEntry.eraseFromParent();
231  }
232  assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
233  assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
234 
235  // Run a user callback, e.g. to add attributes.
236  if (OI.PostOutlineCB)
237  OI.PostOutlineCB(*OutlinedFn);
238  }
239 
240  // Remove work items that have been completed.
241  OutlineInfos = std::move(DeferredOutlines);
242 }
243 
244 OpenMPIRBuilder::~OpenMPIRBuilder() {
245  assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
246 }
247 
249  IntegerType *I32Ty = Type::getInt32Ty(M.getContext());
250  auto *GV = new GlobalVariable(
251  M, I32Ty,
252  /* isConstant = */ true, GlobalValue::WeakODRLinkage,
253  ConstantInt::get(I32Ty, DebugKind), "__omp_rtl_debug_kind");
254 
255  return GV;
256 }
257 
258 Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
259  IdentFlag LocFlags,
260  unsigned Reserve2Flags) {
261  // Enable "C-mode".
262  LocFlags |= OMP_IDENT_FLAG_KMPC;
263 
264  Value *&Ident =
265  IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
266  if (!Ident) {
267  Constant *I32Null = ConstantInt::getNullValue(Int32);
268  Constant *IdentData[] = {
269  I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)),
270  ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr};
271  Constant *Initializer =
272  ConstantStruct::get(OpenMPIRBuilder::Ident, IdentData);
273 
274  // Look for existing encoding of the location + flags, not needed but
275  // minimizes the difference to the existing solution while we transition.
276  for (GlobalVariable &GV : M.getGlobalList())
277  if (GV.getValueType() == OpenMPIRBuilder::Ident && GV.hasInitializer())
278  if (GV.getInitializer() == Initializer)
279  return Ident = &GV;
280 
281  auto *GV = new GlobalVariable(M, OpenMPIRBuilder::Ident,
282  /* isConstant = */ true,
283  GlobalValue::PrivateLinkage, Initializer);
284  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
285  GV->setAlignment(Align(8));
286  Ident = GV;
287  }
288  return Builder.CreatePointerCast(Ident, IdentPtr);
289 }
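As an illustration of how the helpers above fit together, here is the recurring pattern used throughout this file, shown outside of any particular function; Loc is an assumed LocationDescription for the directive being lowered.

    // Sketch of the recurring pattern in this file.
    Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);  // ";file;function;line;col;;"
    Value *Ident = getOrCreateIdent(SrcLocStr);       // cached, cast to IdentPtr
    Value *ThreadID = getOrCreateThreadID(Ident);     // __kmpc_global_thread_num
    // Most __kmpc_* runtime calls below take Ident and ThreadID as their first
    // two arguments.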
290 
291 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
292  Constant *&SrcLocStr = SrcLocStrMap[LocStr];
293  if (!SrcLocStr) {
294  Constant *Initializer =
295  ConstantDataArray::getString(M.getContext(), LocStr);
296 
297  // Look for existing encoding of the location, not needed but minimizes the
298  // difference to the existing solution while we transition.
299  for (GlobalVariable &GV : M.getGlobalList())
300  if (GV.isConstant() && GV.hasInitializer() &&
301  GV.getInitializer() == Initializer)
302  return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);
303 
304  SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "",
305  /* AddressSpace */ 0, &M);
306  }
307  return SrcLocStr;
308 }
309 
310 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName,
311  StringRef FileName,
312  unsigned Line,
313  unsigned Column) {
314  SmallString<128> Buffer;
315  Buffer.push_back(';');
316  Buffer.append(FileName);
317  Buffer.push_back(';');
318  Buffer.append(FunctionName);
319  Buffer.push_back(';');
320  Buffer.append(std::to_string(Line));
321  Buffer.push_back(';');
322  Buffer.append(std::to_string(Column));
323  Buffer.push_back(';');
324  Buffer.push_back(';');
325  return getOrCreateSrcLocStr(Buffer.str());
326 }
327 
329  return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
330 }
331 
332 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, Function *F) {
333  DILocation *DIL = DL.get();
334  if (!DIL)
335  return getOrCreateDefaultSrcLocStr();
336  StringRef FileName = M.getName();
337  if (DIFile *DIF = DIL->getFile())
338  if (Optional<StringRef> Source = DIF->getSource())
339  FileName = *Source;
340  StringRef Function = DIL->getScope()->getSubprogram()->getName();
341  if (Function.empty() && F)
342  Function = F->getName();
343  return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
344  DIL->getColumn());
345 }
346 
347 Constant *
348 OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
349  return getOrCreateSrcLocStr(Loc.DL, Loc.IP.getBlock()->getParent());
350 }
351 
352 Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
353  return Builder.CreateCall(
354  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
355  "omp_global_thread_num");
356 }
357 
358 OpenMPIRBuilder::InsertPointTy
359 OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK,
360  bool ForceSimpleCall, bool CheckCancelFlag) {
361  if (!updateToLocation(Loc))
362  return Loc.IP;
363  return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
364 }
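A minimal usage sketch (not from the LLVM sources): emitting an explicit barrier from a frontend, assuming an initialized OpenMPIRBuilder OMPBuilder, an IRBuilder<> Builder whose insertion point marks where the barrier belongs, and a DebugLoc DL.

    // Sketch only.
    OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
    Builder.restoreIP(OMPBuilder.createBarrier(Loc, omp::Directive::OMPD_barrier,
                                               /* ForceSimpleCall */ false,
                                               /* CheckCancelFlag */ true));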
365 
366 OpenMPIRBuilder::InsertPointTy
367 OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
368  bool ForceSimpleCall, bool CheckCancelFlag) {
369  // Build call __kmpc_cancel_barrier(loc, thread_id) or
370  // __kmpc_barrier(loc, thread_id);
371 
372  IdentFlag BarrierLocFlags;
373  switch (Kind) {
374  case OMPD_for:
375  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
376  break;
377  case OMPD_sections:
378  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
379  break;
380  case OMPD_single:
381  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
382  break;
383  case OMPD_barrier:
384  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
385  break;
386  default:
387  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
388  break;
389  }
390 
391  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
392  Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags),
393  getOrCreateThreadID(getOrCreateIdent(SrcLocStr))};
394 
395  // If we are in a cancellable parallel region, barriers are cancellation
396  // points.
397  // TODO: Check why we would force simple calls or ignore the cancel flag.
398  bool UseCancelBarrier =
399  !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
400 
401  Value *Result =
402  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
403  UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
404  : OMPRTL___kmpc_barrier),
405  Args);
406 
407  if (UseCancelBarrier && CheckCancelFlag)
408  emitCancelationCheckImpl(Result, OMPD_parallel);
409 
410  return Builder.saveIP();
411 }
412 
413 OpenMPIRBuilder::InsertPointTy
414 OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
415  Value *IfCondition,
416  omp::Directive CanceledDirective) {
417  if (!updateToLocation(Loc))
418  return Loc.IP;
419 
420  // LLVM utilities like blocks with terminators.
421  auto *UI = Builder.CreateUnreachable();
422 
423  Instruction *ThenTI = UI, *ElseTI = nullptr;
424  if (IfCondition)
425  SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
426  Builder.SetInsertPoint(ThenTI);
427 
428  Value *CancelKind = nullptr;
429  switch (CanceledDirective) {
430 #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
431  case DirectiveEnum: \
432  CancelKind = Builder.getInt32(Value); \
433  break;
434 #include "llvm/Frontend/OpenMP/OMPKinds.def"
435  default:
436  llvm_unreachable("Unknown cancel kind!");
437  }
438 
439  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
440  Value *Ident = getOrCreateIdent(SrcLocStr);
441  Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
442  Value *Result = Builder.CreateCall(
443  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
444  auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) {
445  if (CanceledDirective == OMPD_parallel) {
446  IRBuilder<>::InsertPointGuard IPG(Builder);
447  Builder.restoreIP(IP);
448  createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
449  omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
450  /* CheckCancelFlag */ false);
451  }
452  };
453 
454  // The actual cancel logic is shared with others, e.g., cancel_barriers.
455  emitCancelationCheckImpl(Result, CanceledDirective, ExitCB);
456 
457  // Update the insertion point and remove the terminator we introduced.
458  Builder.SetInsertPoint(UI->getParent());
459  UI->eraseFromParent();
460 
461  return Builder.saveIP();
462 }
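Similarly, a hedged sketch of emitting a cancellation point for a parallel region; IfCond is an optional i1 value from a cancel if-clause, and OMPBuilder, Builder, and DL are assumed as above.

    // Sketch only.
    Value *IfCond = nullptr; // or an i1 value for "#pragma omp cancel ... if(expr)"
    Builder.restoreIP(OMPBuilder.createCancel(
        OpenMPIRBuilder::LocationDescription(Builder.saveIP(), DL), IfCond,
        omp::Directive::OMPD_parallel));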
463 
464 void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag,
465  omp::Directive CanceledDirective,
466  FinalizeCallbackTy ExitCB) {
467  assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
468  "Unexpected cancellation!");
469 
470  // For a cancel barrier we create two new blocks.
471  BasicBlock *BB = Builder.GetInsertBlock();
472  BasicBlock *NonCancellationBlock;
473  if (Builder.GetInsertPoint() == BB->end()) {
474  // TODO: This branch will not be needed once we have moved to the
475  // OpenMPIRBuilder codegen completely.
476  NonCancellationBlock = BasicBlock::Create(
477  BB->getContext(), BB->getName() + ".cont", BB->getParent());
478  } else {
479  NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
480  BB->getTerminator()->eraseFromParent();
481  Builder.SetInsertPoint(BB);
482  }
483  BasicBlock *CancellationBlock = BasicBlock::Create(
484  BB->getContext(), BB->getName() + ".cncl", BB->getParent());
485 
486  // Jump to them based on the return value.
487  Value *Cmp = Builder.CreateIsNull(CancelFlag);
488  Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
489  /* TODO weight */ nullptr, nullptr);
490 
491  // From the cancellation block we finalize all variables and go to the
492  // post finalization block that is known to the FiniCB callback.
493  Builder.SetInsertPoint(CancellationBlock);
494  if (ExitCB)
495  ExitCB(Builder.saveIP());
496  auto &FI = FinalizationStack.back();
497  FI.FiniCB(Builder.saveIP());
498 
499  // The continuation block is where code generation continues.
500  Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
501 }
502 
503 IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
504  const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
505  BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
506  FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
507  omp::ProcBindKind ProcBind, bool IsCancellable) {
508  if (!updateToLocation(Loc))
509  return Loc.IP;
510 
511  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
512  Value *Ident = getOrCreateIdent(SrcLocStr);
513  Value *ThreadID = getOrCreateThreadID(Ident);
514 
515  if (NumThreads) {
516  // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
517  Value *Args[] = {
518  Ident, ThreadID,
519  Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
520  Builder.CreateCall(
521  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
522  }
523 
524  if (ProcBind != OMP_PROC_BIND_default) {
525  // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
526  Value *Args[] = {
527  Ident, ThreadID,
528  ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
529  Builder.CreateCall(
530  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
531  }
532 
533  BasicBlock *InsertBB = Builder.GetInsertBlock();
534  Function *OuterFn = InsertBB->getParent();
535 
536  // Save the outer alloca block because the insertion iterator may get
537  // invalidated and we still need this later.
538  BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
539 
540  // Vector to remember instructions we used only during the modeling but which
541  // we want to delete at the end.
542  SmallVector<Instruction *, 4> ToBeDeleted;
543 
544  // Change the location to the outer alloca insertion point to create and
545  // initialize the allocas we pass into the parallel region.
546  Builder.restoreIP(OuterAllocaIP);
547  AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
548  AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
549 
550  // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
551  // program, otherwise we only need them for modeling purposes to get the
552  // associated arguments in the outlined function. In the former case,
553  // initialize the allocas properly, in the latter case, delete them later.
554  if (IfCondition) {
555  Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
556  Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
557  } else {
558  ToBeDeleted.push_back(TIDAddr);
559  ToBeDeleted.push_back(ZeroAddr);
560  }
561 
562  // Create an artificial insertion point that will also ensure the blocks we
563  // are about to split are not degenerate.
564  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);
565 
566  Instruction *ThenTI = UI, *ElseTI = nullptr;
567  if (IfCondition)
568  SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
569 
570  BasicBlock *ThenBB = ThenTI->getParent();
571  BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
572  BasicBlock *PRegBodyBB =
573  PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
574  BasicBlock *PRegPreFiniBB =
575  PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
576  BasicBlock *PRegExitBB =
577  PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");
578 
579  auto FiniCBWrapper = [&](InsertPointTy IP) {
580  // Hide "open-ended" blocks from the given FiniCB by setting the right jump
581  // target to the region exit block.
582  if (IP.getBlock()->end() == IP.getPoint()) {
583  IRBuilder<>::InsertPointGuard IPG(Builder);
584  Builder.restoreIP(IP);
585  Instruction *I = Builder.CreateBr(PRegExitBB);
586  IP = InsertPointTy(I->getParent(), I->getIterator());
587  }
588  assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
589  IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
590  "Unexpected insertion point for finalization call!");
591  return FiniCB(IP);
592  };
593 
594  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
595 
596  // Generate the privatization allocas in the block that will become the entry
597  // of the outlined function.
598  Builder.SetInsertPoint(PRegEntryBB->getTerminator());
599  InsertPointTy InnerAllocaIP = Builder.saveIP();
600 
601  AllocaInst *PrivTIDAddr =
602  Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
603  Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr, "tid");
604 
605  // Add some fake uses for OpenMP provided arguments.
606  ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"));
607  Instruction *ZeroAddrUse = Builder.CreateLoad(Int32, ZeroAddr,
608  "zero.addr.use");
609  ToBeDeleted.push_back(ZeroAddrUse);
610 
611  // ThenBB
612  // |
613  // V
614  // PRegionEntryBB <- Privatization allocas are placed here.
615  // |
616  // V
617  // PRegionBodyBB <- BodyGen is invoked here.
618  // |
619  // V
620  // PRegPreFiniBB <- The block we will start finalization from.
621  // |
622  // V
623  // PRegionExitBB <- A common exit to simplify block collection.
624  //
625 
626  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");
627 
628  // Let the caller create the body.
629  assert(BodyGenCB && "Expected body generation callback!");
630  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
631  BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);
632 
633  LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
634 
635  FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
636  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
637  if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
638  llvm::LLVMContext &Ctx = F->getContext();
639  MDBuilder MDB(Ctx);
640  // Annotate the callback behavior of the __kmpc_fork_call:
641  // - The callback callee is argument number 2 (microtask).
642  // - The first two arguments of the callback callee are unknown (-1).
643  // - All variadic arguments to the __kmpc_fork_call are passed to the
644  // callback callee.
645  F->addMetadata(
646  llvm::LLVMContext::MD_callback,
647  *llvm::MDNode::get(
648  Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
649  /* VarArgsArePassed */ true)}));
650  }
651  }
652 
653  OutlineInfo OI;
654  OI.PostOutlineCB = [=](Function &OutlinedFn) {
655  // Add some known attributes.
656  OutlinedFn.addParamAttr(0, Attribute::NoAlias);
657  OutlinedFn.addParamAttr(1, Attribute::NoAlias);
658  OutlinedFn.addFnAttr(Attribute::NoUnwind);
659  OutlinedFn.addFnAttr(Attribute::NoRecurse);
660 
661  assert(OutlinedFn.arg_size() >= 2 &&
662  "Expected at least tid and bounded tid as arguments");
663  unsigned NumCapturedVars =
664  OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
665 
666  CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
667  CI->getParent()->setName("omp_parallel");
668  Builder.SetInsertPoint(CI);
669 
670  // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
671  Value *ForkCallArgs[] = {
672  Ident, Builder.getInt32(NumCapturedVars),
673  Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};
674 
675  SmallVector<Value *, 16> RealArgs;
676  RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
677  RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
678 
679  Builder.CreateCall(RTLFn, RealArgs);
680 
681  LLVM_DEBUG(dbgs() << "With fork_call placed: "
682  << *Builder.GetInsertBlock()->getParent() << "\n");
683 
684  InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
685 
686  // Initialize the local TID stack location with the argument value.
687  Builder.SetInsertPoint(PrivTID);
688  Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
689  Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr);
690 
691  // If no "if" clause was present we do not need the call created during
692  // outlining, otherwise we reuse it in the serialized parallel region.
693  if (!ElseTI) {
694  CI->eraseFromParent();
695  } else {
696 
697  // If an "if" clause was present we are now generating the serialized
698  // version into the "else" branch.
699  Builder.SetInsertPoint(ElseTI);
700 
701  // Build calls __kmpc_serialized_parallel(&Ident, GTid);
702  Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
703  Builder.CreateCall(
704  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel),
705  SerializedParallelCallArgs);
706 
707  // OutlinedFn(&GTid, &zero, CapturedStruct);
708  CI->removeFromParent();
709  Builder.Insert(CI);
710 
711  // __kmpc_end_serialized_parallel(&Ident, GTid);
712  Value *EndArgs[] = {Ident, ThreadID};
713  Builder.CreateCall(
714  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel),
715  EndArgs);
716 
717  LLVM_DEBUG(dbgs() << "With serialized parallel region: "
718  << *Builder.GetInsertBlock()->getParent() << "\n");
719  }
720 
721  for (Instruction *I : ToBeDeleted)
722  I->eraseFromParent();
723  };
724 
725  // Adjust the finalization stack, verify the adjustment, and call the
726  // finalize function one last time to finalize values between the pre-fini
727  // block and the exit block if we left the parallel region "the normal way".
728  auto FiniInfo = FinalizationStack.pop_back_val();
729  (void)FiniInfo;
730  assert(FiniInfo.DK == OMPD_parallel &&
731  "Unexpected finalization stack state!");
732 
733  Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();
734 
735  InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
736  FiniCB(PreFiniIP);
737 
738  OI.EntryBB = PRegEntryBB;
739  OI.ExitBB = PRegExitBB;
740 
741  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
742  SmallVector<BasicBlock *, 32> Blocks;
743  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
744 
745  // Ensure a single exit node for the outlined region by creating one.
746  // We might have multiple incoming edges to the exit now due to finalizations,
747  // e.g., cancel calls that cause the control flow to leave the region.
748  BasicBlock *PRegOutlinedExitBB = PRegExitBB;
749  PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
750  PRegOutlinedExitBB->setName("omp.par.outlined.exit");
751  Blocks.push_back(PRegOutlinedExitBB);
752 
753  CodeExtractorAnalysisCache CEAC(*OuterFn);
754  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
755  /* AggregateArgs */ false,
756  /* BlockFrequencyInfo */ nullptr,
757  /* BranchProbabilityInfo */ nullptr,
758  /* AssumptionCache */ nullptr,
759  /* AllowVarArgs */ true,
760  /* AllowAlloca */ true,
761  /* Suffix */ ".omp_par");
762 
763  // Find inputs to, outputs from the code region.
764  BasicBlock *CommonExit = nullptr;
765  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
766  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
767  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
768 
769  LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");
770 
771  FunctionCallee TIDRTLFn =
772  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
773 
774  auto PrivHelper = [&](Value &V) {
775  if (&V == TIDAddr || &V == ZeroAddr)
776  return;
777 
778  SetVector<Use *> Uses;
779  for (Use &U : V.uses())
780  if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
781  if (ParallelRegionBlockSet.count(UserI->getParent()))
782  Uses.insert(&U);
783 
784  // __kmpc_fork_call expects extra arguments as pointers. If the input
785  // already has a pointer type, everything is fine. Otherwise, store the
786  // value onto stack and load it back inside the to-be-outlined region. This
787  // will ensure only the pointer will be passed to the function.
788  // FIXME: if there are more than 15 trailing arguments, they must be
789  // additionally packed in a struct.
790  Value *Inner = &V;
791  if (!V.getType()->isPointerTy()) {
793  LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");
794 
795  Builder.restoreIP(OuterAllocaIP);
796  Value *Ptr =
797  Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");
798 
799  // Store to stack at end of the block that currently branches to the entry
800  // block of the to-be-outlined region.
801  Builder.SetInsertPoint(InsertBB,
802  InsertBB->getTerminator()->getIterator());
803  Builder.CreateStore(&V, Ptr);
804 
805  // Load back next to allocations in the to-be-outlined region.
806  Builder.restoreIP(InnerAllocaIP);
807  Inner = Builder.CreateLoad(V.getType(), Ptr);
808  }
809 
810  Value *ReplacementValue = nullptr;
811  CallInst *CI = dyn_cast<CallInst>(&V);
812  if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
813  ReplacementValue = PrivTID;
814  } else {
815  Builder.restoreIP(
816  PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
817  assert(ReplacementValue &&
818  "Expected copy/create callback to set replacement value!");
819  if (ReplacementValue == &V)
820  return;
821  }
822 
823  for (Use *UPtr : Uses)
824  UPtr->set(ReplacementValue);
825  };
826 
827  // Reset the inner alloca insertion as it will be used for loading the values
828  // wrapped into pointers before passing them into the to-be-outlined region.
829  // Configure it to insert immediately after the fake use of zero address so
830  // that they are available in the generated body and so that the
831  // OpenMP-related values (thread ID and zero address pointers) remain leading
832  // in the argument list.
833  InnerAllocaIP = IRBuilder<>::InsertPoint(
834  ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());
835 
836  // Reset the outer alloca insertion point to the entry of the relevant block
837  // in case it was invalidated.
838  OuterAllocaIP = IRBuilder<>::InsertPoint(
839  OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());
840 
841  for (Value *Input : Inputs) {
842  LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
843  PrivHelper(*Input);
844  }
845  LLVM_DEBUG({
846  for (Value *Output : Outputs)
847  LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
848  });
849  assert(Outputs.empty() &&
850  "OpenMP outlining should not produce live-out values!");
851 
852  LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
853  LLVM_DEBUG({
854  for (auto *BB : Blocks)
855  dbgs() << " PBR: " << BB->getName() << "\n";
856  });
857 
858  // Register the outlined info.
859  addOutlineInfo(std::move(OI));
860 
861  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
862  UI->eraseFromParent();
863 
864  return AfterIP;
865 }
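To make the callback protocol of createParallel concrete, here is a deliberately minimal editorial sketch of an invocation; it is not taken from a frontend. OMPBuilder, Builder, DL, and the enclosing function's entry block EntryBB are assumptions, and the callbacks do no real work.

    // Sketch only: minimal callbacks for createParallel.
    using InsertPointTy = OpenMPIRBuilder::InsertPointTy;

    auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                         BasicBlock &ContinuationBB) {
      // Emit the parallel region body at CodeGenIP; region-local allocas go to
      // AllocaIP.
    };
    auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                      Value &Original, Value &Inner,
                      Value *&ReplacementValue) -> InsertPointTy {
      ReplacementValue = &Inner; // No privatization: reuse the captured value.
      return CodeGenIP;
    };
    auto FiniCB = [&](InsertPointTy CodeGenIP) { /* nothing to finalize */ };

    OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
    InsertPointTy OuterAllocaIP(&EntryBB, EntryBB.getFirstInsertionPt());
    Builder.restoreIP(OMPBuilder.createParallel(
        Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB, /* IfCondition */ nullptr,
        /* NumThreads */ nullptr, omp::ProcBindKind::OMP_PROC_BIND_default,
        /* IsCancellable */ false));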
866 
867 void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) {
868  // Build call void __kmpc_flush(ident_t *loc)
869  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
870  Value *Args[] = {getOrCreateIdent(SrcLocStr)};
871 
872  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
873 }
874 
875 void OpenMPIRBuilder::createFlush(const LocationDescription &Loc) {
876  if (!updateToLocation(Loc))
877  return;
878  emitFlush(Loc);
879 }
880 
881 void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) {
882  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
883  // global_tid);
884  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
885  Value *Ident = getOrCreateIdent(SrcLocStr);
886  Value *Args[] = {Ident, getOrCreateThreadID(Ident)};
887 
888  // Ignore return result until untied tasks are supported.
889  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
890  Args);
891 }
892 
893 void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) {
894  if (!updateToLocation(Loc))
895  return;
896  emitTaskwaitImpl(Loc);
897 }
898 
899 void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) {
900  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
901  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
902  Value *Ident = getOrCreateIdent(SrcLocStr);
903  Constant *I32Null = ConstantInt::getNullValue(Int32);
904  Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
905 
906  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
907  Args);
908 }
909 
910 void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) {
911  if (!updateToLocation(Loc))
912  return;
913  emitTaskyieldImpl(Loc);
914 }
915 
916 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
917  const LocationDescription &Loc, InsertPointTy AllocaIP,
918  ArrayRef<StorableBodyGenCallbackTy> SectionCBs, PrivatizeCallbackTy PrivCB,
919  FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) {
920  if (!updateToLocation(Loc))
921  return Loc.IP;
922 
923  auto FiniCBWrapper = [&](InsertPointTy IP) {
924  if (IP.getBlock()->end() != IP.getPoint())
925  return FiniCB(IP);
926  // This must be done, otherwise any nested constructs using FinalizeOMPRegion
927  // will fail because that function requires the Finalization Basic Block to
928  // have a terminator, which is already removed by EmitOMPRegionBody.
929  // IP is currently at the cancellation block.
930  // We need to backtrack to the condition block to fetch
931  // the exit block and create a branch from the cancellation block
932  // to the exit block.
933  IRBuilder<>::InsertPointGuard IPG(Builder);
934  Builder.restoreIP(IP);
935  auto *CaseBB = IP.getBlock()->getSinglePredecessor();
936  auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
937  auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
938  Instruction *I = Builder.CreateBr(ExitBB);
939  IP = InsertPointTy(I->getParent(), I->getIterator());
940  return FiniCB(IP);
941  };
942 
943  FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
944 
945  // Each section is emitted as a switch case.
946  // Each finalization callback is handled from clang.EmitOMPSectionDirective()
947  // -> OMP.createSection(), which generates the IR for each section.
948  // Iterate through all sections and emit a switch construct:
949  // switch (IV) {
950  // case 0:
951  // <SectionStmt[0]>;
952  // break;
953  // ...
954  // case <NumSection> - 1:
955  // <SectionStmt[<NumSection> - 1]>;
956  // break;
957  // }
958  // ...
959  // section_loop.after:
960  // <FiniCB>;
961  auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) {
962  auto *CurFn = CodeGenIP.getBlock()->getParent();
963  auto *ForIncBB = CodeGenIP.getBlock()->getSingleSuccessor();
964  auto *ForExitBB = CodeGenIP.getBlock()
965  ->getSinglePredecessor()
966  ->getTerminator()
967  ->getSuccessor(1);
968  SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, ForIncBB);
969  Builder.restoreIP(CodeGenIP);
970  unsigned CaseNumber = 0;
971  for (auto SectionCB : SectionCBs) {
972  auto *CaseBB = BasicBlock::Create(M.getContext(),
973  "omp_section_loop.body.case", CurFn);
974  SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
975  Builder.SetInsertPoint(CaseBB);
976  SectionCB(InsertPointTy(), Builder.saveIP(), *ForExitBB);
977  CaseNumber++;
978  }
979  // Remove the existing terminator from the body BB since there can be no
980  // terminators after switch/case.
981  CodeGenIP.getBlock()->getTerminator()->eraseFromParent();
982  };
983  // Loop body ends here.
984  // LowerBound, UpperBound, and Stride for createCanonicalLoop.
985  Type *I32Ty = Type::getInt32Ty(M.getContext());
986  Value *LB = ConstantInt::get(I32Ty, 0);
987  Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
988  Value *ST = ConstantInt::get(I32Ty, 1);
989  llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
990  Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
991  InsertPointTy AfterIP =
992  applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, true);
993  BasicBlock *LoopAfterBB = AfterIP.getBlock();
994  Instruction *SplitPos = LoopAfterBB->getTerminator();
995  if (!isa_and_nonnull<BranchInst>(SplitPos))
996  SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB);
997  // ExitBB is placed after LoopAfterBB because LoopAfterBB is used for the
998  // FinalizationCB, which requires a BB with a branch.
999  BasicBlock *ExitBB =
1000  LoopAfterBB->splitBasicBlock(SplitPos, "omp_sections.end");
1001  SplitPos->eraseFromParent();
1002 
1003  // Apply the finalization callback in LoopAfterBB
1004  auto FiniInfo = FinalizationStack.pop_back_val();
1005  assert(FiniInfo.DK == OMPD_sections &&
1006  "Unexpected finalization stack state!");
1007  Builder.SetInsertPoint(LoopAfterBB->getTerminator());
1008  FiniInfo.FiniCB(Builder.saveIP());
1009  Builder.SetInsertPoint(ExitBB);
1010 
1011  return Builder.saveIP();
1012 }
1013 
1014 OpenMPIRBuilder::InsertPointTy
1015 OpenMPIRBuilder::createSection(const LocationDescription &Loc,
1016  BodyGenCallbackTy BodyGenCB,
1017  FinalizeCallbackTy FiniCB) {
1018  if (!updateToLocation(Loc))
1019  return Loc.IP;
1020 
1021  auto FiniCBWrapper = [&](InsertPointTy IP) {
1022  if (IP.getBlock()->end() != IP.getPoint())
1023  return FiniCB(IP);
1024  // This must be done, otherwise any nested constructs using FinalizeOMPRegion
1025  // will fail because that function requires the Finalization Basic Block to
1026  // have a terminator, which is already removed by EmitOMPRegionBody.
1027  // IP is currently at the cancellation block.
1028  // We need to backtrack to the condition block to fetch
1029  // the exit block and create a branch from the cancellation block
1030  // to the exit block.
1031  IRBuilder<>::InsertPointGuard IPG(Builder);
1032  Builder.restoreIP(IP);
1033  auto *CaseBB = Loc.IP.getBlock();
1034  auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
1035  auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
1036  Instruction *I = Builder.CreateBr(ExitBB);
1037  IP = InsertPointTy(I->getParent(), I->getIterator());
1038  return FiniCB(IP);
1039  };
1040 
1041  Directive OMPD = Directive::OMPD_sections;
1042  // Since we are using Finalization Callback here, HasFinalize
1043  // and IsCancellable have to be true
1044  return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper,
1045  /*Conditional*/ false, /*hasFinalize*/ true,
1046  /*IsCancellable*/ true);
1047 }
1048 
1049 /// Create a function with a unique name and a "void (i8*, i8*)" signature in
1050 /// the given module and return it.
1051 static Function *getFreshReductionFunc(Module &M) {
1052  Type *VoidTy = Type::getVoidTy(M.getContext());
1053  Type *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
1054  auto *FuncTy =
1055  FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy}, /* IsVarArg */ false);
1056  return Function::Create(FuncTy, GlobalVariable::LinkageTypes::InternalLinkage,
1057  M.getDataLayout().getDefaultGlobalsAddressSpace(),
1058  ".omp.reduction.func", &M);
1059 }
1060 
1061 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
1062  const LocationDescription &Loc, InsertPointTy AllocaIP,
1063  ArrayRef<ReductionInfo> ReductionInfos, bool IsNoWait) {
1064  for (const ReductionInfo &RI : ReductionInfos) {
1065  (void)RI;
1066  assert(RI.Variable && "expected non-null variable");
1067  assert(RI.PrivateVariable && "expected non-null private variable");
1068  assert(RI.ReductionGen && "expected non-null reduction generator callback");
1069  assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
1070  "expected variables and their private equivalents to have the same "
1071  "type");
1072  assert(RI.Variable->getType()->isPointerTy() &&
1073  "expected variables to be pointers");
1074  }
1075 
1076  if (!updateToLocation(Loc))
1077  return InsertPointTy();
1078 
1079  BasicBlock *InsertBlock = Loc.IP.getBlock();
1080  BasicBlock *ContinuationBlock =
1081  InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize");
1082  InsertBlock->getTerminator()->eraseFromParent();
1083 
1084  // Create and populate array of type-erased pointers to private reduction
1085  // values.
1086  unsigned NumReductions = ReductionInfos.size();
1087  Type *RedArrayTy = ArrayType::get(Builder.getInt8PtrTy(), NumReductions);
1088  Builder.restoreIP(AllocaIP);
1089  Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array");
1090 
1091  Builder.SetInsertPoint(InsertBlock, InsertBlock->end());
1092 
1093  for (auto En : enumerate(ReductionInfos)) {
1094  unsigned Index = En.index();
1095  const ReductionInfo &RI = En.value();
1096  Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
1097  RedArrayTy, RedArray, 0, Index, "red.array.elem." + Twine(Index));
1098  Value *Casted =
1099  Builder.CreateBitCast(RI.PrivateVariable, Builder.getInt8PtrTy(),
1100  "private.red.var." + Twine(Index) + ".casted");
1101  Builder.CreateStore(Casted, RedArrayElemPtr);
1102  }
1103 
1104  // Emit a call to the runtime function that orchestrates the reduction.
1105  // Declare the reduction function in the process.
1106  Function *Func = Builder.GetInsertBlock()->getParent();
1107  Module *Module = Func->getParent();
1108  Value *RedArrayPtr =
1109  Builder.CreateBitCast(RedArray, Builder.getInt8PtrTy(), "red.array.ptr");
1110  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1111  bool CanGenerateAtomic =
1112  llvm::all_of(ReductionInfos, [](const ReductionInfo &RI) {
1113  return RI.AtomicReductionGen;
1114  });
1115  Value *Ident = getOrCreateIdent(
1116  SrcLocStr, CanGenerateAtomic ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
1117  : IdentFlag(0));
1118  Value *ThreadId = getOrCreateThreadID(Ident);
1119  Constant *NumVariables = Builder.getInt32(NumReductions);
1120  const DataLayout &DL = Module->getDataLayout();
1121  unsigned RedArrayByteSize = DL.getTypeStoreSize(RedArrayTy);
1122  Constant *RedArraySize = Builder.getInt64(RedArrayByteSize);
1123  Function *ReductionFunc = getFreshReductionFunc(*Module);
1124  Value *Lock = getOMPCriticalRegionLock(".reduction");
1125  Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
1126  IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
1127  : RuntimeFunction::OMPRTL___kmpc_reduce);
1128  CallInst *ReduceCall =
1129  Builder.CreateCall(ReduceFunc,
1130  {Ident, ThreadId, NumVariables, RedArraySize,
1131  RedArrayPtr, ReductionFunc, Lock},
1132  "reduce");
1133 
1134  // Create final reduction entry blocks for the atomic and non-atomic case.
1135  // Emit IR that dispatches control flow to one of the blocks based on the
1136  // reduction supporting the atomic mode.
1137  BasicBlock *NonAtomicRedBlock =
1138  BasicBlock::Create(Module->getContext(), "reduce.switch.nonatomic", Func);
1139  BasicBlock *AtomicRedBlock =
1140  BasicBlock::Create(Module->getContext(), "reduce.switch.atomic", Func);
1141  SwitchInst *Switch =
1142  Builder.CreateSwitch(ReduceCall, ContinuationBlock, /* NumCases */ 2);
1143  Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
1144  Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
1145 
1146  // Populate the non-atomic reduction using the elementwise reduction function.
1147  // This loads the elements from the global and private variables and reduces
1148  // them before storing back the result to the global variable.
1149  Builder.SetInsertPoint(NonAtomicRedBlock);
1150  for (auto En : enumerate(ReductionInfos)) {
1151  const ReductionInfo &RI = En.value();
1152  Type *ValueType = RI.getElementType();
1153  Value *RedValue = Builder.CreateLoad(ValueType, RI.Variable,
1154  "red.value." + Twine(En.index()));
1155  Value *PrivateRedValue =
1156  Builder.CreateLoad(ValueType, RI.PrivateVariable,
1157  "red.private.value." + Twine(En.index()));
1158  Value *Reduced;
1159  Builder.restoreIP(
1160  RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced));
1161  if (!Builder.GetInsertBlock())
1162  return InsertPointTy();
1163  Builder.CreateStore(Reduced, RI.Variable);
1164  }
1165  Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
1166  IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
1167  : RuntimeFunction::OMPRTL___kmpc_end_reduce);
1168  Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
1169  Builder.CreateBr(ContinuationBlock);
1170 
1171  // Populate the atomic reduction using the atomic elementwise reduction
1172  // function. There are no loads/stores here because they will be happening
1173  // inside the atomic elementwise reduction.
1174  Builder.SetInsertPoint(AtomicRedBlock);
1175  if (CanGenerateAtomic) {
1176  for (const ReductionInfo &RI : ReductionInfos) {
1177  Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.Variable,
1178  RI.PrivateVariable));
1179  if (!Builder.GetInsertBlock())
1180  return InsertPointTy();
1181  }
1182  Builder.CreateBr(ContinuationBlock);
1183  } else {
1184  Builder.CreateUnreachable();
1185  }
1186 
1187  // Populate the outlined reduction function using the elementwise reduction
1188  // function. Partial values are extracted from the type-erased array of
1189  // pointers to private variables.
1190  BasicBlock *ReductionFuncBlock =
1191  BasicBlock::Create(Module->getContext(), "", ReductionFunc);
1192  Builder.SetInsertPoint(ReductionFuncBlock);
1193  Value *LHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(0),
1194  RedArrayTy->getPointerTo());
1195  Value *RHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(1),
1196  RedArrayTy->getPointerTo());
1197  for (auto En : enumerate(ReductionInfos)) {
1198  const ReductionInfo &RI = En.value();
1199  Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
1200  RedArrayTy, LHSArrayPtr, 0, En.index());
1201  Value *LHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), LHSI8PtrPtr);
1202  Value *LHSPtr = Builder.CreateBitCast(LHSI8Ptr, RI.Variable->getType());
1203  Value *LHS = Builder.CreateLoad(RI.getElementType(), LHSPtr);
1204  Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
1205  RedArrayTy, RHSArrayPtr, 0, En.index());
1206  Value *RHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), RHSI8PtrPtr);
1207  Value *RHSPtr =
1208  Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType());
1209  Value *RHS = Builder.CreateLoad(RI.getElementType(), RHSPtr);
1210  Value *Reduced;
1211  Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced));
1212  if (!Builder.GetInsertBlock())
1213  return InsertPointTy();
1214  Builder.CreateStore(Reduced, LHSPtr);
1215  }
1216  Builder.CreateRetVoid();
1217 
1218  Builder.SetInsertPoint(ContinuationBlock);
1219  return Builder.saveIP();
1220 }
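For orientation, a hedged sketch of a non-atomic reduction generator matching the callback shape used above: it receives the already-loaded LHS and RHS element values and must set Reduced and return the new insertion point. The integer add is just an example.

    // Sketch only: a ReductionGen callback for an integer "+" reduction.
    auto SumReductionGen =
        [](OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS,
           Value *&Reduced) -> OpenMPIRBuilder::InsertPointTy {
      IRBuilder<> B(IP.getBlock(), IP.getPoint());
      Reduced = B.CreateAdd(LHS, RHS, "red.add");
      return B.saveIP();
    };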
1221 
1222 OpenMPIRBuilder::InsertPointTy
1223 OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
1224  BodyGenCallbackTy BodyGenCB,
1225  FinalizeCallbackTy FiniCB) {
1226 
1227  if (!updateToLocation(Loc))
1228  return Loc.IP;
1229 
1230  Directive OMPD = Directive::OMPD_master;
1231  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1232  Value *Ident = getOrCreateIdent(SrcLocStr);
1233  Value *ThreadId = getOrCreateThreadID(Ident);
1234  Value *Args[] = {Ident, ThreadId};
1235 
1236  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
1237  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
1238 
1239  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
1240  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
1241 
1242  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1243  /*Conditional*/ true, /*hasFinalize*/ true);
1244 }
1245 
1246 OpenMPIRBuilder::InsertPointTy
1247 OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
1248  BodyGenCallbackTy BodyGenCB,
1249  FinalizeCallbackTy FiniCB, Value *Filter) {
1250  if (!updateToLocation(Loc))
1251  return Loc.IP;
1252 
1253  Directive OMPD = Directive::OMPD_masked;
1254  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1255  Value *Ident = getOrCreateIdent(SrcLocStr);
1256  Value *ThreadId = getOrCreateThreadID(Ident);
1257  Value *Args[] = {Ident, ThreadId, Filter};
1258  Value *ArgsEnd[] = {Ident, ThreadId};
1259 
1260  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
1261  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
1262 
1263  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
1264  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);
1265 
1266  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1267  /*Conditional*/ true, /*hasFinalize*/ true);
1268 }
1269 
1270 CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
1271  DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
1272  BasicBlock *PostInsertBefore, const Twine &Name) {
1273  Module *M = F->getParent();
1274  LLVMContext &Ctx = M->getContext();
1275  Type *IndVarTy = TripCount->getType();
1276 
1277  // Create the basic block structure.
1278  BasicBlock *Preheader =
1279  BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore);
1280  BasicBlock *Header =
1281  BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore);
1282  BasicBlock *Cond =
1283  BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore);
1284  BasicBlock *Body =
1285  BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore);
1286  BasicBlock *Latch =
1287  BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore);
1288  BasicBlock *Exit =
1289  BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore);
1290  BasicBlock *After =
1291  BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore);
1292 
1293  // Use specified DebugLoc for new instructions.
1294  Builder.SetCurrentDebugLocation(DL);
1295 
1296  Builder.SetInsertPoint(Preheader);
1297  Builder.CreateBr(Header);
1298 
1299  Builder.SetInsertPoint(Header);
1300  PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
1301  IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
1302  Builder.CreateBr(Cond);
1303 
1304  Builder.SetInsertPoint(Cond);
1305  Value *Cmp =
1306  Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
1307  Builder.CreateCondBr(Cmp, Body, Exit);
1308 
1309  Builder.SetInsertPoint(Body);
1310  Builder.CreateBr(Latch);
1311 
1312  Builder.SetInsertPoint(Latch);
1313  Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
1314  "omp_" + Name + ".next", /*HasNUW=*/true);
1315  Builder.CreateBr(Header);
1316  IndVarPHI->addIncoming(Next, Latch);
1317 
1318  Builder.SetInsertPoint(Exit);
1319  Builder.CreateBr(After);
1320 
1321  // Remember and return the canonical control flow.
1322  LoopInfos.emplace_front();
1323  CanonicalLoopInfo *CL = &LoopInfos.front();
1324 
1325  CL->Preheader = Preheader;
1326  CL->Header = Header;
1327  CL->Cond = Cond;
1328  CL->Body = Body;
1329  CL->Latch = Latch;
1330  CL->Exit = Exit;
1331  CL->After = After;
1332 
1333 #ifndef NDEBUG
1334  CL->assertOK();
1335 #endif
1336  return CL;
1337 }
1338 
1339 CanonicalLoopInfo *
1340 OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
1341  LoopBodyGenCallbackTy BodyGenCB,
1342  Value *TripCount, const Twine &Name) {
1343  BasicBlock *BB = Loc.IP.getBlock();
1344  BasicBlock *NextBB = BB->getNextNode();
1345 
1346  CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(),
1347  NextBB, NextBB, Name);
1348  BasicBlock *After = CL->getAfter();
1349 
1350  // If location is not set, don't connect the loop.
1351  if (updateToLocation(Loc)) {
1352  // Split the loop at the insertion point: Branch to the preheader and move
1353  // every following instruction to after the loop (the After BB). Also, the
1354  // new successor is the loop's after block.
1355  Builder.CreateBr(CL->Preheader);
1356  After->getInstList().splice(After->begin(), BB->getInstList(),
1357  Builder.GetInsertPoint(), BB->end());
1358  After->replaceSuccessorsPhiUsesWith(BB, After);
1359  }
1360 
1361  // Emit the body content. We do it after connecting the loop to the CFG to
1362  // avoid that the callback encounters degenerate BBs.
1363  BodyGenCB(CL->getBodyIP(), CL->getIndVar());
1364 
1365 #ifndef NDEBUG
1366  CL->assertOK();
1367 #endif
1368  return CL;
1369 }
1370 
1371 CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop(
1372  const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
1373  Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
1374  InsertPointTy ComputeIP, const Twine &Name) {
1375 
1376  // Consider the following difficulties (assuming 8-bit signed integers):
1377  // * Adding \p Step to the loop counter which passes \p Stop may overflow:
1378  // DO I = 1, 100, 50
1379  // * A \p Step of INT_MIN cannot be normalized to a positive direction:
1380  // DO I = 100, 0, -128
1381 
1382  // Start, Stop and Step must be of the same integer type.
1383  auto *IndVarTy = cast<IntegerType>(Start->getType());
1384  assert(IndVarTy == Stop->getType() && "Stop type mismatch");
1385  assert(IndVarTy == Step->getType() && "Step type mismatch");
1386 
1387  LocationDescription ComputeLoc =
1388  ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
1389  updateToLocation(ComputeLoc);
1390 
1391  ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
1392  ConstantInt *One = ConstantInt::get(IndVarTy, 1);
1393 
1394  // Like Step, but always positive.
1395  Value *Incr = Step;
1396 
1397  // Distance between Start and Stop; always positive.
1398  Value *Span;
1399 
1400  // Condition indicating whether no iterations are executed at all, e.g.,
1401  // because UB < LB.
1402  Value *ZeroCmp;
1403 
1404  if (IsSigned) {
1405  // Ensure that increment is positive. If not, negate and invert LB and UB.
1406  Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
1407  Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
1408  Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
1409  Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
1410  Span = Builder.CreateSub(UB, LB, "", false, true);
1411  ZeroCmp = Builder.CreateICmp(
1412  InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB);
1413  } else {
1414  Span = Builder.CreateSub(Stop, Start, "", true);
1415  ZeroCmp = Builder.CreateICmp(
1416  InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start);
1417  }
1418 
1419  Value *CountIfLooping;
1420  if (InclusiveStop) {
1421  CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
1422  } else {
1423  // Avoid incrementing past stop since it could overflow.
1424  Value *CountIfTwo = Builder.CreateAdd(
1425  Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
1426  Value *OneCmp = Builder.CreateICmp(
1427  InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr);
1428  CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
1429  }
1430  Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
1431  "omp_" + Name + ".tripcount");
1432 
1433  auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
1434  Builder.restoreIP(CodeGenIP);
1435  Value *Span = Builder.CreateMul(IV, Step);
1436  Value *IndVar = Builder.CreateAdd(Span, Start);
1437  BodyGenCB(Builder.saveIP(), IndVar);
1438  };
1439  LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
1440  return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
1441 }
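An editorial sketch of driving this overload for a simple 0 to N exclusive loop, followed by a worked instance of the trip-count logic above; OMPBuilder, Builder, DL, and an i32 Value N are assumed.

    // Sketch only.
    auto LoopBodyGen = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP, Value *IV) {
      Builder.restoreIP(CodeGenIP);
      // Emit one iteration of the loop body using IV here.
    };
    Type *I32 = Builder.getInt32Ty();
    CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
        OpenMPIRBuilder::LocationDescription(Builder.saveIP(), DL), LoopBodyGen,
        /* Start */ ConstantInt::get(I32, 0), /* Stop */ N,
        /* Step */ ConstantInt::get(I32, 1), /* IsSigned */ true,
        /* InclusiveStop */ false, /* ComputeIP */ {}, "example");
    // Worked instance of the trip-count math above: Start=0, Stop=10, Step=3
    // with an exclusive stop gives Span=10, CountIfTwo=(10-1)/3+1=4, so
    // TripCount=4 (I = 0, 3, 6, 9).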
1442 
1443 // Returns an LLVM function to call for initializing loop bounds using OpenMP
1444 // static scheduling depending on `type`. Only i32 and i64 are supported by the
1445 // runtime. Always interpret integers as unsigned similarly to
1446 // CanonicalLoopInfo.
1447 static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M,
1448  OpenMPIRBuilder &OMPBuilder) {
1449  unsigned Bitwidth = Ty->getIntegerBitWidth();
1450  if (Bitwidth == 32)
1451  return OMPBuilder.getOrCreateRuntimeFunction(
1452  M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
1453  if (Bitwidth == 64)
1454  return OMPBuilder.getOrCreateRuntimeFunction(
1455  M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
1456  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1457 }
1458 
1459 // Sets the number of loop iterations to the given value. This value must be
1460 // valid in the condition block (i.e., defined in the preheader) and is
1461 // interpreted as an unsigned integer.
1462 static void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount) {
1463  Instruction *CmpI = &CLI->getCond()->front();
1464  assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
1465  CmpI->setOperand(1, TripCount);
1466  CLI->assertOK();
1467 }
1468 
1469 OpenMPIRBuilder::InsertPointTy
1470 OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
1471  InsertPointTy AllocaIP,
1472  bool NeedsBarrier, Value *Chunk) {
1473  assert(CLI->isValid() && "Requires a valid canonical loop");
1474 
1475  // Set up the source location value for OpenMP runtime.
1476  Builder.restoreIP(CLI->getPreheaderIP());
1477  Builder.SetCurrentDebugLocation(DL);
1478 
1479  Constant *SrcLocStr = getOrCreateSrcLocStr(DL);
1480  Value *SrcLoc = getOrCreateIdent(SrcLocStr);
1481 
1482  // Declare useful OpenMP runtime functions.
1483  Value *IV = CLI->getIndVar();
1484  Type *IVTy = IV->getType();
1485  FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
1486  FunctionCallee StaticFini =
1487  getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
1488 
1489  // Allocate space for computed loop bounds as expected by the "init" function.
1490  Builder.restoreIP(AllocaIP);
1491  Type *I32Type = Type::getInt32Ty(M.getContext());
1492  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
1493  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
1494  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
1495  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
1496 
1497  // At the end of the preheader, prepare for calling the "init" function by
1498  // storing the current loop bounds into the allocated space. A canonical loop
1499  // always iterates from 0 to trip-count with step 1. Note that "init" expects
1500  // and produces an inclusive upper bound.
1501  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
1502  Constant *Zero = ConstantInt::get(IVTy, 0);
1503  Constant *One = ConstantInt::get(IVTy, 1);
1504  Builder.CreateStore(Zero, PLowerBound);
1505  Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
1506  Builder.CreateStore(UpperBound, PUpperBound);
1507  Builder.CreateStore(One, PStride);
1508 
1509  // FIXME: schedule(static) is NOT the same as schedule(static,1)
1510  if (!Chunk)
1511  Chunk = One;
1512 
1513  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
1514 
1515  Constant *SchedulingType =
1516  ConstantInt::get(I32Type, static_cast<int>(OMPScheduleType::Static));
1517 
1518  // Call the "init" function and update the trip count of the loop with the
1519  // value it produced.
1520  Builder.CreateCall(StaticInit,
1521  {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
1522  PUpperBound, PStride, One, Chunk});
1523  Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
1524  Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
1525  Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
1526  Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
1527  setCanonicalLoopTripCount(CLI, TripCount);
1528 
1529  // Update all uses of the induction variable except the one in the condition
1530  // block that compares it with the actual upper bound, and the increment in
1531  // the latch block.
1532  // TODO: this can eventually move to CanonicalLoopInfo or to a new
1533  // CanonicalLoopInfoUpdater interface.
1534  Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt());
1535  Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound);
1536  IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) {
1537  auto *Instr = dyn_cast<Instruction>(U.getUser());
1538  return !Instr ||
1539  (Instr->getParent() != CLI->getCond() &&
1540  Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV);
1541  });
1542 
1543  // In the "exit" block, call the "fini" function.
1544  Builder.SetInsertPoint(CLI->getExit(),
1545  CLI->getExit()->getTerminator()->getIterator());
1546  Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
1547 
1548  // Add the barrier if requested.
1549  if (NeedsBarrier)
1550  createBarrier(LocationDescription(Builder.saveIP(), DL),
1551  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
1552  /* CheckCancelFlag */ false);
1553 
1554  InsertPointTy AfterIP = CLI->getAfterIP();
1555  CLI->invalidate();
1556 
1557  return AfterIP;
1558 }
1559 
1560 OpenMPIRBuilder::InsertPointTy
1561 OpenMPIRBuilder::applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
1562  InsertPointTy AllocaIP, bool NeedsBarrier) {
1563  // Currently only supports static schedules.
1564  return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier);
1565 }
1566 
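// --- Illustrative usage sketch (not part of OMPIRBuilder.cpp) ----------------
// Shows how a frontend might combine createCanonicalLoop with the workshare
// lowering above. All names here (OMPBuilder, Loc, AllocaIP, Start, Stop, Step,
// DL) are assumed to be provided by the caller; treat this as a sketch of the
// API, not as canonical frontend code.
static void exampleStaticWorkshareLoop(
    OpenMPIRBuilder &OMPBuilder,
    const OpenMPIRBuilder::LocationDescription &Loc,
    OpenMPIRBuilder::InsertPointTy AllocaIP, Value *Start, Value *Stop,
    Value *Step, DebugLoc DL) {
  // Emit the user's loop body at CodeGenIP; IV is the canonical induction
  // variable running from 0 to the trip count.
  auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP, Value *IV) {
    // ... generate the loop body here ...
  };

  // Build the canonical loop from the user's bounds (unsigned, exclusive stop).
  CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
      Loc, BodyGenCB, Start, Stop, Step,
      /*IsSigned=*/false, /*InclusiveStop=*/false);

  // Lower it to a schedule(static) worksharing loop; this invalidates CLI.
  OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true);
}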
1567 /// Returns an LLVM function to call for initializing loop bounds using OpenMP
1568 /// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
1569 /// the runtime. Always interpret integers as unsigned similarly to
1570 /// CanonicalLoopInfo.
1571 static FunctionCallee
1572 getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
1573  unsigned Bitwidth = Ty->getIntegerBitWidth();
1574  if (Bitwidth == 32)
1575  return OMPBuilder.getOrCreateRuntimeFunction(
1576  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
1577  if (Bitwidth == 64)
1578  return OMPBuilder.getOrCreateRuntimeFunction(
1579  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
1580  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1581 }
1582 
1583 /// Returns an LLVM function to call for updating the next loop using OpenMP
1584 /// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
1585 /// the runtime. Always interpret integers as unsigned similarly to
1586 /// CanonicalLoopInfo.
1587 static FunctionCallee
1588 getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
1589  unsigned Bitwidth = Ty->getIntegerBitWidth();
1590  if (Bitwidth == 32)
1591  return OMPBuilder.getOrCreateRuntimeFunction(
1592  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
1593  if (Bitwidth == 64)
1594  return OMPBuilder.getOrCreateRuntimeFunction(
1595  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
1596  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1597 }
1598 
1599 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
1600  DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
1601  OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
1602  assert(CLI->isValid() && "Requires a valid canonical loop");
1603 
1604  // Set up the source location value for OpenMP runtime.
1605  Builder.SetCurrentDebugLocation(DL);
1606 
1607  Constant *SrcLocStr = getOrCreateSrcLocStr(DL);
1608  Value *SrcLoc = getOrCreateIdent(SrcLocStr);
1609 
1610  // Declare useful OpenMP runtime functions.
1611  Value *IV = CLI->getIndVar();
1612  Type *IVTy = IV->getType();
1613  FunctionCallee DynamicInit = getKmpcForDynamicInitForType(IVTy, M, *this);
1614  FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this);
1615 
1616  // Allocate space for computed loop bounds as expected by the "init" function.
1617  Builder.restoreIP(AllocaIP);
1618  Type *I32Type = Type::getInt32Ty(M.getContext());
1619  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
1620  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
1621  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
1622  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
1623 
1624  // At the end of the preheader, prepare for calling the "init" function by
1625  // storing the current loop bounds into the allocated space. A canonical loop
1626  // always iterates from 0 to trip-count with step 1. Note that "init" expects
1627  // and produces an inclusive upper bound.
1628  BasicBlock *PreHeader = CLI->getPreheader();
1629  Builder.SetInsertPoint(PreHeader->getTerminator());
1630  Constant *One = ConstantInt::get(IVTy, 1);
1631  Builder.CreateStore(One, PLowerBound);
1632  Value *UpperBound = CLI->getTripCount();
1633  Builder.CreateStore(UpperBound, PUpperBound);
1634  Builder.CreateStore(One, PStride);
1635 
1636  BasicBlock *Header = CLI->getHeader();
1637  BasicBlock *Exit = CLI->getExit();
1638  BasicBlock *Cond = CLI->getCond();
1639  InsertPointTy AfterIP = CLI->getAfterIP();
1640 
1641  // The CLI will be "broken" in the code below, as the loop is no longer
1642  // a valid canonical loop.
1643 
1644  if (!Chunk)
1645  Chunk = One;
1646 
1647  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
1648 
1649  Constant *SchedulingType =
1650  ConstantInt::get(I32Type, static_cast<int>(SchedType));
1651 
1652  // Call the "init" function.
1653  Builder.CreateCall(DynamicInit,
1654  {SrcLoc, ThreadNum, SchedulingType, /* LowerBound */ One,
1655  UpperBound, /* step */ One, Chunk});
1656 
1657  // An outer loop around the existing one.
1658  BasicBlock *OuterCond = BasicBlock::Create(
1659  PreHeader->getContext(), Twine(PreHeader->getName()) + ".outer.cond",
1660  PreHeader->getParent());
1661  // This needs to be 32-bit always, so can't use the IVTy Zero above.
1662  Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
1663  Value *Res =
1664  Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
1665  PLowerBound, PUpperBound, PStride});
1666  Constant *Zero32 = ConstantInt::get(I32Type, 0);
1667  Value *MoreWork = Builder.CreateCmp(CmpInst::ICMP_NE, Res, Zero32);
1668  Value *LowerBound =
1669  Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb");
1670  Builder.CreateCondBr(MoreWork, Header, Exit);
1671 
1672  // Change PHI-node in loop header to use outer cond rather than preheader,
1673  // and set IV to the LowerBound.
1674  Instruction *Phi = &Header->front();
1675  auto *PI = cast<PHINode>(Phi);
1676  PI->setIncomingBlock(0, OuterCond);
1677  PI->setIncomingValue(0, LowerBound);
1678 
1679  // Then set the pre-header to jump to the OuterCond
1680  Instruction *Term = PreHeader->getTerminator();
1681  auto *Br = cast<BranchInst>(Term);
1682  Br->setSuccessor(0, OuterCond);
1683 
1684  // Modify the inner condition:
1685  // * Use the UpperBound returned from the DynamicNext call.
1686  // * Jump to the outer loop when done with one of the inner loops.
1687  Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt());
1688  UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub");
1689  Instruction *Comp = &*Builder.GetInsertPoint();
1690  auto *CI = cast<CmpInst>(Comp);
1691  CI->setOperand(1, UpperBound);
1692  // Redirect the inner exit to branch to outer condition.
1693  Instruction *Branch = &Cond->back();
1694  auto *BI = cast<BranchInst>(Branch);
1695  assert(BI->getSuccessor(1) == Exit);
1696  BI->setSuccessor(1, OuterCond);
1697 
1698  // Add the barrier if requested.
1699  if (NeedsBarrier) {
1700  Builder.SetInsertPoint(&Exit->back());
1701  createBarrier(LocationDescription(Builder.saveIP(), DL),
1702  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
1703  /* CheckCancelFlag */ false);
1704  }
1705 
1706  CLI->invalidate();
1707  return AfterIP;
1708 }
1709 
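// --- Illustrative usage sketch (not part of OMPIRBuilder.cpp) ----------------
// A minimal sketch of lowering an already-created canonical loop with a
// dynamic, chunked schedule. CLI, AllocaIP, DL and ChunkSize are assumed to
// exist in the caller.
static void exampleDynamicWorkshareLoop(OpenMPIRBuilder &OMPBuilder,
                                        CanonicalLoopInfo *CLI,
                                        OpenMPIRBuilder::InsertPointTy AllocaIP,
                                        DebugLoc DL, Value *ChunkSize) {
  // Corresponds to schedule(dynamic, ChunkSize); CLI is invalidated and the
  // returned insert point is where code after the loop should be emitted.
  OpenMPIRBuilder::InsertPointTy AfterIP = OMPBuilder.applyDynamicWorkshareLoop(
      DL, CLI, AllocaIP, omp::OMPScheduleType::DynamicChunked,
      /*NeedsBarrier=*/true, ChunkSize);
  (void)AfterIP;
}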
1710 /// Make \p Source branch to \p Target.
1711 ///
1712 /// Handles two situations:
1713 /// * \p Source already has an unconditional branch.
1714 /// * \p Source is a degenerate block (no terminator because the BB is
1715 /// the current head of the IR construction).
1716 static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) {
1717  if (Instruction *Term = Source->getTerminator()) {
1718  auto *Br = cast<BranchInst>(Term);
1719  assert(!Br->isConditional() &&
1720  "BB's terminator must be an unconditional branch (or degenerate)");
1721  BasicBlock *Succ = Br->getSuccessor(0);
1722  Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
1723  Br->setSuccessor(0, Target);
1724  return;
1725  }
1726 
1727  auto *NewBr = BranchInst::Create(Target, Source);
1728  NewBr->setDebugLoc(DL);
1729 }
1730 
1731 /// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
1732 /// after this \p OldTarget will be orphaned.
1733 static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
1734  BasicBlock *NewTarget, DebugLoc DL) {
1735  for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget)))
1736  redirectTo(Pred, NewTarget, DL);
1737 }
1738 
1739 /// Determine which blocks in \p BBs are reachable from outside and remove the
1740 /// ones that are not reachable from the function.
1741 static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
1742  SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};
1743  auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
1744  for (Use &U : BB->uses()) {
1745  auto *UseInst = dyn_cast<Instruction>(U.getUser());
1746  if (!UseInst)
1747  continue;
1748  if (BBsToErase.count(UseInst->getParent()))
1749  continue;
1750  return true;
1751  }
1752  return false;
1753  };
1754 
1755  while (true) {
1756  bool Changed = false;
1757  for (BasicBlock *BB : make_early_inc_range(BBsToErase)) {
1758  if (HasRemainingUses(BB)) {
1759  BBsToErase.erase(BB);
1760  Changed = true;
1761  }
1762  }
1763  if (!Changed)
1764  break;
1765  }
1766 
1767  SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
1768  DeleteDeadBlocks(BBVec);
1769 }
1770 
1771 CanonicalLoopInfo *
1772 OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
1773  InsertPointTy ComputeIP) {
1774  assert(Loops.size() >= 1 && "At least one loop required");
1775  size_t NumLoops = Loops.size();
1776 
1777  // Nothing to do if there is already just one loop.
1778  if (NumLoops == 1)
1779  return Loops.front();
1780 
1781  CanonicalLoopInfo *Outermost = Loops.front();
1782  CanonicalLoopInfo *Innermost = Loops.back();
1783  BasicBlock *OrigPreheader = Outermost->getPreheader();
1784  BasicBlock *OrigAfter = Outermost->getAfter();
1785  Function *F = OrigPreheader->getParent();
1786 
1787  // Setup the IRBuilder for inserting the trip count computation.
1788  Builder.SetCurrentDebugLocation(DL);
1789  if (ComputeIP.isSet())
1790  Builder.restoreIP(ComputeIP);
1791  else
1792  Builder.restoreIP(Outermost->getPreheaderIP());
1793 
1794  // Derive the collapsed loop's trip count.
1795  // TODO: Find common/largest indvar type.
1796  Value *CollapsedTripCount = nullptr;
1797  for (CanonicalLoopInfo *L : Loops) {
1798  assert(L->isValid() &&
1799  "All loops to collapse must be valid canonical loops");
1800  Value *OrigTripCount = L->getTripCount();
1801  if (!CollapsedTripCount) {
1802  CollapsedTripCount = OrigTripCount;
1803  continue;
1804  }
1805 
1806  // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
1807  CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount,
1808  {}, /*HasNUW=*/true);
1809  }
1810 
1811  // Create the collapsed loop control flow.
1812  CanonicalLoopInfo *Result =
1813  createLoopSkeleton(DL, CollapsedTripCount, F,
1814  OrigPreheader->getNextNode(), OrigAfter, "collapsed");
1815 
1816  // Build the collapsed loop body code.
1817  // Start with deriving the input loop induction variables from the collapsed
1818  // one, using a divmod scheme. To preserve the original loops' order, the
1819  // innermost loop uses the least significant bits.
1820  Builder.restoreIP(Result->getBodyIP());
1821 
1822  Value *Leftover = Result->getIndVar();
1823  SmallVector<Value *> NewIndVars;
1824  NewIndVars.set_size(NumLoops);
1825  for (int i = NumLoops - 1; i >= 1; --i) {
1826  Value *OrigTripCount = Loops[i]->getTripCount();
1827 
1828  Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
1829  NewIndVars[i] = NewIndVar;
1830 
1831  Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
1832  }
1833  // Outermost loop gets all the remaining bits.
1834  NewIndVars[0] = Leftover;
1835 
1836  // Construct the loop body control flow.
1837  // We progressively construct the branch structure following in direction of
1838  // the control flow, from the leading in-between code, the loop nest body, the
1839  // trailing in-between code, and rejoining the collapsed loop's latch.
1840  // ContinueBlock and ContinuePred keep track of the source(s) of next edge. If
1841  // the ContinueBlock is set, continue with that block. If ContinuePred, use
1842  // its predecessors as sources.
1843  BasicBlock *ContinueBlock = Result->getBody();
1844  BasicBlock *ContinuePred = nullptr;
1845  auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
1846  BasicBlock *NextSrc) {
1847  if (ContinueBlock)
1848  redirectTo(ContinueBlock, Dest, DL);
1849  else
1850  redirectAllPredecessorsTo(ContinuePred, Dest, DL);
1851 
1852  ContinueBlock = nullptr;
1853  ContinuePred = NextSrc;
1854  };
1855 
1856  // The code before the nested loop of each level.
1857  // Because we are sinking it into the nest, it will be executed more often
1858  // than the original loop. More sophisticated schemes could keep track of what
1859  // the in-between code is and instantiate it only once per thread.
1860  for (size_t i = 0; i < NumLoops - 1; ++i)
1861  ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());
1862 
1863  // Connect the loop nest body.
1864  ContinueWith(Innermost->getBody(), Innermost->getLatch());
1865 
1866  // The code after the nested loop at each level.
1867  for (size_t i = NumLoops - 1; i > 0; --i)
1868  ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());
1869 
1870  // Connect the finished loop to the collapsed loop latch.
1871  ContinueWith(Result->getLatch(), nullptr);
1872 
1873  // Replace the input loops with the new collapsed loop.
1874  redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL);
1875  redirectTo(Result->getAfter(), Outermost->getAfter(), DL);
1876 
1877  // Replace the input loop indvars with the derived ones.
1878  for (size_t i = 0; i < NumLoops; ++i)
1879  Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
1880 
1881  // Remove unused parts of the input loops.
1882  SmallVector<BasicBlock *, 12> OldControlBBs;
1883  OldControlBBs.reserve(6 * Loops.size());
1884  for (CanonicalLoopInfo *Loop : Loops)
1885  Loop->collectControlBlocks(OldControlBBs);
1886  removeUnusedBlocksFromParent(OldControlBBs);
1887 
1888  for (CanonicalLoopInfo *L : Loops)
1889  L->invalidate();
1890 
1891 #ifndef NDEBUG
1892  Result->assertOK();
1893 #endif
1894  return Result;
1895 }
1896 
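// --- Illustrative usage sketch (not part of OMPIRBuilder.cpp) ----------------
// Collapsing a perfectly nested pair of canonical loops into one, as needed
// for e.g. `#pragma omp for collapse(2)`. Outer and Inner are assumed to be
// valid CanonicalLoopInfos with Inner nested directly in Outer's body.
static CanonicalLoopInfo *exampleCollapseTwoLoops(OpenMPIRBuilder &OMPBuilder,
                                                  CanonicalLoopInfo *Outer,
                                                  CanonicalLoopInfo *Inner,
                                                  DebugLoc DL) {
  CanonicalLoopInfo *Loops[] = {Outer, Inner};
  // An unset ComputeIP places the combined trip-count computation into the
  // outermost preheader; both input loops are invalidated.
  return OMPBuilder.collapseLoops(DL, Loops, /*ComputeIP=*/{});
}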
1897 std::vector<CanonicalLoopInfo *>
1898 OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
1899  ArrayRef<Value *> TileSizes) {
1900  assert(TileSizes.size() == Loops.size() &&
1901  "Must pass as many tile sizes as there are loops");
1902  int NumLoops = Loops.size();
1903  assert(NumLoops >= 1 && "At least one loop to tile required");
1904 
1905  CanonicalLoopInfo *OutermostLoop = Loops.front();
1906  CanonicalLoopInfo *InnermostLoop = Loops.back();
1907  Function *F = OutermostLoop->getBody()->getParent();
1908  BasicBlock *InnerEnter = InnermostLoop->getBody();
1909  BasicBlock *InnerLatch = InnermostLoop->getLatch();
1910 
1911  // Collect original trip counts and induction variable to be accessible by
1912  // index. Also, the structure of the original loops is not preserved during
1913  // the construction of the tiled loops, so do it before we scavenge the BBs of
1914  // any original CanonicalLoopInfo.
1915  SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
1916  for (CanonicalLoopInfo *L : Loops) {
1917  assert(L->isValid() && "All input loops must be valid canonical loops");
1918  OrigTripCounts.push_back(L->getTripCount());
1919  OrigIndVars.push_back(L->getIndVar());
1920  }
1921 
1922  // Collect the code between loop headers. These may contain SSA definitions
1923  // that are used in the loop nest body. To be usable within the innermost
1924  // body, these BasicBlocks will be sunk into the loop nest body. That is,
1925  // these instructions may be executed more often than before the tiling.
1926  // TODO: It would be sufficient to only sink them into body of the
1927  // corresponding tile loop.
1928  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
1929  for (int i = 0; i < NumLoops - 1; ++i) {
1930  CanonicalLoopInfo *Surrounding = Loops[i];
1931  CanonicalLoopInfo *Nested = Loops[i + 1];
1932 
1933  BasicBlock *EnterBB = Surrounding->getBody();
1934  BasicBlock *ExitBB = Nested->getHeader();
1935  InbetweenCode.emplace_back(EnterBB, ExitBB);
1936  }
1937 
1938  // Compute the trip counts of the floor loops.
1939  Builder.SetCurrentDebugLocation(DL);
1940  Builder.restoreIP(OutermostLoop->getPreheaderIP());
1941  SmallVector<Value *, 4> FloorCount, FloorRems;
1942  for (int i = 0; i < NumLoops; ++i) {
1943  Value *TileSize = TileSizes[i];
1944  Value *OrigTripCount = OrigTripCounts[i];
1945  Type *IVType = OrigTripCount->getType();
1946 
1947  Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
1948  Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
1949 
1950  // 0 if tripcount divides the tilesize, 1 otherwise.
1951  // 1 means we need an additional iteration for a partial tile.
1952  //
1953  // Unfortunately we cannot just use the roundup-formula
1954  // (tripcount + tilesize - 1)/tilesize
1955  // because the summation might overflow. We do not want to introduce undefined
1956  // behavior when the untiled loop nest did not.
1957  Value *FloorTripOverflow =
1958  Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
1959 
1960  FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
1961  FloorTripCount =
1962  Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
1963  "omp_floor" + Twine(i) + ".tripcount", true);
1964 
1965  // Remember some values for later use.
1966  FloorCount.push_back(FloorTripCount);
1967  FloorRems.push_back(FloorTripRem);
1968  }
1969 
1970  // Generate the new loop nest, from the outermost to the innermost.
1971  std::vector<CanonicalLoopInfo *> Result;
1972  Result.reserve(NumLoops * 2);
1973 
1974  // The basic block of the surrounding loop that enters the nest generated
1975  // loop.
1976  BasicBlock *Enter = OutermostLoop->getPreheader();
1977 
1978  // The basic block of the surrounding loop where the inner code should
1979  // continue.
1980  BasicBlock *Continue = OutermostLoop->getAfter();
1981 
1982  // Where the next loop basic block should be inserted.
1983  BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
1984 
1985  auto EmbeddNewLoop =
1986  [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
1987  Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
1988  CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
1989  DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
1990  redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
1991  redirectTo(EmbeddedLoop->getAfter(), Continue, DL);
1992 
1993  // Setup the position where the next embedded loop connects to this loop.
1994  Enter = EmbeddedLoop->getBody();
1995  Continue = EmbeddedLoop->getLatch();
1996  OutroInsertBefore = EmbeddedLoop->getLatch();
1997  return EmbeddedLoop;
1998  };
1999 
2000  auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
2001  const Twine &NameBase) {
2002  for (auto P : enumerate(TripCounts)) {
2003  CanonicalLoopInfo *EmbeddedLoop =
2004  EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
2005  Result.push_back(EmbeddedLoop);
2006  }
2007  };
2008 
2009  EmbeddNewLoops(FloorCount, "floor");
2010 
2011  // Within the innermost floor loop, emit the code that computes the tile
2012  // sizes.
2013  Builder.SetInsertPoint(Enter->getTerminator());
2014  SmallVector<Value *, 4> TileCounts;
2015  for (int i = 0; i < NumLoops; ++i) {
2016  CanonicalLoopInfo *FloorLoop = Result[i];
2017  Value *TileSize = TileSizes[i];
2018 
2019  Value *FloorIsEpilogue =
2020  Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
2021  Value *TileTripCount =
2022  Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
2023 
2024  TileCounts.push_back(TileTripCount);
2025  }
2026 
2027  // Create the tile loops.
2028  EmbeddNewLoops(TileCounts, "tile");
2029 
2030  // Insert the inbetween code into the body.
2031  BasicBlock *BodyEnter = Enter;
2032  BasicBlock *BodyEntered = nullptr;
2033  for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
2034  BasicBlock *EnterBB = P.first;
2035  BasicBlock *ExitBB = P.second;
2036 
2037  if (BodyEnter)
2038  redirectTo(BodyEnter, EnterBB, DL);
2039  else
2040  redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);
2041 
2042  BodyEnter = nullptr;
2043  BodyEntered = ExitBB;
2044  }
2045 
2046  // Append the original loop nest body into the generated loop nest body.
2047  if (BodyEnter)
2048  redirectTo(BodyEnter, InnerEnter, DL);
2049  else
2050  redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
2051  redirectAllPredecessorsTo(InnerLatch, Continue, DL);
2052 
2053  // Replace the original induction variable with an induction variable computed
2054  // from the tile and floor induction variables.
2055  Builder.restoreIP(Result.back()->getBodyIP());
2056  for (int i = 0; i < NumLoops; ++i) {
2057  CanonicalLoopInfo *FloorLoop = Result[i];
2058  CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
2059  Value *OrigIndVar = OrigIndVars[i];
2060  Value *Size = TileSizes[i];
2061 
2062  Value *Scale =
2063  Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
2064  Value *Shift =
2065  Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
2066  OrigIndVar->replaceAllUsesWith(Shift);
2067  }
2068 
2069  // Remove unused parts of the original loops.
2070  SmallVector<BasicBlock *, 12> OldControlBBs;
2071  OldControlBBs.reserve(6 * Loops.size());
2072  for (CanonicalLoopInfo *Loop : Loops)
2073  Loop->collectControlBlocks(OldControlBBs);
2074  removeUnusedBlocksFromParent(OldControlBBs);
2075 
2076  for (CanonicalLoopInfo *L : Loops)
2077  L->invalidate();
2078 
2079 #ifndef NDEBUG
2080  for (CanonicalLoopInfo *GenL : Result)
2081  GenL->assertOK();
2082 #endif
2083  return Result;
2084 }
2085 
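// --- Illustrative usage sketch (not part of OMPIRBuilder.cpp) ----------------
// Tiling a two-deep canonical loop nest with constant tile sizes. The result
// holds the floor loops followed by the tile loops, and the input loops are
// invalidated. Outer, Inner and DL are assumed to come from the caller; the
// tile sizes 32 and 8 are arbitrary.
static void exampleTileTwoLoops(OpenMPIRBuilder &OMPBuilder,
                                CanonicalLoopInfo *Outer,
                                CanonicalLoopInfo *Inner, DebugLoc DL) {
  Value *TileSizes[] = {ConstantInt::get(Outer->getIndVarType(), 32),
                        ConstantInt::get(Inner->getIndVarType(), 8)};
  std::vector<CanonicalLoopInfo *> Tiled =
      OMPBuilder.tileLoops(DL, {Outer, Inner}, TileSizes);
  // Tiled[0]/Tiled[1] are the floor loops, Tiled[2]/Tiled[3] the tile loops.
  (void)Tiled;
}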
2086 /// Attach loop metadata \p Properties to the loop described by \p Loop. If the
2087 /// loop already has metadata, the loop properties are appended.
2088 static void addLoopMetadata(CanonicalLoopInfo *Loop,
2089  ArrayRef<Metadata *> Properties) {
2090  assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo");
2091 
2092  // Nothing to do if no property to attach.
2093  if (Properties.empty())
2094  return;
2095 
2096  LLVMContext &Ctx = Loop->getFunction()->getContext();
2097  SmallVector<Metadata *> NewLoopProperties;
2098  NewLoopProperties.push_back(nullptr);
2099 
2100  // If the loop already has metadata, prepend it to the new metadata.
2101  BasicBlock *Latch = Loop->getLatch();
2102  assert(Latch && "A valid CanonicalLoopInfo must have a unique latch");
2103  MDNode *Existing = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop);
2104  if (Existing)
2105  append_range(NewLoopProperties, drop_begin(Existing->operands(), 1));
2106 
2107  append_range(NewLoopProperties, Properties);
2108  MDNode *LoopID = MDNode::getDistinct(Ctx, NewLoopProperties);
2109  LoopID->replaceOperandWith(0, LoopID);
2110 
2111  Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
2112 }
2113 
2114 void OpenMPIRBuilder::unrollLoopFull(DebugLoc, CanonicalLoopInfo *Loop) {
2115  LLVMContext &Ctx = Builder.getContext();
2116  addLoopMetadata(
2117  Loop, {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
2118  MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.full"))});
2119 }
2120 
2121 void OpenMPIRBuilder::unrollLoopHeuristic(DebugLoc, CanonicalLoopInfo *Loop) {
2122  LLVMContext &Ctx = Builder.getContext();
2123  addLoopMetadata(
2124  Loop, {
2125  MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
2126  });
2127 }
2128 
2129 /// Create the TargetMachine object to query the backend for optimization
2130 /// preferences.
2131 ///
2132 /// Ideally, this would be passed from the front-end to the OpenMPBuilder, but
2133 /// e.g. Clang does not pass it to its CodeGen layer and creates it only when
2134 /// needed for the LLVM pass pipeline. We use some default options to avoid
2135 /// having to pass too many settings from the frontend that probably do not
2136 /// matter.
2137 ///
2138 /// Currently, TargetMachine is only used sometimes by the unrollLoopPartial
2139 /// method. If we are going to use TargetMachine for more purposes, especially
2140 /// those that are sensitive to TargetOptions, RelocModel and CodeModel, it
2141 /// might become worth requiring front-ends to pass on their TargetMachine,
2142 /// or at least cache it between methods. Note that while frontends such as Clang
2143 /// have just a single main TargetMachine per translation unit, "target-cpu" and
2144 /// "target-features" that determine the TargetMachine are per-function and can
2145 /// be overridden using __attribute__((target("OPTIONS"))).
2146 static std::unique_ptr<TargetMachine>
2147 createTargetMachine(Function *F, CodeGenOpt::Level OptLevel) {
2148  Module *M = F->getParent();
2149 
2150  StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString();
2151  StringRef Features = F->getFnAttribute("target-features").getValueAsString();
2152  const std::string &Triple = M->getTargetTriple();
2153 
2154  std::string Error;
2155  const llvm::Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
2156  if (!TheTarget)
2157  return {};
2158 
2159  llvm::TargetOptions Options;
2160  return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
2161  Triple, CPU, Features, Options, /*RelocModel=*/None, /*CodeModel=*/None,
2162  OptLevel));
2163 }
2164 
2165 /// Heuristically determine the best-performant unroll factor for \p CLI. This
2166 /// depends on the target processor. We are re-using the same heuristics as the
2167 /// LoopUnrollPass.
2168 static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
2169  Function *F = CLI->getFunction();
2170 
2171  // Assume the user requests the most aggressive unrolling, even if the rest of
2172  // the code is optimized using a lower setting.
2173  CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive;
2174  std::unique_ptr<TargetMachine> TM = createTargetMachine(F, OptLevel);
2175 
2176  FunctionAnalysisManager FAM;
2177  FAM.registerPass([]() { return TargetLibraryAnalysis(); });
2178  FAM.registerPass([]() { return AssumptionAnalysis(); });
2179  FAM.registerPass([]() { return DominatorTreeAnalysis(); });
2180  FAM.registerPass([]() { return LoopAnalysis(); });
2181  FAM.registerPass([]() { return ScalarEvolutionAnalysis(); });
2182  FAM.registerPass([]() { return PassInstrumentationAnalysis(); });
2183  TargetIRAnalysis TIRA;
2184  if (TM)
2185  TIRA = TargetIRAnalysis(
2186  [&](const Function &F) { return TM->getTargetTransformInfo(F); });
2187  FAM.registerPass([&]() { return TIRA; });
2188 
2189  TargetIRAnalysis::Result &&TTI = TIRA.run(*F, FAM);
2190  ScalarEvolutionAnalysis SEA;
2191  ScalarEvolution &&SE = SEA.run(*F, FAM);
2192  DominatorTreeAnalysis DTA;
2193  DominatorTree &&DT = DTA.run(*F, FAM);
2194  LoopAnalysis LIA;
2195  LoopInfo &&LI = LIA.run(*F, FAM);
2196  AssumptionAnalysis ACT;
2197  AssumptionCache &&AC = ACT.run(*F, FAM);
2198  OptimizationRemarkEmitter ORE{F};
2199 
2200  Loop *L = LI.getLoopFor(CLI->getHeader());
2201  assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop");
2202 
2203  TargetTransformInfo::UnrollingPreferences UP =
2204  gatherUnrollingPreferences(L, SE, TTI,
2205  /*BlockFrequencyInfo=*/nullptr,
2206  /*ProfileSummaryInfo=*/nullptr, ORE, OptLevel,
2207  /*UserThreshold=*/None,
2208  /*UserCount=*/None,
2209  /*UserAllowPartial=*/true,
2210  /*UserAllowRuntime=*/true,
2211  /*UserUpperBound=*/None,
2212  /*UserFullUnrollMaxCount=*/None);
2213 
2214  UP.Force = true;
2215 
2216  // Account for additional optimizations taking place before the LoopUnrollPass
2217  // would unroll the loop.
2218  UP.Threshold *= UnrollThresholdFactor;
2219  UP.PartialThreshold *= UnrollThresholdFactor;
2220 
2221  // Use normal unroll factors even if the rest of the code is optimized for
2222  // size.
2223  UP.OptSizeThreshold = UP.Threshold;
2224  UP.PartialOptSizeThreshold = UP.PartialThreshold;
2225 
2226  LLVM_DEBUG(dbgs() << "Unroll heuristic thresholds:\n"
2227  << " Threshold=" << UP.Threshold << "\n"
2228  << " PartialThreshold=" << UP.PartialThreshold << "\n"
2229  << " OptSizeThreshold=" << UP.OptSizeThreshold << "\n"
2230  << " PartialOptSizeThreshold="
2231  << UP.PartialOptSizeThreshold << "\n");
2232 
2233  // Disable peeling.
2234  TargetTransformInfo::PeelingPreferences PP =
2235  gatherPeelingPreferences(L, SE, TTI,
2236  /*UserAllowPeeling=*/false,
2237  /*UserAllowProfileBasedPeeling=*/false,
2238  /*UserUnrollingSpecficValues=*/false);
2239 
2239 
2240  SmallPtrSet<const Value *, 32> EphValues;
2241  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
2242 
2243  // Assume that reads and writes to stack variables can be eliminated by
2244  // Mem2Reg, SROA or LICM. That is, don't count them towards the loop body's
2245  // size.
2246  for (BasicBlock *BB : L->blocks()) {
2247  for (Instruction &I : *BB) {
2248  Value *Ptr;
2249  if (auto *Load = dyn_cast<LoadInst>(&I)) {
2250  Ptr = Load->getPointerOperand();
2251  } else if (auto *Store = dyn_cast<StoreInst>(&I)) {
2252  Ptr = Store->getPointerOperand();
2253  } else
2254  continue;
2255 
2256  Ptr = Ptr->stripPointerCasts();
2257 
2258  if (auto *Alloca = dyn_cast<AllocaInst>(Ptr)) {
2259  if (Alloca->getParent() == &F->getEntryBlock())
2260  EphValues.insert(&I);
2261  }
2262  }
2263  }
2264 
2265  unsigned NumInlineCandidates;
2266  bool NotDuplicatable;
2267  bool Convergent;
2268  unsigned LoopSize =
2269  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
2270  TTI, EphValues, UP.BEInsns);
2271  LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSize << "\n");
2272 
2273  // Loop is not unrollable if the loop contains certain instructions.
2274  if (NotDuplicatable || Convergent) {
2275  LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n");
2276  return 1;
2277  }
2278 
2279  // TODO: Determine trip count of \p CLI if constant, computeUnrollCount might
2280  // be able to use it.
2281  int TripCount = 0;
2282  int MaxTripCount = 0;
2283  bool MaxOrZero = false;
2284  unsigned TripMultiple = 0;
2285 
2286  bool UseUpperBound = false;
2287  computeUnrollCount(L, TTI, DT, &LI, SE, EphValues, &ORE, TripCount,
2288  MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, PP,
2289  UseUpperBound);
2290  unsigned Factor = UP.Count;
2291  LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n");
2292 
2293  // This function returns 1 to signal to not unroll a loop.
2294  if (Factor == 0)
2295  return 1;
2296  return Factor;
2297 }
2298 
2299 void OpenMPIRBuilder::unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop,
2300  int32_t Factor,
2301  CanonicalLoopInfo **UnrolledCLI) {
2302  assert(Factor >= 0 && "Unroll factor must not be negative");
2303 
2304  Function *F = Loop->getFunction();
2305  LLVMContext &Ctx = F->getContext();
2306 
2307  // If the unrolled loop is not used for another loop-associated directive, it
2308  // is sufficient to add metadata for the LoopUnrollPass.
2309  if (!UnrolledCLI) {
2310  SmallVector<Metadata *, 2> LoopMetadata;
2311  LoopMetadata.push_back(
2312  MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")));
2313 
2314  if (Factor >= 1) {
2315  ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
2316  ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
2317  LoopMetadata.push_back(MDNode::get(
2318  Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst}));
2319  }
2320 
2321  addLoopMetadata(Loop, LoopMetadata);
2322  return;
2323  }
2324 
2325  // Heuristically determine the unroll factor.
2326  if (Factor == 0)
2327  Factor = computeHeuristicUnrollFactor(Loop);
2328 
2329  // No change required with unroll factor 1.
2330  if (Factor == 1) {
2331  *UnrolledCLI = Loop;
2332  return;
2333  }
2334 
2335  assert(Factor >= 2 &&
2336  "unrolling only makes sense with a factor of 2 or larger");
2337 
2338  Type *IndVarTy = Loop->getIndVarType();
2339 
2340  // Apply partial unrolling by tiling the loop by the unroll-factor, then fully
2341  // unroll the inner loop.
2342  Value *FactorVal =
2343  ConstantInt::get(IndVarTy, APInt(IndVarTy->getIntegerBitWidth(), Factor,
2344  /*isSigned=*/false));
2345  std::vector<CanonicalLoopInfo *> LoopNest =
2346  tileLoops(DL, {Loop}, {FactorVal});
2347  assert(LoopNest.size() == 2 && "Expect 2 loops after tiling");
2348  *UnrolledCLI = LoopNest[0];
2349  CanonicalLoopInfo *InnerLoop = LoopNest[1];
2350 
2351  // LoopUnrollPass can only fully unroll loops with constant trip count.
2352  // Unroll by the unroll factor with a fallback epilog for the remainder
2353  // iterations if necessary.
2354  ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
2355  ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
2356  addLoopMetadata(
2357  InnerLoop,
2358  {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
2359  MDNode::get(
2360  Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst})});
2361 
2362 #ifndef NDEBUG
2363  (*UnrolledCLI)->assertOK();
2364 #endif
2365 }
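// --- Illustrative usage sketch (not part of OMPIRBuilder.cpp) ----------------
// Partially unrolling a canonical loop, e.g. for `#pragma omp unroll
// partial(4)`. Requesting UnrolledCLI keeps a canonical loop around that can
// still be associated with another loop directive. Names are assumptions.
static CanonicalLoopInfo *examplePartialUnroll(OpenMPIRBuilder &OMPBuilder,
                                               CanonicalLoopInfo *Loop,
                                               DebugLoc DL) {
  CanonicalLoopInfo *Unrolled = nullptr;
  // A factor of 0 would let the heuristic above choose the unroll factor.
  OMPBuilder.unrollLoopPartial(DL, Loop, /*Factor=*/4, &Unrolled);
  return Unrolled;
}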
2366 
2367 OpenMPIRBuilder::InsertPointTy
2368 OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
2369  llvm::Value *BufSize, llvm::Value *CpyBuf,
2370  llvm::Value *CpyFn, llvm::Value *DidIt) {
2371  if (!updateToLocation(Loc))
2372  return Loc.IP;
2373 
2374  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2375  Value *Ident = getOrCreateIdent(SrcLocStr);
2376  Value *ThreadId = getOrCreateThreadID(Ident);
2377 
2378  llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
2379 
2380  Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
2381 
2382  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
2383  Builder.CreateCall(Fn, Args);
2384 
2385  return Builder.saveIP();
2386 }
2387 
2388 OpenMPIRBuilder::InsertPointTy
2389 OpenMPIRBuilder::createSingle(const LocationDescription &Loc,
2390  BodyGenCallbackTy BodyGenCB,
2391  FinalizeCallbackTy FiniCB, llvm::Value *DidIt) {
2392 
2393  if (!updateToLocation(Loc))
2394  return Loc.IP;
2395 
2396  // If needed (i.e. not null), initialize `DidIt` with 0
2397  if (DidIt) {
2398  Builder.CreateStore(Builder.getInt32(0), DidIt);
2399  }
2400 
2401  Directive OMPD = Directive::OMPD_single;
2402  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2403  Value *Ident = getOrCreateIdent(SrcLocStr);
2404  Value *ThreadId = getOrCreateThreadID(Ident);
2405  Value *Args[] = {Ident, ThreadId};
2406 
2407  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
2408  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
2409 
2410  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
2411  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
2412 
2413  // generates the following:
2414  // if (__kmpc_single()) {
2415  // .... single region ...
2416  // __kmpc_end_single
2417  // }
2418 
2419  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
2420  /*Conditional*/ true, /*hasFinalize*/ true);
2421 }
2422 
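// --- Illustrative usage sketch (not part of OMPIRBuilder.cpp) ----------------
// Emitting a `single` region; DidIt may be null when there is no copyprivate
// clause. Loc is assumed to be set up by the caller, and the callbacks only
// mark where the caller would emit the region body and its finalization.
static void exampleSingleRegion(
    OpenMPIRBuilder &OMPBuilder,
    const OpenMPIRBuilder::LocationDescription &Loc) {
  auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
                       OpenMPIRBuilder::InsertPointTy CodeGenIP,
                       BasicBlock &ContinuationBB) {
    // ... emit the single-region body at CodeGenIP ...
  };
  auto FiniCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP) {
    // ... emit cleanup that must run when leaving the region early ...
  };
  OMPBuilder.createSingle(Loc, BodyGenCB, FiniCB, /*DidIt=*/nullptr);
}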
2423 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
2424  const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
2425  FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {
2426 
2427  if (!updateToLocation(Loc))
2428  return Loc.IP;
2429 
2430  Directive OMPD = Directive::OMPD_critical;
2431  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2432  Value *Ident = getOrCreateIdent(SrcLocStr);
2433  Value *ThreadId = getOrCreateThreadID(Ident);
2434  Value *LockVar = getOMPCriticalRegionLock(CriticalName);
2435  Value *Args[] = {Ident, ThreadId, LockVar};
2436 
2437  SmallVector<Value *, 4> EnterArgs(std::begin(Args), std::end(Args));
2438  Function *RTFn = nullptr;
2439  if (HintInst) {
2440  // Add Hint to entry Args and create call
2441  EnterArgs.push_back(HintInst);
2442  RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
2443  } else {
2444  RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
2445  }
2446  Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
2447 
2448  Function *ExitRTLFn =
2449  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
2450  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
2451 
2452  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
2453  /*Conditional*/ false, /*hasFinalize*/ true);
2454 }
2455 
2456 OpenMPIRBuilder::InsertPointTy
2457 OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc,
2458  InsertPointTy AllocaIP, unsigned NumLoops,
2459  ArrayRef<llvm::Value *> StoreValues,
2460  const Twine &Name, bool IsDependSource) {
2461  if (!updateToLocation(Loc))
2462  return Loc.IP;
2463 
2464  // Allocate space for vector and generate alloc instruction.
2465  auto *ArrI64Ty = ArrayType::get(Int64, NumLoops);
2466  Builder.restoreIP(AllocaIP);
2467  AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty, nullptr, Name);
2468  ArgsBase->setAlignment(Align(8));
2469  Builder.restoreIP(Loc.IP);
2470 
2471  // Store the index value with offset in depend vector.
2472  for (unsigned I = 0; I < NumLoops; ++I) {
2473  Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
2474  ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(I)});
2475  Builder.CreateStore(StoreValues[I], DependAddrGEPIter);
2476  }
2477 
2478  Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
2479  ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
2480 
2481  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2482  Value *Ident = getOrCreateIdent(SrcLocStr);
2483  Value *ThreadId = getOrCreateThreadID(Ident);
2484  Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
2485 
2486  Function *RTLFn = nullptr;
2487  if (IsDependSource)
2488  RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
2489  else
2490  RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
2491  Builder.CreateCall(RTLFn, Args);
2492 
2493  return Builder.saveIP();
2494 }
2495 
2496 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd(
2497  const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
2498  FinalizeCallbackTy FiniCB, bool IsThreads) {
2499  if (!updateToLocation(Loc))
2500  return Loc.IP;
2501 
2502  Directive OMPD = Directive::OMPD_ordered;
2503  Instruction *EntryCall = nullptr;
2504  Instruction *ExitCall = nullptr;
2505 
2506  if (IsThreads) {
2507  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2508  Value *Ident = getOrCreateIdent(SrcLocStr);
2509  Value *ThreadId = getOrCreateThreadID(Ident);
2510  Value *Args[] = {Ident, ThreadId};
2511 
2512  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
2513  EntryCall = Builder.CreateCall(EntryRTLFn, Args);
2514 
2515  Function *ExitRTLFn =
2516  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
2517  ExitCall = Builder.CreateCall(ExitRTLFn, Args);
2518  }
2519 
2520  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
2521  /*Conditional*/ false, /*hasFinalize*/ true);
2522 }
2523 
2524 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
2525  Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
2526  BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
2527  bool HasFinalize, bool IsCancellable) {
2528 
2529  if (HasFinalize)
2530  FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
2531 
2532  // Create inlined region's entry and body blocks, in preparation
2533  // for conditional creation
2534  BasicBlock *EntryBB = Builder.GetInsertBlock();
2535  Instruction *SplitPos = EntryBB->getTerminator();
2536  if (!isa_and_nonnull<BranchInst>(SplitPos))
2537  SplitPos = new UnreachableInst(Builder.getContext(), EntryBB);
2538  BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
2539  BasicBlock *FiniBB =
2540  EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");
2541 
2542  Builder.SetInsertPoint(EntryBB->getTerminator());
2543  emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
2544 
2545  // generate body
2546  BodyGenCB(/* AllocaIP */ InsertPointTy(),
2547  /* CodeGenIP */ Builder.saveIP(), *FiniBB);
2548 
2549  // If we didn't emit a branch to FiniBB during body generation, it means
2550  // FiniBB is unreachable (e.g. while(1);). Stop generating all the
2551  // unreachable blocks, and remove anything we are not going to use.
2552  auto SkipEmittingRegion = FiniBB->hasNPredecessors(0);
2553  if (SkipEmittingRegion) {
2554  FiniBB->eraseFromParent();
2555  ExitCall->eraseFromParent();
2556  // Discard finalization if we have it.
2557  if (HasFinalize) {
2558  assert(!FinalizationStack.empty() &&
2559  "Unexpected finalization stack state!");
2560  FinalizationStack.pop_back();
2561  }
2562  } else {
2563  // emit exit call and do any needed finalization.
2564  auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
2565  assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
2566  FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
2567  "Unexpected control flow graph state!!");
2568  emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
2569  assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
2570  "Unexpected Control Flow State!");
2571  MergeBlockIntoPredecessor(FiniBB);
2572  }
2573 
2574  // If we are skipping the region of a non-conditional, remove the exit
2575  // block, and clear the builder's insertion point.
2576  assert(SplitPos->getParent() == ExitBB &&
2577  "Unexpected Insertion point location!");
2578  if (!Conditional && SkipEmittingRegion) {
2579  ExitBB->eraseFromParent();
2580  Builder.ClearInsertionPoint();
2581  } else {
2582  auto merged = MergeBlockIntoPredecessor(ExitBB);
2583  BasicBlock *ExitPredBB = SplitPos->getParent();
2584  auto InsertBB = merged ? ExitPredBB : ExitBB;
2585  if (!isa_and_nonnull<BranchInst>(SplitPos))
2586  SplitPos->eraseFromParent();
2587  Builder.SetInsertPoint(InsertBB);
2588  }
2589 
2590  return Builder.saveIP();
2591 }
2592 
2593 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
2594  Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {
2595  // If there is nothing to do, return the current insertion point.
2596  if (!Conditional || !EntryCall)
2597  return Builder.saveIP();
2598 
2599  BasicBlock *EntryBB = Builder.GetInsertBlock();
2600  Value *CallBool = Builder.CreateIsNotNull(EntryCall);
2601  auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body");
2602  auto *UI = new UnreachableInst(Builder.getContext(), ThenBB);
2603 
2604  // Emit thenBB and set the Builder's insertion point there for
2605  // body generation next. Place the block after the current block.
2606  Function *CurFn = EntryBB->getParent();
2607  CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB);
2608 
2609  // Move Entry branch to end of ThenBB, and replace with conditional
2610  // branch (If-stmt)
2611  Instruction *EntryBBTI = EntryBB->getTerminator();
2612  Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
2613  EntryBBTI->removeFromParent();
2614  Builder.SetInsertPoint(UI);
2615  Builder.Insert(EntryBBTI);
2616  UI->eraseFromParent();
2617  Builder.SetInsertPoint(ThenBB->getTerminator());
2618 
2619  // return an insertion point to ExitBB.
2620  return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
2621 }
2622 
2623 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
2624  omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
2625  bool HasFinalize) {
2626 
2627  Builder.restoreIP(FinIP);
2628 
2629  // If there is finalization to do, emit it before the exit call
2630  if (HasFinalize) {
2631  assert(!FinalizationStack.empty() &&
2632  "Unexpected finalization stack state!");
2633 
2634  FinalizationInfo Fi = FinalizationStack.pop_back_val();
2635  assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");
2636 
2637  Fi.FiniCB(FinIP);
2638 
2639  BasicBlock *FiniBB = FinIP.getBlock();
2640  Instruction *FiniBBTI = FiniBB->getTerminator();
2641 
2642  // set Builder IP for call creation
2643  Builder.SetInsertPoint(FiniBBTI);
2644  }
2645 
2646  if (!ExitCall)
2647  return Builder.saveIP();
2648 
2649  // Place the exit call as the last instruction before the finalization block terminator.
2650  ExitCall->removeFromParent();
2651  Builder.Insert(ExitCall);
2652 
2653  return IRBuilder<>::InsertPoint(ExitCall->getParent(),
2654  ExitCall->getIterator());
2655 }
2656 
2657 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
2658  InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
2659  llvm::IntegerType *IntPtrTy, bool BranchtoEnd) {
2660  if (!IP.isSet())
2661  return IP;
2662 
2663  IRBuilder<>::InsertPointGuard IPG(Builder);
2664 
2665  // creates the following CFG structure
2666  // OMP_Entry : (MasterAddr != PrivateAddr)?
2667  // F T
2668  // | \
2669  // | copyin.not.master
2670  // | /
2671  // v /
2672  // copyin.not.master.end
2673  // |
2674  // v
2675  // OMP.Entry.Next
2676 
2677  BasicBlock *OMP_Entry = IP.getBlock();
2678  Function *CurFn = OMP_Entry->getParent();
2679  BasicBlock *CopyBegin =
2680  BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn);
2681  BasicBlock *CopyEnd = nullptr;
2682 
2683  // If the entry block is terminated, split it to preserve the branch to the following
2684  // basic block (i.e. OMP.Entry.Next), otherwise, leave everything as is.
2685  if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) {
2686  CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(),
2687  "copyin.not.master.end");
2688  OMP_Entry->getTerminator()->eraseFromParent();
2689  } else {
2690  CopyEnd =
2691  BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn);
2692  }
2693 
2694  Builder.SetInsertPoint(OMP_Entry);
2695  Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
2696  Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
2697  Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
2698  Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
2699 
2700  Builder.SetInsertPoint(CopyBegin);
2701  if (BranchtoEnd)
2702  Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
2703 
2704  return Builder.saveIP();
2705 }
2706 
2707 CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
2708  Value *Size, Value *Allocator,
2709  std::string Name) {
2710  IRBuilder<>::InsertPointGuard IPG(Builder);
2711  Builder.restoreIP(Loc.IP);
2712 
2713  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2714  Value *Ident = getOrCreateIdent(SrcLocStr);
2715  Value *ThreadId = getOrCreateThreadID(Ident);
2716  Value *Args[] = {ThreadId, Size, Allocator};
2717 
2718  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
2719 
2720  return Builder.CreateCall(Fn, Args, Name);
2721 }
2722 
2723 CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
2724  Value *Addr, Value *Allocator,
2725  std::string Name) {
2726  IRBuilder<>::InsertPointGuard IPG(Builder);
2727  Builder.restoreIP(Loc.IP);
2728 
2729  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2730  Value *Ident = getOrCreateIdent(SrcLocStr);
2731  Value *ThreadId = getOrCreateThreadID(Ident);
2732  Value *Args[] = {ThreadId, Addr, Allocator};
2733  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
2734  return Builder.CreateCall(Fn, Args, Name);
2735 }
2736 
2737 CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
2738  const LocationDescription &Loc, llvm::Value *Pointer,
2739  llvm::ConstantInt *Size, const llvm::Twine &Name) {
2740  IRBuilder<>::InsertPointGuard IPG(Builder);
2741  Builder.restoreIP(Loc.IP);
2742 
2743  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2744  Value *Ident = getOrCreateIdent(SrcLocStr);
2745  Value *ThreadId = getOrCreateThreadID(Ident);
2746  Constant *ThreadPrivateCache =
2747  getOrCreateOMPInternalVariable(Int8PtrPtr, Name);
2748  llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};
2749 
2750  Function *Fn =
2751  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
2752 
2753  return Builder.CreateCall(Fn, Args);
2754 }
2755 
2756 OpenMPIRBuilder::InsertPointTy
2757 OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime) {
2758  if (!updateToLocation(Loc))
2759  return Loc.IP;
2760 
2761  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2762  Value *Ident = getOrCreateIdent(SrcLocStr);
2763  ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD);
2764  ConstantInt *UseGenericStateMachine =
2765  ConstantInt::getBool(Int32->getContext(), !IsSPMD);
2766  ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
2767 
2768  Function *Fn = getOrCreateRuntimeFunctionPtr(
2769  omp::RuntimeFunction::OMPRTL___kmpc_target_init);
2770 
2771  CallInst *ThreadKind =
2772  Builder.CreateCall(Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
2773 
2774  Value *ExecUserCode = Builder.CreateICmpEQ(
2775  ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), "exec_user_code");
2776 
2777  // ThreadKind = __kmpc_target_init(...)
2778  // if (ThreadKind == -1)
2779  // user_code
2780  // else
2781  // return;
2782 
2783  auto *UI = Builder.CreateUnreachable();
2784  BasicBlock *CheckBB = UI->getParent();
2785  BasicBlock *UserCodeEntryBB = CheckBB->splitBasicBlock(UI, "user_code.entry");
2786 
2787  BasicBlock *WorkerExitBB = BasicBlock::Create(
2788  CheckBB->getContext(), "worker.exit", CheckBB->getParent());
2789  Builder.SetInsertPoint(WorkerExitBB);
2790  Builder.CreateRetVoid();
2791 
2792  auto *CheckBBTI = CheckBB->getTerminator();
2793  Builder.SetInsertPoint(CheckBBTI);
2794  Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
2795 
2796  CheckBBTI->eraseFromParent();
2797  UI->eraseFromParent();
2798 
2799  // Continue in the "user_code" block, see diagram above and in
2800  // openmp/libomptarget/deviceRTLs/common/include/target.h .
2801  return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt());
2802 }
2803 
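// --- Illustrative usage sketch (not part of OMPIRBuilder.cpp) ----------------
// Typical device-side bracketing of a target region: initialize the device
// runtime, emit the user code from the returned insert point, then deinit.
// Loc is assumed to point into the kernel entry function.
static void exampleTargetInitDeinit(
    OpenMPIRBuilder &OMPBuilder,
    const OpenMPIRBuilder::LocationDescription &Loc) {
  OpenMPIRBuilder::InsertPointTy UserCodeIP = OMPBuilder.createTargetInit(
      Loc, /*IsSPMD=*/true, /*RequiresFullRuntime=*/false);
  // ... emit the target region body starting at UserCodeIP ...
  OpenMPIRBuilder::LocationDescription ExitLoc(UserCodeIP, Loc.DL);
  OMPBuilder.createTargetDeinit(ExitLoc, /*IsSPMD=*/true,
                                /*RequiresFullRuntime=*/false);
}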
2804 void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
2805  bool IsSPMD, bool RequiresFullRuntime) {
2806  if (!updateToLocation(Loc))
2807  return;
2808 
2809  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2810  Value *Ident = getOrCreateIdent(SrcLocStr);
2811  ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD);
2812  ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
2813 
2814  Function *Fn = getOrCreateRuntimeFunctionPtr(
2815  omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
2816 
2817  Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal});
2818 }
2819 
2820 std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
2821  StringRef FirstSeparator,
2822  StringRef Separator) {
2823  SmallString<128> Buffer;
2824  llvm::raw_svector_ostream OS(Buffer);
2825  StringRef Sep = FirstSeparator;
2826  for (StringRef Part : Parts) {
2827  OS << Sep << Part;
2828  Sep = Separator;
2829  }
2830  return OS.str().str();
2831 }
2832 
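// --- Illustrative sketch (not part of OMPIRBuilder.cpp) ----------------------
// The first separator prefixes the first part and the plain separator joins
// the remaining parts, so this assumed call returns "_a.b.c".
static std::string exampleNameWithSeparators(OpenMPIRBuilder &OMPBuilder) {
  return OMPBuilder.getNameWithSeparators({"a", "b", "c"}, "_", ".");
}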
2833 Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable(
2834  llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2835  // TODO: Replace the twine arg with stringref to get rid of the conversion
2836  // logic. However, this is taken from the current implementation in clang as is.
2837  // Since this method is used in many places exclusively for OMP internal use
2838  // we will keep it as is temporarily until we move all users to the
2839  // builder and then, if possible, fix it everywhere in one go.
2840  SmallString<256> Buffer;
2841  llvm::raw_svector_ostream Out(Buffer);
2842  Out << Name;
2843  StringRef RuntimeName = Out.str();
2844  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2845  if (Elem.second) {
2846  assert(Elem.second->getType()->getPointerElementType() == Ty &&
2847  "OMP internal variable has different type than requested");
2848  } else {
2849  // TODO: investigate the appropriate linkage type used for the global
2850  // variable for possibly changing that to internal or private, or maybe
2851  // create different versions of the function for different OMP internal
2852  // variables.
2853  Elem.second = new llvm::GlobalVariable(
2854  M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage,
2855  llvm::Constant::getNullValue(Ty), Elem.first(),
2856  /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
2857  AddressSpace);
2858  }
2859 
2860  return Elem.second;
2861 }
2862 
2863 Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
2864  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2865  std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");
2866  return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name);
2867 }
2868 
2869 GlobalVariable *
2870 OpenMPIRBuilder::createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
2871  std::string VarName) {
2872  llvm::Constant *MaptypesArrayInit =
2873  llvm::ConstantDataArray::get(M.getContext(), Mappings);
2874  auto *MaptypesArrayGlobal = new llvm::GlobalVariable(
2875  M, MaptypesArrayInit->getType(),
2876  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MaptypesArrayInit,
2877  VarName);
2878  MaptypesArrayGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2879  return MaptypesArrayGlobal;
2880 }
2881 
2882 void OpenMPIRBuilder::createMapperAllocas(const LocationDescription &Loc,
2883  InsertPointTy AllocaIP,
2884  unsigned NumOperands,
2885  struct MapperAllocas &MapperAllocas) {
2886  if (!updateToLocation(Loc))
2887  return;
2888 
2889  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
2890  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
2891  Builder.restoreIP(AllocaIP);
2892  AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI8PtrTy);
2893  AllocaInst *Args = Builder.CreateAlloca(ArrI8PtrTy);
2894  AllocaInst *ArgSizes = Builder.CreateAlloca(ArrI64Ty);
2895  Builder.restoreIP(Loc.IP);
2896  MapperAllocas.ArgsBase = ArgsBase;
2897  MapperAllocas.Args = Args;
2898  MapperAllocas.ArgSizes = ArgSizes;
2899 }
2900 
2901 void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc,
2902  Function *MapperFunc, Value *SrcLocInfo,
2903  Value *MaptypesArg, Value *MapnamesArg,
2904  struct MapperAllocas &MapperAllocas,
2905  int64_t DeviceID, unsigned NumOperands) {
2906  if (!updateToLocation(Loc))
2907  return;
2908 
2909  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
2910  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
2911  Value *ArgsBaseGEP =
2912  Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
2913  {Builder.getInt32(0), Builder.getInt32(0)});
2914  Value *ArgsGEP =
2915  Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
2916  {Builder.getInt32(0), Builder.getInt32(0)});
2917  Value *ArgSizesGEP =
2918  Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
2919  {Builder.getInt32(0), Builder.getInt32(0)});
2920  Value *NullPtr = Constant::getNullValue(Int8Ptr->getPointerTo());
2921  Builder.CreateCall(MapperFunc,
2922  {SrcLocInfo, Builder.getInt64(DeviceID),
2923  Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
2924  ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
2925 }
2926 
2927 bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
2928  const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) {
2929  assert(!(AO == AtomicOrdering::NotAtomic ||
2930  AO == AtomicOrdering::Unordered) &&
2931  "Unexpected Atomic Ordering.");
2932 
2933  bool Flush = false;
2934  AtomicOrdering FlushAO = AtomicOrdering::Monotonic;
2935 
2936  switch (AK) {
2937  case Read:
2938  if (AO == AtomicOrdering::Acquire || AO == AtomicOrdering::AcquireRelease ||
2939  AO == AtomicOrdering::SequentiallyConsistent) {
2940  FlushAO = AtomicOrdering::Acquire;
2941  Flush = true;
2942  }
2943  break;
2944  case Write:
2945  case Update:
2946  if (AO == AtomicOrdering::Release || AO == AtomicOrdering::AcquireRelease ||
2947  AO == AtomicOrdering::SequentiallyConsistent) {
2948  FlushAO = AtomicOrdering::Release;
2949  Flush = true;
2950  }
2951  break;
2952  case Capture:
2953  switch (AO) {
2954  case AtomicOrdering::Acquire:
2955  FlushAO = AtomicOrdering::Acquire;
2956  Flush = true;
2957  break;
2958  case AtomicOrdering::Release:
2959  FlushAO = AtomicOrdering::Release;
2960  Flush = true;
2961  break;
2962  case AtomicOrdering::AcquireRelease:
2963  case AtomicOrdering::SequentiallyConsistent:
2964  FlushAO = AtomicOrdering::AcquireRelease;
2965  Flush = true;
2966  break;
2967  default:
2968  // do nothing - leave silently.
2969  break;
2970  }
2971  }
2972 
2973  if (Flush) {
2974  // The flush runtime call does not yet take a memory ordering. Resolve which
2975  // atomic ordering to use here so it can be passed along once the runtime
2976  // supports it; for now, issue a plain flush call.
2977  // TODO: pass `FlushAO` after memory ordering support is added
2978  (void)FlushAO;
2979  emitFlush(Loc);
2980  }
2981 
2982  // for AO == AtomicOrdering::Monotonic and all other case combinations
2983  // do nothing
2984  return Flush;
2985 }
2986 
2987 OpenMPIRBuilder::InsertPointTy
2988 OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
2989  AtomicOpValue &X, AtomicOpValue &V,
2990  AtomicOrdering AO) {
2991  if (!updateToLocation(Loc))
2992  return Loc.IP;
2993 
2994  Type *XTy = X.Var->getType();
2995  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
2996  Type *XElemTy = XTy->getPointerElementType();
2997  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
2998  XElemTy->isPointerTy()) &&
2999  "OMP atomic read expected a scalar type");
3000 
3001  Value *XRead = nullptr;
3002 
3003  if (XElemTy->isIntegerTy()) {
3004  LoadInst *XLD =
3005  Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read");
3006  XLD->setAtomic(AO);
3007  XRead = cast<Value>(XLD);
3008  } else {
3009  // We need to bitcast and perform atomic op as integer
3010  unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
3011  IntegerType *IntCastTy =
3012  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3013  Value *XBCast = Builder.CreateBitCast(
3014  X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.src.int.cast");
3015  LoadInst *XLoad =
3016  Builder.CreateLoad(IntCastTy, XBCast, X.IsVolatile, "omp.atomic.load");
3017  XLoad->setAtomic(AO);
3018  if (XElemTy->isFloatingPointTy()) {
3019  XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
3020  } else {
3021  XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast");
3022  }
3023  }
3024  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
3025  Builder.CreateStore(XRead, V.Var, V.IsVolatile);
3026  return Builder.saveIP();
3027 }
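// Editor's sketch (not part of OMPIRBuilder.cpp): lowering '#pragma omp atomic
// read' ("v = x") could call createAtomicRead as below; OMPBuilder, Loc, and
// the two pointer values are assumed to come from the frontend.
static void emitExampleAtomicRead(OpenMPIRBuilder &OMPBuilder,
                                  const OpenMPIRBuilder::LocationDescription &Loc,
                                  Value *XPtr, Value *VPtr) {
  OpenMPIRBuilder::AtomicOpValue X, V;
  X.Var = XPtr;
  V.Var = VPtr;
  OMPBuilder.createAtomicRead(Loc, X, V, AtomicOrdering::Monotonic);
}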
3028 
3029 OpenMPIRBuilder::InsertPointTy
3030 OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
3031  AtomicOpValue &X, Value *Expr,
3032  AtomicOrdering AO) {
3033  if (!updateToLocation(Loc))
3034  return Loc.IP;
3035 
3036  Type *XTy = X.Var->getType();
3037  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
3038  Type *XElemTy = XTy->getPointerElementType();
3039  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3040  XElemTy->isPointerTy()) &&
3041  "OMP atomic write expected a scalar type");
3042 
3043  if (XElemTy->isIntegerTy()) {
3044  StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile);
3045  XSt->setAtomic(AO);
3046  } else {
3047  // We need to bitcast and perform atomic op as integers
3048  unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
3049  IntegerType *IntCastTy =
3050  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3051  Value *XBCast = Builder.CreateBitCast(
3052  X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.dst.int.cast");
3053  Value *ExprCast =
3054  Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast");
3055  StoreInst *XSt = Builder.CreateStore(ExprCast, XBCast, X.IsVolatile);
3056  XSt->setAtomic(AO);
3057  }
3058 
3059  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
3060  return Builder.saveIP();
3061 }
3062 
3063 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
3064  const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
3065  Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
3066  AtomicUpdateCallbackTy &UpdateOp, bool IsXLHSInRHSPart) {
3067  if (!updateToLocation(Loc))
3068  return Loc.IP;
3069 
3070  LLVM_DEBUG({
3071  Type *XTy = X.Var->getType();
3072  assert(XTy->isPointerTy() &&
3073  "OMP Atomic expects a pointer to target memory");
3074  Type *XElemTy = XTy->getPointerElementType();
3075  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3076  XElemTy->isPointerTy()) &&
3077  "OMP atomic update expected a scalar type");
3078  assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
3079  (RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) &&
3080  "OpenMP atomic does not support LT or GT operations");
3081  });
3082 
3083  emitAtomicUpdate(AllocIP, X.Var, Expr, AO, RMWOp, UpdateOp, X.IsVolatile,
3084  IsXLHSInRHSPart);
3085  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
3086  return Builder.saveIP();
3087 }
3088 
3089 Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
3090  AtomicRMWInst::BinOp RMWOp) {
3091  switch (RMWOp) {
3092  case AtomicRMWInst::Add:
3093  return Builder.CreateAdd(Src1, Src2);
3094  case AtomicRMWInst::Sub:
3095  return Builder.CreateSub(Src1, Src2);
3096  case AtomicRMWInst::And:
3097  return Builder.CreateAnd(Src1, Src2);
3098  case AtomicRMWInst::Nand:
3099  return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
3100  case AtomicRMWInst::Or:
3101  return Builder.CreateOr(Src1, Src2);
3102  case AtomicRMWInst::Xor:
3103  return Builder.CreateXor(Src1, Src2);
3104  case AtomicRMWInst::Xchg:
3105  case AtomicRMWInst::FAdd:
3106  case AtomicRMWInst::FSub:
3107  case AtomicRMWInst::BAD_BINOP:
3108  case AtomicRMWInst::Max:
3109  case AtomicRMWInst::Min:
3110  case AtomicRMWInst::UMax:
3111  case AtomicRMWInst::UMin:
3112  llvm_unreachable("Unsupported atomic update operation");
3113  }
3114  llvm_unreachable("Unsupported atomic update operation");
3115 }
3116 
3117 std::pair<Value *, Value *>
3118 OpenMPIRBuilder::emitAtomicUpdate(Instruction *AllocIP, Value *X, Value *Expr,
3119  AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
3120  AtomicUpdateCallbackTy &UpdateOp,
3121  bool VolatileX, bool IsXLHSInRHSPart) {
3122  Type *XElemTy = X->getType()->getPointerElementType();
3123 
3124  bool DoCmpExch =
3125  ((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) ||
3126  (RMWOp == AtomicRMWInst::FSub) ||
3127  (RMWOp == AtomicRMWInst::Sub && !IsXLHSInRHSPart);
3128 
3129  std::pair<Value *, Value *> Res;
3130  if (XElemTy->isIntegerTy() && !DoCmpExch) {
3131  Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
3132  // not needed except in case of postfix captures. Generate anyway for
3133  // consistency with the else part. Will be removed with any DCE pass.
3134  Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
3135  } else {
3136  unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace();
3137  IntegerType *IntCastTy =
3138  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3139  Value *XBCast =
3140  Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
3141  LoadInst *OldVal =
3142  Builder.CreateLoad(IntCastTy, XBCast, X->getName() + ".atomic.load");
3143  OldVal->setAtomic(AO);
3144  // CurBB
3145  // | /---\
3146  // ContBB |
3147  // | \---/
3148  // ExitBB
3149  BasicBlock *CurBB = Builder.GetInsertBlock();
3150  Instruction *CurBBTI = CurBB->getTerminator();
3151  CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
3152  BasicBlock *ExitBB =
3153  CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit");
3154  BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(),
3155  X->getName() + ".atomic.cont");
3156  ContBB->getTerminator()->eraseFromParent();
3157  Builder.SetInsertPoint(ContBB);
3158  llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
3159  PHI->addIncoming(OldVal, CurBB);
3160  AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
3161  NewAtomicAddr->setName(X->getName() + "x.new.val");
3162  NewAtomicAddr->moveBefore(AllocIP);
3163  IntegerType *NewAtomicCastTy =
3164  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3165  bool IsIntTy = XElemTy->isIntegerTy();
3166  Value *NewAtomicIntAddr =
3167  (IsIntTy)
3168  ? NewAtomicAddr
3169  : Builder.CreateBitCast(NewAtomicAddr,
3170  NewAtomicCastTy->getPointerTo(Addrspace));
3171  Value *OldExprVal = PHI;
3172  if (!IsIntTy) {
3173  if (XElemTy->isFloatingPointTy()) {
3174  OldExprVal = Builder.CreateBitCast(PHI, XElemTy,
3175  X->getName() + ".atomic.fltCast");
3176  } else {
3177  OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy,
3178  X->getName() + ".atomic.ptrCast");
3179  }
3180  }
3181 
3182  Value *Upd = UpdateOp(OldExprVal, Builder);
3183  Builder.CreateStore(Upd, NewAtomicAddr);
3184  LoadInst *DesiredVal = Builder.CreateLoad(XElemTy, NewAtomicIntAddr);
3185  Value *XAddr =
3186  (IsIntTy)
3187  ? X
3188  : Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
3189  AtomicOrdering Failure =
3190  llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
3191  AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg(
3192  XAddr, OldExprVal, DesiredVal, llvm::MaybeAlign(), AO, Failure);
3193  Result->setVolatile(VolatileX);
3194  Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0);
3195  Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
3196  PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
3197  Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
3198 
3199  Res.first = OldExprVal;
3200  Res.second = Upd;
3201 
3202  // set Insertion point in exit block
3203  if (UnreachableInst *ExitTI =
3204  dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
3205  CurBBTI->eraseFromParent();
3206  Builder.SetInsertPoint(ExitBB);
3207  } else {
3208  Builder.SetInsertPoint(ExitBB->getTerminator());
3209  }
3210  }
3211 
3212  return Res;
3213 }
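// Editor's note (illustrative, not part of the file): for the compare-exchange
// path the UpdateOp callback builds the new value from the loaded old value;
// an 'x = x + expr' update could be expressed (with Expr assumed in scope) as
//   AtomicUpdateCallbackTy UpdateOp = [&](Value *OldVal, IRBuilder<> &IRB) {
//     return IRB.CreateAdd(OldVal, Expr);
//   };
// The returned pair is <old value of x, updated value>, which the capture
// generator below uses to select the value to store into 'v'.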
3214 
3215 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
3216  const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
3217  AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
3218  AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
3219  bool UpdateExpr, bool IsPostfixUpdate, bool IsXLHSInRHSPart) {
3220  if (!updateToLocation(Loc))
3221  return Loc.IP;
3222 
3223  LLVM_DEBUG({
3224  Type *XTy = X.Var->getType();
3225  assert(XTy->isPointerTy() &&
3226  "OMP Atomic expects a pointer to target memory");
3227  Type *XElemTy = XTy->getPointerElementType();
3228  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3229  XElemTy->isPointerTy()) &&
3230  "OMP atomic capture expected a scalar type");
3231  assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
3232  "OpenMP atomic does not support LT or GT operations");
3233  });
3234 
3235  // If UpdateExpr is 'x' updated with some `expr` not based on 'x',
3236  // 'x' is simply atomically rewritten with 'expr'.
3237  AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
3238  std::pair<Value *, Value *> Result =
3239  emitAtomicUpdate(AllocIP, X.Var, Expr, AO, AtomicOp, UpdateOp,
3240  X.IsVolatile, IsXLHSInRHSPart);
3241 
3242  Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
3243  Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
3244 
3245  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
3246  return Builder.saveIP();
3247 }
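// Editor's note (illustrative, not part of the file): for the postfix form
// 'v = x; x = x binop expr;' the captured value is the old value of 'x'
// (Result.first), whereas for the prefix form 'x = x binop expr; v = x;' it is
// the updated value (Result.second), matching the IsPostfixUpdate selection
// above.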
3248 
3249 GlobalVariable *
3250 OpenMPIRBuilder::createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
3251  std::string VarName) {
3252  llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
3253  llvm::ArrayType::get(
3254  llvm::Type::getInt8Ty(M.getContext())->getPointerTo(), Names.size()),
3255  Names);
3256  auto *MapNamesArrayGlobal = new llvm::GlobalVariable(
3257  M, MapNamesArrayInit->getType(),
3258  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapNamesArrayInit,
3259  VarName);
3260  return MapNamesArrayGlobal;
3261 }
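// Editor's sketch (not part of OMPIRBuilder.cpp): building a mapnames global
// for two mapped variables; real callers would pass per-variable source
// location strings, the default one is used here only for brevity, and the
// global name ".offload_mapnames" is an assumed placeholder.
static GlobalVariable *emitExampleMapnames(OpenMPIRBuilder &OMPBuilder) {
  SmallVector<llvm::Constant *, 2> Names;
  Names.push_back(OMPBuilder.getOrCreateDefaultSrcLocStr());
  Names.push_back(OMPBuilder.getOrCreateDefaultSrcLocStr());
  return OMPBuilder.createOffloadMapnames(Names, ".offload_mapnames");
}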
3262 
3263 // Create all simple and struct types exposed by the runtime and remember
3264 // the llvm::PointerTypes of them for easy access later.
3265 void OpenMPIRBuilder::initializeTypes(Module &M) {
3266  LLVMContext &Ctx = M.getContext();
3267  StructType *T;
3268 #define OMP_TYPE(VarName, InitValue) VarName = InitValue;
3269 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
3270  VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
3271  VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
3272 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
3273  VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
3274  VarName##Ptr = PointerType::getUnqual(VarName);
3275 #define OMP_STRUCT_TYPE(VarName, StructName, ...) \
3276  T = StructType::getTypeByName(Ctx, StructName); \
3277  if (!T) \
3278  T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \
3279  VarName = T; \
3280  VarName##Ptr = PointerType::getUnqual(T);
3281 #include "llvm/Frontend/OpenMP/OMPKinds.def"
3282 }
3283 
3284 void OpenMPIRBuilder::OutlineInfo::collectBlocks(
3285  SmallPtrSetImpl<BasicBlock *> &BlockSet,
3286  SmallVectorImpl<BasicBlock *> &BlockVector) {
3287  SmallVector<BasicBlock *, 32> Worklist;
3288  BlockSet.insert(EntryBB);
3289  BlockSet.insert(ExitBB);
3290 
3291  Worklist.push_back(EntryBB);
3292  while (!Worklist.empty()) {
3293  BasicBlock *BB = Worklist.pop_back_val();
3294  BlockVector.push_back(BB);
3295  for (BasicBlock *SuccBB : successors(BB))
3296  if (BlockSet.insert(SuccBB).second)
3297  Worklist.push_back(SuccBB);
3298  }
3299 }
3300 
3301 void CanonicalLoopInfo::collectControlBlocks(
3302  SmallVectorImpl<BasicBlock *> &BBs) {
3303  // We only count those BBs as control block for which we do not need to
3304  // reverse the CFG, i.e. not the loop body which can contain arbitrary control
3305  // flow. For consistency, this also means we do not add the Body block, which
3306  // is just the entry to the body code.
3307  BBs.reserve(BBs.size() + 6);
3308  BBs.append({Preheader, Header, Cond, Latch, Exit, After});
3309 }
3310 
3311 void CanonicalLoopInfo::assertOK() const {
3312 #ifndef NDEBUG
3313  // No constraints if this object currently does not describe a loop.
3314  if (!isValid())
3315  return;
3316 
3317  // Verify standard control-flow we use for OpenMP loops.
3318  assert(Preheader);
3319  assert(isa<BranchInst>(Preheader->getTerminator()) &&
3320  "Preheader must terminate with unconditional branch");
3321  assert(Preheader->getSingleSuccessor() == Header &&
3322  "Preheader must jump to header");
3323 
3324  assert(Header);
3325  assert(isa<BranchInst>(Header->getTerminator()) &&
3326  "Header must terminate with unconditional branch");
3327  assert(Header->getSingleSuccessor() == Cond &&
3328  "Header must jump to exiting block");
3329 
3330  assert(Cond);
3331  assert(Cond->getSinglePredecessor() == Header &&
3332  "Exiting block only reachable from header");
3333 
3334  assert(isa<BranchInst>(Cond->getTerminator()) &&
3335  "Exiting block must terminate with conditional branch");
3336  assert(size(successors(Cond)) == 2 &&
3337  "Exiting block must have two successors");
3338  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body &&
3339  "Exiting block's first successor must jump to the body");
3340  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit &&
3341  "Exiting block's second successor must exit the loop");
3342 
3343  assert(Body);
3344  assert(Body->getSinglePredecessor() == Cond &&
3345  "Body only reachable from exiting block");
3346  assert(!isa<PHINode>(Body->front()));
3347 
3348  assert(Latch);
3349  assert(isa<BranchInst>(Latch->getTerminator()) &&
3350  "Latch must terminate with unconditional branch");
3351  assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
3352  // TODO: To support simple redirecting of body code that ends in multiple
3353  // blocks, introduce another auxiliary basic block like preheader and after.
3354  assert(Latch->getSinglePredecessor() != nullptr);
3355  assert(!isa<PHINode>(Latch->front()));
3356 
3357  assert(Exit);
3358  assert(isa<BranchInst>(Exit->getTerminator()) &&
3359  "Exit block must terminate with unconditional branch");
3360  assert(Exit->getSingleSuccessor() == After &&
3361  "Exit block must jump to after block");
3362 
3363  assert(After);
3364  assert(After->getSinglePredecessor() == Exit &&
3365  "After block only reachable from exit block");
3366  assert(After->empty() || !isa<PHINode>(After->front()));
3367 
3368  Instruction *IndVar = getIndVar();
3369  assert(IndVar && "Canonical induction variable not found?");
3370  assert(isa<IntegerType>(IndVar->getType()) &&
3371  "Induction variable must be an integer");
3372  assert(cast<PHINode>(IndVar)->getParent() == Header &&
3373  "Induction variable must be a PHI in the loop header");
3374  assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
3375  assert(
3376  cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero());
3377  assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
3378 
3379  auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
3380  assert(cast<Instruction>(NextIndVar)->getParent() == Latch);
3381  assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add);
3382  assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
3383  assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
3384  ->isOne());
3385 
3386  Value *TripCount = getTripCount();
3387  assert(TripCount && "Loop trip count not found?");
3388  assert(IndVar->getType() == TripCount->getType() &&
3389  "Trip count and induction variable must have the same type");
3390 
3391  auto *CmpI = cast<CmpInst>(&Cond->front());
3392  assert(CmpI->getPredicate() == CmpInst::ICMP_ULT &&
3393  "Exit condition must be an unsigned less-than comparison");
3394  assert(CmpI->getOperand(0) == IndVar &&
3395  "Exit condition must compare the induction variable");
3396  assert(CmpI->getOperand(1) == TripCount &&
3397  "Exit condition must compare with the trip count");
3398 #endif
3399 }
3400 
3401 void CanonicalLoopInfo::invalidate() {
3402  Preheader = nullptr;
3403  Header = nullptr;
3404  Cond = nullptr;
3405  Body = nullptr;
3406  Latch = nullptr;
3407  Exit = nullptr;
3408  After = nullptr;
3409 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::CanonicalLoopInfo::getPreheaderIP
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
Definition: OMPIRBuilder.h:1515
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:478
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
AssumptionCache.h
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:457
llvm::OpenMPIRBuilder::createCachedThreadPrivate
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
Definition: OMPIRBuilder.cpp:2737
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
llvm::AtomicOrdering::AcquireRelease
@ AcquireRelease
Attrs
Function Attrs
Definition: README_ALTIVEC.txt:215
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2331
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
llvm::OpenMPIRBuilder::LocationDescription
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
Definition: OMPIRBuilder.h:150
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2036
addLoopMetadata
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
Definition: OMPIRBuilder.cpp:2088
llvm::OpenMPIRBuilder::OutlineInfo::ExitBB
BasicBlock * ExitBB
Definition: OMPIRBuilder.h:762
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:453
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::OpenMPIRBuilder::createSection
InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
Definition: OMPIRBuilder.cpp:1015
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:266
llvm::OpenMPIRBuilder::createCritical
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
Definition: OMPIRBuilder.cpp:2423
llvm::CanonicalLoopInfo::getAfter
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Definition: OMPIRBuilder.h:1484
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:255
createTargetMachine
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOpt::Level OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
Definition: OMPIRBuilder.cpp:2147
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:228
llvm::TargetOptions
Definition: TargetOptions.h:124
llvm::OpenMPIRBuilder::createLoopSkeleton
CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
Definition: OMPIRBuilder.cpp:1270
llvm::cl::Prefix
@ Prefix
Definition: CommandLine.h:164
llvm::Function::empty
bool empty() const
Definition: Function.h:739
llvm::Function::getBasicBlockList
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
Definition: Function.h:710
llvm::CodeExtractor::findAllocas
void findAllocas(const CodeExtractorAnalysisCache &CEAC, ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const
Find the set of allocas whose life ranges are contained within the outlined region.
Definition: CodeExtractor.cpp:495
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::ConstantStruct::get
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1327
llvm::OpenMPIRBuilder::ReductionInfo::getElementType
Type * getElementType() const
Returns the type of the element being reduced.
Definition: OMPIRBuilder.h:556
llvm::AtomicRMWInst::Xor
@ Xor
*p = old ^ v
Definition: Instructions.h:752
llvm::CodeExtractor::extractCodeRegion
Function * extractCodeRegion(const CodeExtractorAnalysisCache &CEAC)
Perform the extraction, returning the new function.
Definition: CodeExtractor.cpp:1590
Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:372
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:137
llvm::GlobalValue::NotThreadLocal
@ NotThreadLocal
Definition: GlobalValue.h:179
llvm::AtomicRMWInst::BinOp
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:738
llvm::ilist_node_with_parent::getNextNode
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:288
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::BasicBlock::empty
bool empty() const
Definition: BasicBlock.h:307
llvm::OpenMPIRBuilder::tileLoops
std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
Definition: OMPIRBuilder.cpp:1898
llvm::enumerate
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are are pair (A,B) such that A is the 0-based ...
Definition: STLExtras.h:1981
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:717
llvm::OpenMPIRBuilder::InsertPointTy
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
Definition: OMPIRBuilder.h:53
llvm::BasicBlock::replaceSuccessorsPhiUsesWith
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
Definition: BasicBlock.cpp:457
llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2647
llvm::OpenMPIRBuilder::getOrCreateThreadID
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
Definition: OMPIRBuilder.cpp:352
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:461
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:742
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2208
llvm::DeleteDeadBlocks
void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
Definition: BasicBlockUtils.cpp:94
llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:325
Error.h
OptimizationRemarkEmitter.h
llvm::AtomicOrdering::SequentiallyConsistent
@ SequentiallyConsistent
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::tgtok::VarName
@ VarName
Definition: TGLexer.h:71
llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Definition: TargetTransformInfo.h:462
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1580
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:321
llvm::Function::arg_size
size_t arg_size() const
Definition: Function.h:781
ScalarEvolution.h
Shift
bool Shift
Definition: README.txt:468
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::BasicBlock::eraseFromParent
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:129
llvm::OpenMPIRBuilder::AtomicOpValue
a struct to pack relevant information while generating atomic Ops
Definition: OMPIRBuilder.h:1189
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:535
llvm::CanonicalLoopInfo::getAfterIP
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
Definition: OMPIRBuilder.h:1527
getTripCount
static const SCEV * getTripCount(const SCEV *BECount, Type *IntPtr, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute trip count from the backedge taken count.
Definition: LoopIdiomRecognize.cpp:1050
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
llvm::BasicBlock::splitBasicBlock
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:381
llvm::GlobalValue::UnnamedAddr::Global
@ Global
llvm::ApproximateLoopSize
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
Definition: LoopUnrollPass.cpp:667
llvm::Optional
Definition: APInt.h:33
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::ConstantAsMetadata::get
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:419
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::MCID::Convergent
@ Convergent
Definition: MCInstrDesc.h:182
CodeExtractor.h
llvm::OpenMPIRBuilder::ReductionInfo::Variable
Value * Variable
Reduction variable of pointer type.
Definition: OMPIRBuilder.h:561
llvm::OpenMPIRBuilder::FinalizeCallbackTy
std::function< void(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
Definition: OMPIRBuilder.h:63
llvm::CmpInst::ICMP_SLE
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:750
llvm::computeUnrollCount
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
Definition: LoopUnrollPass.cpp:894
llvm::CanonicalLoopInfo::getFunction
Function * getFunction() const
Definition: OMPIRBuilder.h:1532
llvm::OpenMPIRBuilder::AtomicOpValue::Var
Value * Var
Definition: OMPIRBuilder.h:1190
llvm::BasicBlock::hasNPredecessors
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:286
llvm::CallBase::arg_begin
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1303
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
llvm::OpenMPIRBuilder::createAtomicCapture
InsertPointTy createAtomicCapture(const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXLHSInRHSPart)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
Definition: OMPIRBuilder.cpp:3215
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
llvm::TargetRegistry::lookupTarget
static const Target * lookupTarget(const std::string &Triple, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Definition: TargetRegistry.cpp:62
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:201
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:203
llvm::OpenMPIRBuilder::emitTaskyieldImpl
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
Definition: OMPIRBuilder.cpp:899
llvm::OpenMPIRBuilder::createReductions
InsertPointTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, bool IsNoWait=false)
Generator for '#omp reduction'.
Definition: OMPIRBuilder.cpp:1061
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::StoreInst::setAtomic
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
Definition: Instructions.h:384
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1208
llvm::OpenMPIRBuilder::collapseLoops
CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
Definition: OMPIRBuilder.cpp:1772
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::MDNode::operands
op_range operands() const
Definition: Metadata.h:1110
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1336
llvm::ConstantExpr::getPointerCast
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
Definition: Constants.cpp:2020
llvm::BasicBlock::getUniqueSuccessor
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:302
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:579
llvm::AtomicOrdering::Monotonic
@ Monotonic
llvm::CanonicalLoopInfo::getIndVar
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Definition: OMPIRBuilder.h:1501
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::gatherUnrollingPreferences
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
Definition: LoopUnrollPass.cpp:185
llvm::OpenMPIRBuilder::createOrderedDepend
InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for '#omp ordered depend (source | sink)'.
Definition: OMPIRBuilder.cpp:2457
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::Lock
static sys::Mutex Lock
Definition: NVPTXUtilities.cpp:39
llvm::OpenMPIRBuilder::finalize
void finalize(Function *Fn=nullptr, bool AllowExtractorSinking=false)
Finalize the underlying module, e.g., by outlining regions.
Definition: OMPIRBuilder.cpp:159
Mappings
Inject TLI Mappings
Definition: InjectTLIMappings.cpp:172
CommandLine.h
CodeMetrics.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:765
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1551
llvm::OpenMPIRBuilder::getOrCreateDefaultSrcLocStr
Constant * getOrCreateDefaultSrcLocStr()
Return the (LLVM-IR) string describing the default source location.
Definition: OMPIRBuilder.cpp:328
TargetMachine.h
llvm::OpenMPIRBuilder::emitMapperCall
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
Definition: OMPIRBuilder.cpp:2901
llvm::OpenMPIRBuilder::getOrCreateIdent
Value * getOrCreateIdent(Constant *SrcLocStr, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
Definition: OMPIRBuilder.cpp:258
OMPIRBuilder.h
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:519
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::CanonicalLoopInfo::assertOK
void assertOK() const
Consistency self-check.
Definition: OMPIRBuilder.cpp:3311
llvm::CanonicalLoopInfo::getCond
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
Definition: OMPIRBuilder.h:1452
llvm::OpenMPIRBuilder
An interface to create LLVM-IR for OpenMP directives.
Definition: OMPIRBuilder.h:29
llvm::OpenMPIRBuilder::createOMPFree
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
Definition: OMPIRBuilder.cpp:2723
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:493
cmp
< i32 >< i32 > cmp
Definition: README.txt:1447
llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:746
TileSize
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1393
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::OpenMPIRBuilder::createMapperAllocas
void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
Definition: OMPIRBuilder.cpp:2882
redirectAllPredecessorsTo
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
Definition: OMPIRBuilder.cpp:1733
llvm::OpenMPIRBuilder::createBarrier
InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
Definition: OMPIRBuilder.cpp:359
llvm::OpenMPIRBuilder::createAtomicRead
InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO)
Emit atomic Read for : V = X — Only Scalar data types.
Definition: OMPIRBuilder.cpp:2988
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
IP
Definition: NVPTXLowerArgs.cpp:166
TargetLibraryInfo.h
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:249
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
llvm::OpenMPIRBuilder::~OpenMPIRBuilder
~OpenMPIRBuilder()
Definition: OMPIRBuilder.cpp:244
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::CanonicalLoopInfo::isValid
bool isValid() const
Returns whether this object currently represents the IR of a loop.
Definition: OMPIRBuilder.h:1432
llvm::CodeExtractor::findInputsOutputs
void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, const ValueSet &Allocas) const
Compute the set of input values and output values for the code.
Definition: CodeExtractor.cpp:647
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:153
MDBuilder.h
llvm::AtomicOrdering::Acquire
@ Acquire
llvm::AtomicRMWInst::Nand
@ Nand
*p = ~(old & v)
Definition: Instructions.h:748
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
setCanonicalLoopTripCount
void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount)
Definition: OMPIRBuilder.cpp:1462
llvm::GlobalObject::addMetadata
void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
Definition: Metadata.cpp:1268
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:376
llvm::AssumptionAnalysis::run
AssumptionCache run(Function &F, FunctionAnalysisManager &)
Definition: AssumptionCache.h:177
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:900
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:73
llvm::CanonicalLoopInfo::getBodyIP
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
Definition: OMPIRBuilder.h:1521
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:777
removeUnusedBlocksFromParent
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
Definition: OMPIRBuilder.cpp:1741
llvm::Instruction::removeFromParent
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:74
llvm::OpenMPIRBuilder::MapperAllocas::Args
AllocaInst * Args
Definition: OMPIRBuilder.h:801
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::omp::IdentFlag
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their defintion in openmp/runtime/src/kmp...
Definition: OMPConstants.h:74
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::AtomicRMWInst::Xchg
@ Xchg
*p = v
Definition: Instructions.h:740
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::OpenMPIRBuilder::unrollLoopFull
void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
Definition: OMPIRBuilder.cpp:2114
llvm::OpenMPIRBuilder::emitTaskwaitImpl
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
Definition: OMPIRBuilder.cpp:881
llvm::OpenMPIRBuilder::createAtomicUpdate
InsertPointTy createAtomicUpdate(const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXLHSInRHSPart)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
Definition: OMPIRBuilder.cpp:3063
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::StringRef::str
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:245
llvm::OpenMPIRBuilder::createOMPAlloc
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_Alloc.
Definition: OMPIRBuilder.cpp:2707
llvm::SetVector::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
llvm::OpenMPIRBuilder::addAttributes
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Definition: OMPIRBuilder.cpp:59
llvm::None
const NoneType None
Definition: None.h:23
llvm::AtomicRMWInst::Add
@ Add
*p = old + v
Definition: Instructions.h:742
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::OpenMPIRBuilder::applyDynamicWorkshareLoop
InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, omp::OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk=nullptr)
Modifies the canonical loop to be a dynamically-scheduled workshare loop.
Definition: OMPIRBuilder.cpp:1599
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:282
llvm::SmallString< 128 >
CFG.h
llvm::Twine::str
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
llvm::OpenMPIRBuilder::OutlineInfo::EntryBB
BasicBlock * EntryBB
Definition: OMPIRBuilder.h:762
llvm::Function::getAttributes
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:326
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:201
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
llvm::omp::RuntimeFunction
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
Definition: OMPConstants.h:54
llvm::SmallString::append
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:67
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::OpenMPIRBuilder::createTaskyield
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
Definition: OMPIRBuilder.cpp:910
llvm::AtomicRMWInst::UMin
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:760
llvm::cl::opt< bool >
llvm::CanonicalLoopInfo::getPreheader
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Definition: OMPIRBuilder.h:1438
llvm::ClrHandlerType::Filter
@ Filter
llvm::OpenMPIRBuilder::createOffloadMaptypes
GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
Definition: OMPIRBuilder.cpp:2870
llvm::OpenMPIRBuilder::AtomicOpValue::IsVolatile
bool IsVolatile
Definition: OMPIRBuilder.h:1192
llvm::OpenMPIRBuilder::createSections
InsertPointTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
Definition: OMPIRBuilder.cpp:916
llvm::OpenMPIRBuilder::MapperAllocas::ArgsBase
AllocaInst * ArgsBase
Definition: OMPIRBuilder.h:800
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::OpenMPIRBuilder::ReductionInfo::PrivateVariable
Value * PrivateVariable
Thread-private partial reduction variable.
Definition: OMPIRBuilder.h:564
llvm::OpenMPIRBuilder::createOrderedThreadsSimd
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for '#omp ordered [threads | simd]'.
Definition: OMPIRBuilder.cpp:2496
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:181
llvm::AtomicRMWInst::Sub
@ Sub
*p = old - v
Definition: Instructions.h:744
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
uint64_t
llvm::ScalarEvolutionAnalysis::run
ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
Definition: ScalarEvolution.cpp:12893
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:169
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2783
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3124
DebugInfo.h
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:428
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:967
llvm::AtomicOrdering::Unordered
@ Unordered
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::OpenMPIRBuilder::emitCancelationCheckImpl
void emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
Definition: OMPIRBuilder.cpp:464
llvm::OpenMPIRBuilder::createCanonicalLoop
CanonicalLoopInfo * createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
Definition: OMPIRBuilder.cpp:1340
llvm::AtomicRMWInst::Min
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:756
llvm::OpenMPIRBuilder::createTargetInit
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime)
The omp target interface.
Definition: OMPIRBuilder.cpp:2757
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:576
llvm::AtomicRMWInst::Or
@ Or
*p = old | v
Definition: Instructions.h:750
llvm::OpenMPIRBuilder::createParallel
IRBuilder ::InsertPoint createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
Definition: OMPIRBuilder.cpp:503
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:473
llvm::Function::Create
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:138
llvm::omp::OMPScheduleType::Static
@ Static
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MDBuilder::createCallbackEncoding
MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
Definition: MDBuilder.cpp:107
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::OpenMPIRBuilder::LocationDescription::DL
DebugLoc DL
Definition: OMPIRBuilder.h:158
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:138
llvm::OpenMPIRBuilder::unrollLoopHeuristic
void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
Definition: OMPIRBuilder.cpp:2121
llvm::ArrayType::get
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:602
llvm::OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
Definition: OMPIRBuilder.cpp:150
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::OpenMPIRBuilder::createMaster
InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
Definition: OMPIRBuilder.cpp:1223
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
llvm::SplitBlockAndInsertIfThenElse
void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
Definition: BasicBlockUtils.cpp:1439
llvm::CodeGenOpt::Aggressive
@ Aggressive
Definition: CodeGen.h:56
llvm::OpenMPIRBuilder::createDebugKind
GlobalValue * createDebugKind(unsigned DebugLevel)
Create a global value containing the DebugLevel to control debuggin in the module.
Definition: OMPIRBuilder.cpp:248
llvm::OpenMPIRBuilder::createFlush
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
Definition: OMPIRBuilder.cpp:875
llvm::CallBase::arg_end
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1309
llvm::DominatorTreeAnalysis::run
DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Definition: Dominators.cpp:360
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::Function::getFunction
const Function & getFunction() const
Definition: Function.h:136
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
llvm::OpenMPIRBuilder::OutlineInfo
Helper that contains information about regions we need to outline during finalization.
Definition: OMPIRBuilder.h:759
llvm::CanonicalLoopInfo
Class to represented the control flow structure of an OpenMP canonical loop.
Definition: OMPIRBuilder.h:1406
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Triple.h
llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:749
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1532
TargetOptions.h
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:100
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::MergeBlockIntoPredecessor
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false)
Attempts to merge a block into its predecessor, if possible.
Definition: BasicBlockUtils.cpp:173
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:745
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::Value::getNumUses
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:255
llvm::OpenMPIRBuilder::createOffloadMapnames
GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
Definition: OMPIRBuilder.cpp:3250
llvm::ConstantDataArray::get
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:691
llvm::BasicBlock::getUniquePredecessor
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:272
llvm::GlobalValue::WeakODRLinkage
@ WeakODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:53
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
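A short sketch, assuming an existing function F and IRBuilder positioned in one of its blocks: create a fresh block and continue emitting into it. The block name "omp.region" is made up for illustration.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
static llvm::BasicBlock *appendBlock(llvm::Function &F, llvm::IRBuilder<> &Builder) {
  llvm::BasicBlock *NewBB =
      llvm::BasicBlock::Create(F.getContext(), "omp.region", &F);
  Builder.CreateBr(NewBB);       // terminate the current block with a branch
  Builder.SetInsertPoint(NewBB); // continue emitting into the new block
  return NewBB;
}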
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:776
uint32_t
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1748
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::ConstantDataArray::getString
static Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
Definition: Constants.cpp:3041
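A hedged sketch of turning a string into a private constant global, in the spirit of the source-location strings this builder emits; the symbol name ".str.loc" is invented for the example.
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
static llvm::Constant *makeStringGlobal(llvm::Module &M, llvm::StringRef Str) {
  // Build an [N x i8] constant with a trailing NUL.
  llvm::Constant *Init =
      llvm::ConstantDataArray::getString(M.getContext(), Str, /*AddNull=*/true);
  auto *GV = new llvm::GlobalVariable(M, Init->getType(), /*isConstant=*/true,
                                      llvm::GlobalValue::PrivateLinkage, Init,
                                      ".str.loc");
  return GV;
}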
llvm::GlobalObject::hasMetadata
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition: Value.h:589
llvm::CodeExtractor
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
getKmpcForDynamicNextForType
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
Definition: OMPIRBuilder.cpp:1588
llvm::OpenMPIRBuilder::MapperAllocas::ArgSizes
AllocaInst * ArgSizes
Definition: OMPIRBuilder.h:802
llvm::OpenMPIRBuilder::OutlineInfo::PostOutlineCB
PostOutlineCBTy PostOutlineCB
Definition: OMPIRBuilder.h:761
llvm::LoadInst::setAtomic
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Definition: Instructions.h:253
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::MDNode::getDistinct
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1216
llvm::OpenMPIRBuilder::unrollLoopPartial
void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
Definition: OMPIRBuilder.cpp:2299
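Illustrative only, assuming an OpenMPIRBuilder and a CanonicalLoopInfo already exist: request a partial unroll by a fixed factor. Passing a non-null UnrolledCLI asks the builder to keep the resulting outer loop in canonical form.
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/DebugLoc.h"
static void unrollByFour(llvm::OpenMPIRBuilder &OMPBuilder,
                         llvm::CanonicalLoopInfo *Loop, llvm::DebugLoc DL) {
  llvm::CanonicalLoopInfo *Unrolled = nullptr;
  OMPBuilder.unrollLoopPartial(DL, Loop, /*Factor=*/4, &Unrolled);
}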
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
llvm::OpenMPIRBuilder::createMasked
InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for '#omp masked'.
Definition: OMPIRBuilder.cpp:1247
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
llvm::OpenMPIRBuilder::applyWorkshareLoop
InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier)
Modifies the canonical loop to be a workshare loop.
Definition: OMPIRBuilder.cpp:1561
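A minimal sketch, assuming a CanonicalLoopInfo *CLI has already been built (for example via the builder's canonical-loop generator) and that AllocaIP points at a suitable entry-block position; none of this is taken from the file itself.
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/DebugLoc.h"
static void lowerToWorkshareLoop(llvm::OpenMPIRBuilder &OMPBuilder,
                                 llvm::CanonicalLoopInfo *CLI,
                                 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
                                 llvm::DebugLoc DL) {
  // Rewrites the canonical loop so each thread runs its share of the
  // iteration space and emits an implicit barrier afterwards.
  OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true);
}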
llvm::GlobalValue::CommonLinkage
@ CommonLinkage
Tentative definitions.
Definition: GlobalValue.h:58
llvm::LoopAnalysis::run
LoopInfo run(Function &F, FunctionAnalysisManager &AM)
Definition: LoopInfo.cpp:961
computeHeuristicUnrollFactor
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performing unroll factor for CLI.
Definition: OMPIRBuilder.cpp:2168
llvm::PointerUnion< const Value *, const PseudoSourceValue * >
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:687
llvm::FunctionCallee::getCallee
Value * getCallee()
Definition: DerivedTypes.h:184
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:308
llvm::CanonicalLoopInfo::getExit
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
Definition: OMPIRBuilder.h:1474
llvm::AtomicOrdering::Release
@ Release
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
llvm::ConstantAsMetadata
Definition: Metadata.h:412
llvm::OpenMPIRBuilder::createTaskwait
void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
Definition: OMPIRBuilder.cpp:893
redirectTo
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
Definition: OMPIRBuilder.cpp:1716
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::OpenMPIRBuilder::ReductionInfo::ReductionGen
ReductionGenTy ReductionGen
Callback for generating the reduction body.
Definition: OMPIRBuilder.h:569
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:348
llvm::AtomicRMWInst::FSub
@ FSub
*p = old - v
Definition: Instructions.h:766
llvm::OpenMPIRBuilder::createAtomicWrite
InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO)
Emit an atomic write for: X = Expr (scalar data types only).
Definition: OMPIRBuilder.cpp:3030
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:153
llvm::ConstantInt::getBool
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:862
llvm::Function::getArg
Argument * getArg(unsigned i) const
Definition: Function.h:766
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
llvm::AtomicRMWInst::And
@ And
*p = old & v
Definition: Instructions.h:746
llvm::CanonicalLoopInfo::invalidate
void invalidate()
Invalidate this loop.
Definition: OMPIRBuilder.cpp:3401
llvm::MCID::Branch
@ Branch
Definition: MCInstrDesc.h:156
llvm::Module::getContext
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:261
llvm::OpenMPIRBuilder::createCancel
InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
Definition: OMPIRBuilder.cpp:414
llvm::omp::GV
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...
Definition: OMPGridValues.h:57
PassManager.h
OptimisticAttributes
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:738
llvm::Function::getFunctionType
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:176
llvm::ConstantArray::get
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1263
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::TargetIRAnalysis::run
Result run(const Function &F, FunctionAnalysisManager &)
Definition: TargetTransformInfo.cpp:1133
llvm::CanonicalLoopInfo::getHeader
BasicBlock * getHeader() const
The header is the entry for each iteration.
Definition: OMPIRBuilder.h:1445
llvm::OpenMPIRBuilder::getOrCreateSrcLocStr
Constant * getOrCreateSrcLocStr(StringRef LocStr)
Return the (LLVM-IR) string describing the source location LocStr.
Definition: OMPIRBuilder.cpp:291
llvm::CodeExtractorAnalysisCache
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:46
llvm::AnalysisManager::registerPass
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
Definition: PassManager.h:841
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
UnrollThresholdFactor
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
llvm::SmallString::str
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:259
getKmpcForDynamicInitForType
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
Definition: OMPIRBuilder.cpp:1572
llvm::MDNode::replaceOperandWith
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
Definition: Metadata.cpp:877
llvm::OpenMPIRBuilder::createTargetDeinit
void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime)
Create a runtime call for kmpc_target_deinit.
Definition: OMPIRBuilder.cpp:2804
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::OpenMPIRBuilder::ReductionInfo
Information about an OpenMP reduction.
Definition: OMPIRBuilder.h:548
llvm::AllocaInst::setAlignment
void setAlignment(Align Align)
Definition: Instructions.h:124
llvm::BasicBlock::back
const Instruction & back() const
Definition: BasicBlock.h:310
llvm::OpenMPIRBuilder::initialize
void initialize()
Initialize the internal state; this will put structure types and potentially other helpers into the ...
Definition: OMPIRBuilder.cpp:157
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:186
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition: RustDemangle.cpp:216
llvm::GlobalValue::PrivateLinkage
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
llvm::CanonicalLoopInfo::getLatch
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
Definition: OMPIRBuilder.h:1468
Allocator
Basic Register Allocator
Definition: RegAllocBasic.cpp:146
llvm::Type::getPointerElementType
Type * getPointerElementType() const
Definition: Type.h:380
ModuleUtils.h
llvm::TargetTransformInfo::UnrollingPreferences::Threshold
unsigned Threshold
The cost threshold for the unrolled loop.
Definition: TargetTransformInfo.h:436
UnrollLoop.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::remarks::Type::Failure
@ Failure
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:63
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
getFreshReductionFunc
Function * getFreshReductionFunc(Module &M)
Create a function with a unique name and a "void (i8*, i8*)" signature in the given module and return...
Definition: OMPIRBuilder.cpp:1051
llvm::AtomicRMWInst::FAdd
@ FAdd
*p = old + v
Definition: Instructions.h:763
llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:658
llvm::OpenMPIRBuilder::emitBarrierImpl
InsertPointTy emitBarrierImpl(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall, bool CheckCancelFlag)
Generate a barrier runtime call.
Definition: OMPIRBuilder.cpp:367
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
llvm::PHINode
Definition: Instructions.h:2633
llvm::BasicBlock::removePredecessor
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:321
llvm::Function::removeFromParent
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
Definition: Function.cpp:361
llvm::SmallVectorImpl< uint64_t >
llvm::AtomicRMWInst::BAD_BINOP
@ BAD_BINOP
Definition: Instructions.h:770
llvm::CodeExtractor::isEligible
bool isEligible() const
Test whether this code extractor is eligible.
Definition: CodeExtractor.cpp:620
llvm::CanonicalLoopInfo::getTripCount
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
Definition: OMPIRBuilder.h:1492
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::PassInstrumentationAnalysis
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Definition: PassManager.h:599
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:275
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:450
llvm::OpenMPIRBuilder::emitFlush
void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
Definition: OMPIRBuilder.cpp:867
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
llvm::OpenMPIRBuilder::OutlineInfo::collectBlocks
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
Definition: OMPIRBuilder.cpp:3284
llvm::OpenMPIRBuilder::MapperAllocas
Definition: OMPIRBuilder.h:799
llvm::OpenMPIRBuilder::applyStaticWorkshareLoop
InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk=nullptr)
Modifies the canonical loop to be a statically-scheduled workshare loop.
Definition: OMPIRBuilder.cpp:1470
BB
Definition: README.txt:39
llvm::UnreachableInst
This function has undefined behavior.
Definition: Instructions.h:4715
llvm::Value::replaceUsesWithIf
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:540
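A small sketch (not from this file) of the predicate-based replacement above: rewrite only those uses of Old that sit inside function F, leaving other uses untouched. Old, New, and F are hypothetical.
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"
static void replaceUsesInFunction(llvm::Value *Old, llvm::Value *New,
                                  llvm::Function *F) {
  Old->replaceUsesWithIf(New, [F](llvm::Use &U) {
    auto *I = llvm::dyn_cast<llvm::Instruction>(U.getUser());
    return I && I->getFunction() == F; // replace only uses inside F
  });
}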
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3212
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:62
llvm::SwitchInst::addCase
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Definition: Instructions.cpp:4184
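An illustrative sketch, assuming an i32 selector and pre-existing destination blocks: build a switch with IRBuilder and register an extra destination with addCase.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
static void emitSwitch(llvm::IRBuilder<> &Builder, llvm::Value *Selector,
                       llvm::BasicBlock *DefaultBB, llvm::BasicBlock *CaseOneBB) {
  llvm::SwitchInst *SI = Builder.CreateSwitch(Selector, DefaultBB, /*NumCases=*/1);
  SI->addCase(Builder.getInt32(1), CaseOneBB); // jump to CaseOneBB when Selector == 1
}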
llvm::cl::desc
Definition: CommandLine.h:414
llvm::OpenMPIRBuilder::createSingle
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, llvm::Value *DidIt)
Generator for '#omp single'.
Definition: OMPIRBuilder.cpp:2389
llvm::AtomicRMWInst::UMax
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:758
llvm::OpenMPIRBuilder::createCopyinClauseBlocks
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
Definition: OMPIRBuilder.cpp:2657
llvm::OpenMPIRBuilder::createCopyPrivate
InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
Definition: OMPIRBuilder.cpp:2368
llvm::SetVector< Value * >
llvm::omp::OMPScheduleType
OMPScheduleType
Definition: OMPConstants.h:113
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:624
llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction
FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
Definition: OMPIRBuilder.cpp:92
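A hedged sketch of the lookup above: obtain (or declare) one OpenMP runtime entry point by its enum and emit a call to it. __kmpc_barrier is used as the example; the Ident and ThreadId values are assumed to have been built elsewhere with the expected ident_t* and i32 types.
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/IRBuilder.h"
static void emitBarrierCall(llvm::OpenMPIRBuilder &OMPBuilder, llvm::Module &M,
                            llvm::IRBuilder<> &Builder, llvm::Value *Ident,
                            llvm::Value *ThreadId) {
  llvm::FunctionCallee Barrier =
      OMPBuilder.getOrCreateRuntimeFunction(M, llvm::omp::OMPRTL___kmpc_barrier);
  Builder.CreateCall(Barrier, {Ident, ThreadId});
}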
LoopPeel.h
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:814
Value.h
llvm::OpenMPIRBuilder::ReductionInfo::AtomicReductionGen
AtomicReductionGenTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
Definition: OMPIRBuilder.h:575
TargetRegistry.h
llvm::CanonicalLoopInfo::getBody
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
Definition: OMPIRBuilder.h:1460
llvm::AtomicCmpXchgInst::getStrongestFailureOrdering
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Definition: Instructions.h:674
llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:28
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::AtomicCmpXchgInst
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:521
getKmpcForStaticInitForType
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Definition: OMPIRBuilder.cpp:1447
llvm::Directive
Definition: DirectiveEmitter.h:100
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:438
llvm::gatherPeelingPreferences
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:612
llvm::ArrayRef::end
iterator end() const
Definition: ArrayRef.h:154
llvm::AtomicOrdering::NotAtomic
@ NotAtomic
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1243
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:97
llvm::DIFile
File.
Definition: DebugInfoMetadata.h:530
llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:103
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
llvm::Target::createTargetMachine
TargetMachine * createTargetMachine(StringRef TT, StringRef CPU, StringRef Features, const TargetOptions &Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM=None, CodeGenOpt::Level OL=CodeGenOpt::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Definition: TargetRegistry.h:449
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::OpenMPIRBuilder::LocationDescription::IP
InsertPointTy IP
Definition: OMPIRBuilder.h:157
llvm::AtomicRMWInst::Max
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:754