OMPIRBuilder.cpp (LLVM 14.0.0git)
1 //===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements the OpenMPIRBuilder class, which is used as a
11 /// convenient way to create LLVM instructions for OpenMP directives.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
16 #include "llvm/ADT/SmallSet.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/Triple.h"
24 #include "llvm/IR/CFG.h"
25 #include "llvm/IR/Constants.h"
26 #include "llvm/IR/DebugInfo.h"
27 #include "llvm/IR/GlobalVariable.h"
28 #include "llvm/IR/IRBuilder.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include "llvm/IR/PassManager.h"
31 #include "llvm/IR/Value.h"
32 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/Error.h"
42 
43 #include <cstdint>
44 #include <sstream>
45 
46 #define DEBUG_TYPE "openmp-ir-builder"
47 
48 using namespace llvm;
49 using namespace omp;
50 
51 static cl::opt<bool>
52  OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
53  cl::desc("Use optimistic attributes describing "
54  "'as-if' properties of runtime calls."),
55  cl::init(false));
56 
57 static cl::opt<double> UnrollThresholdFactor(
58  "openmp-ir-builder-unroll-threshold-factor", cl::Hidden,
59  cl::desc("Factor for the unroll threshold to account for code "
60  "simplifications still taking place"),
61  cl::init(1.5));
62 
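// These are internal LLVM options. A sketch of how they can be toggled when
// compiling through clang via -mllvm (the -fopenmp-enable-irbuilder flag that
// routes clang's OpenMP codegen through this builder is an assumption here):
//
//   clang -fopenmp -fopenmp-enable-irbuilder \
//     -mllvm -openmp-ir-builder-optimistic-attributes=true \
//     -mllvm -openmp-ir-builder-unroll-threshold-factor=2.0 foo.c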
63 #ifndef NDEBUG
64 /// Return whether IP1 and IP2 are ambiguous, i.e. that inserting instructions
65 /// at position IP1 may change the meaning of IP2 or vice-versa. This is because
66 /// an InsertPoint stores the instruction before something is inserted. For
67 /// instance, if both point to the same instruction, two IRBuilders alternately
68 /// creating instructions will cause the instructions to be interleaved.
69 static bool isConflictIP(IRBuilder<>::InsertPoint IP1,
70  IRBuilder<>::InsertPoint IP2) {
71  if (!IP1.isSet() || !IP2.isSet())
72  return false;
73  return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
74 }
75 #endif
76 
77 void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
78  LLVMContext &Ctx = Fn.getContext();
79 
80  // Get the function's current attributes.
81  auto Attrs = Fn.getAttributes();
82  auto FnAttrs = Attrs.getFnAttrs();
83  auto RetAttrs = Attrs.getRetAttrs();
84  SmallVector<AttributeSet, 4> ArgAttrs;
85  for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)
86  ArgAttrs.emplace_back(Attrs.getParamAttrs(ArgNo));
87 
88 #define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
89 #include "llvm/Frontend/OpenMP/OMPKinds.def"
90 
91  // Add attributes to the function declaration.
92  switch (FnID) {
93 #define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
94  case Enum: \
95  FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
96  RetAttrs = RetAttrs.addAttributes(Ctx, RetAttrSet); \
97  for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
98  ArgAttrs[ArgNo] = \
99  ArgAttrs[ArgNo].addAttributes(Ctx, ArgAttrSets[ArgNo]); \
100  Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
101  break;
102 #include "llvm/Frontend/OpenMP/OMPKinds.def"
103  default:
104  // Attributes are optional.
105  break;
106  }
107 }
108 
109 FunctionCallee
110 OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, RuntimeFunction FnID) {
111  FunctionType *FnTy = nullptr;
112  Function *Fn = nullptr;
113 
114  // Try to find the declaration in the module first.
115  switch (FnID) {
116 #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
117  case Enum: \
118  FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
119  IsVarArg); \
120  Fn = M.getFunction(Str); \
121  break;
122 #include "llvm/Frontend/OpenMP/OMPKinds.def"
123  }
124 
125  if (!Fn) {
126  // Create a new declaration if we need one.
127  switch (FnID) {
128 #define OMP_RTL(Enum, Str, ...) \
129  case Enum: \
130  Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
131  break;
132 #include "llvm/Frontend/OpenMP/OMPKinds.def"
133  }
134 
135  // Add information if the runtime function takes a callback function
136  if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
137  if (!Fn->hasMetadata(LLVMContext::MD_callback)) {
138  LLVMContext &Ctx = Fn->getContext();
139  MDBuilder MDB(Ctx);
140  // Annotate the callback behavior of the runtime function:
141  // - The callback callee is argument number 2 (microtask).
142  // - The first two arguments of the callback callee are unknown (-1).
143  // - All variadic arguments to the runtime function are passed to the
144  // callback callee.
145  Fn->addMetadata(
146  LLVMContext::MD_callback,
147  *MDNode::get(Ctx, {MDB.createCallbackEncoding(
148  2, {-1, -1}, /* VarArgsArePassed */ true)}));
149  }
150  }
151 
152  LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName()
153  << " with type " << *Fn->getFunctionType() << "\n");
154  addAttributes(FnID, *Fn);
155 
156  } else {
157  LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName()
158  << " with type " << *Fn->getFunctionType() << "\n");
159  }
160 
161  assert(Fn && "Failed to create OpenMP runtime function");
162 
163  // Cast the function to the expected type if necessary
164  Constant *C = ConstantExpr::getBitCast(Fn, FnTy->getPointerTo());
165  return {FnTy, C};
166 }
167 
168 Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
169  FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID);
170  auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee());
171  assert(Fn && "Failed to create OpenMP runtime function pointer");
172  return Fn;
173 }
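// Illustrative sketch (assumes an OpenMPIRBuilder instance `OMPBuilder` and a
// Module `M`): clients request runtime functions by enum and receive a
// correctly typed callee, with the declaration created on first use:
//
//   FunctionCallee Barrier = OMPBuilder.getOrCreateRuntimeFunction(
//       M, omp::RuntimeFunction::OMPRTL___kmpc_barrier);
//   Function *BarrierFn =
//       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_barrier);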
174 
175 void OpenMPIRBuilder::initialize() { initializeTypes(M); }
176 
177 void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) {
178  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
179  SmallVector<BasicBlock *, 32> Blocks;
180  SmallVector<OutlineInfo, 16> DeferredOutlines;
181  for (OutlineInfo &OI : OutlineInfos) {
182  // Skip functions that have not finalized yet; may happen with nested
183  // function generation.
184  if (Fn && OI.getFunction() != Fn) {
185  DeferredOutlines.push_back(OI);
186  continue;
187  }
188 
189  ParallelRegionBlockSet.clear();
190  Blocks.clear();
191  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
192 
193  Function *OuterFn = OI.getFunction();
194  CodeExtractorAnalysisCache CEAC(*OuterFn);
195  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
196  /* AggregateArgs */ false,
197  /* BlockFrequencyInfo */ nullptr,
198  /* BranchProbabilityInfo */ nullptr,
199  /* AssumptionCache */ nullptr,
200  /* AllowVarArgs */ true,
201  /* AllowAlloca */ true,
202  /* Suffix */ ".omp_par");
203 
204  LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
205  LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
206  << " Exit: " << OI.ExitBB->getName() << "\n");
207  assert(Extractor.isEligible() &&
208  "Expected OpenMP outlining to be possible!");
209 
210  Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
211 
212  LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n");
213  LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
214  assert(OutlinedFn->getReturnType()->isVoidTy() &&
215  "OpenMP outlined functions should not return a value!");
216 
217  // For compatibility with the clang CG we move the outlined function after the
218  // one with the parallel region.
219  OutlinedFn->removeFromParent();
220  M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
221 
222  // Remove the artificial entry introduced by the extractor right away; we
223  // made our own entry block after all.
224  {
225  BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
226  assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
227  assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
228  if (AllowExtractorSinking) {
229  // Move instructions from the to-be-deleted ArtificialEntry to the entry
230  // basic block of the parallel region. CodeExtractor may have sunk
231  // allocas/bitcasts for values that are solely used in the outlined
232  // region and do not escape.
233  assert(!ArtificialEntry.empty() &&
234  "Expected instructions to sink in the outlined region");
235  for (BasicBlock::iterator It = ArtificialEntry.begin(),
236  End = ArtificialEntry.end();
237  It != End;) {
238  Instruction &I = *It;
239  It++;
240 
241  if (I.isTerminator())
242  continue;
243 
244  I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
245  }
246  }
247  OI.EntryBB->moveBefore(&ArtificialEntry);
248  ArtificialEntry.eraseFromParent();
249  }
250  assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
251  assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
252 
253  // Run a user callback, e.g. to add attributes.
254  if (OI.PostOutlineCB)
255  OI.PostOutlineCB(*OutlinedFn);
256  }
257 
258  // Remove work items that have been completed.
259  OutlineInfos = std::move(DeferredOutlines);
260 }
261 
262 OpenMPIRBuilder::~OpenMPIRBuilder() {
263  assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
264 }
265 
266 GlobalValue *OpenMPIRBuilder::createGlobalFlag(unsigned Value, StringRef Name) {
267  IntegerType *I32Ty = Type::getInt32Ty(M.getContext());
268  auto *GV =
269  new GlobalVariable(M, I32Ty,
270  /* isConstant = */ true, GlobalValue::WeakODRLinkage,
271  ConstantInt::get(I32Ty, Value), Name);
272  GV->setVisibility(GlobalValue::HiddenVisibility);
273 
274  return GV;
275 }
276 
277 Constant *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
278  uint32_t SrcLocStrSize,
279  IdentFlag LocFlags,
280  unsigned Reserve2Flags) {
281  // Enable "C-mode".
282  LocFlags |= OMP_IDENT_FLAG_KMPC;
283 
284  Constant *&Ident =
285  IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
286  if (!Ident) {
287  Constant *I32Null = ConstantInt::getNullValue(Int32);
288  Constant *IdentData[] = {I32Null,
289  ConstantInt::get(Int32, uint32_t(LocFlags)),
290  ConstantInt::get(Int32, Reserve2Flags),
291  ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
292  Constant *Initializer =
293  ConstantStruct::get(OpenMPIRBuilder::Ident, IdentData);
294 
295  // Look for existing encoding of the location + flags, not needed but
296  // minimizes the difference to the existing solution while we transition.
297  for (GlobalVariable &GV : M.getGlobalList())
298  if (GV.getValueType() == OpenMPIRBuilder::Ident && GV.hasInitializer())
299  if (GV.getInitializer() == Initializer)
300  Ident = &GV;
301 
302  if (!Ident) {
303  auto *GV = new GlobalVariable(
304  M, OpenMPIRBuilder::Ident,
305  /* isConstant = */ true, GlobalValue::PrivateLinkage, Initializer, "",
306  nullptr, GlobalValue::NotThreadLocal,
307  M.getDataLayout().getDefaultGlobalsAddressSpace());
308  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
309  GV->setAlignment(Align(8));
310  Ident = GV;
311  }
312  }
313 
314  return ConstantExpr::getPointerBitCastOrAddrSpaceCast(Ident, IdentPtr);
315 }
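// For reference, the ident_t values built here typically lower to IR of this
// shape (a sketch; global names, the string, and the size depend on the actual
// source location):
//
//   @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
//   @1 = private unnamed_addr constant %struct.ident_t
//            { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds (...) },
//            align 8
//
// where the second field (2) is OMP_IDENT_FLAG_KMPC and the fourth is the
// source-location string size.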
316 
317 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr,
318  uint32_t &SrcLocStrSize) {
319  SrcLocStrSize = LocStr.size();
320  Constant *&SrcLocStr = SrcLocStrMap[LocStr];
321  if (!SrcLocStr) {
322  Constant *Initializer =
323  ConstantDataArray::getString(M.getContext(), LocStr);
324 
325  // Look for existing encoding of the location, not needed but minimizes the
326  // difference to the existing solution while we transition.
327  for (GlobalVariable &GV : M.getGlobalList())
328  if (GV.isConstant() && GV.hasInitializer() &&
329  GV.getInitializer() == Initializer)
330  return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);
331 
332  SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "",
333  /* AddressSpace */ 0, &M);
334  }
335  return SrcLocStr;
336 }
337 
338 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName,
339  StringRef FileName,
340  unsigned Line, unsigned Column,
341  uint32_t &SrcLocStrSize) {
342  SmallString<128> Buffer;
343  Buffer.push_back(';');
344  Buffer.append(FileName);
345  Buffer.push_back(';');
346  Buffer.append(FunctionName);
347  Buffer.push_back(';');
348  Buffer.append(std::to_string(Line));
349  Buffer.push_back(';');
350  Buffer.append(std::to_string(Column));
351  Buffer.push_back(';');
352  Buffer.push_back(';');
353  return getOrCreateSrcLocStr(Buffer.str(), SrcLocStrSize);
354 }
355 
356 Constant *
357 OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize) {
358  StringRef UnknownLoc = ";unknown;unknown;0;0;;";
359  return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
360 }
361 
362 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL,
363  uint32_t &SrcLocStrSize,
364  Function *F) {
365  DILocation *DIL = DL.get();
366  if (!DIL)
367  return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
368  StringRef FileName = M.getName();
369  if (DIFile *DIF = DIL->getFile())
370  if (Optional<StringRef> Source = DIF->getSource())
371  FileName = *Source;
372  StringRef Function = DIL->getScope()->getSubprogram()->getName();
373  if (Function.empty() && F)
374  Function = F->getName();
375  return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
376  DIL->getColumn(), SrcLocStrSize);
377 }
378 
379 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc,
380  uint32_t &SrcLocStrSize) {
381  return getOrCreateSrcLocStr(Loc.DL, SrcLocStrSize,
382  Loc.IP.getBlock()->getParent());
383 }
384 
385 Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
386  return Builder.CreateCall(
387  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
388  "omp_global_thread_num");
389 }
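// The emitted call is roughly (sketch):
//
//   %omp_global_thread_num = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
//
// Note that no caching happens here; each request creates a new call at the
// current insertion point.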
390 
391 OpenMPIRBuilder::InsertPointTy
392 OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK,
393  bool ForceSimpleCall, bool CheckCancelFlag) {
394  if (!updateToLocation(Loc))
395  return Loc.IP;
396  return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
397 }
398 
399 OpenMPIRBuilder::InsertPointTy
400 OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
401  bool ForceSimpleCall, bool CheckCancelFlag) {
402  // Build call __kmpc_cancel_barrier(loc, thread_id) or
403  // __kmpc_barrier(loc, thread_id);
404 
405  IdentFlag BarrierLocFlags;
406  switch (Kind) {
407  case OMPD_for:
408  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
409  break;
410  case OMPD_sections:
411  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
412  break;
413  case OMPD_single:
414  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
415  break;
416  case OMPD_barrier:
417  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
418  break;
419  default:
420  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
421  break;
422  }
423 
424  uint32_t SrcLocStrSize;
425  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
426  Value *Args[] = {
427  getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
428  getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};
429 
430  // If we are in a cancellable parallel region, barriers are cancellation
431  // points.
432  // TODO: Check why we would force simple calls or to ignore the cancel flag.
433  bool UseCancelBarrier =
434  !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
435 
436  Value *Result =
437  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
438  UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
439  : OMPRTL___kmpc_barrier),
440  Args);
441 
442  if (UseCancelBarrier && CheckCancelFlag)
443  emitCancelationCheckImpl(Result, OMPD_parallel);
444 
445  return Builder.saveIP();
446 }
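// Illustrative sketch of the entry point above (assumes an OpenMPIRBuilder
// `OMPBuilder`, an IRBuilder `Builder`, and a DebugLoc `DL`); for an explicit
// '#pragma omp barrier' a frontend would emit:
//
//   OMPBuilder.createBarrier(
//       OpenMPIRBuilder::LocationDescription(Builder.saveIP(), DL),
//       omp::Directive::OMPD_barrier, /* ForceSimpleCall */ false,
//       /* CheckCancelFlag */ true);
//
// which lowers to __kmpc_barrier, or to __kmpc_cancel_barrier plus the
// cancellation check when inside a cancellable parallel region.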
447 
448 OpenMPIRBuilder::InsertPointTy
449 OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
450  Value *IfCondition,
451  omp::Directive CanceledDirective) {
452  if (!updateToLocation(Loc))
453  return Loc.IP;
454 
455  // LLVM utilities like blocks with terminators.
456  auto *UI = Builder.CreateUnreachable();
457 
458  Instruction *ThenTI = UI, *ElseTI = nullptr;
459  if (IfCondition)
460  SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
461  Builder.SetInsertPoint(ThenTI);
462 
463  Value *CancelKind = nullptr;
464  switch (CanceledDirective) {
465 #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
466  case DirectiveEnum: \
467  CancelKind = Builder.getInt32(Value); \
468  break;
469 #include "llvm/Frontend/OpenMP/OMPKinds.def"
470  default:
471  llvm_unreachable("Unknown cancel kind!");
472  }
473 
474  uint32_t SrcLocStrSize;
475  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
476  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
477  Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
478  Value *Result = Builder.CreateCall(
479  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
480  auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) {
481  if (CanceledDirective == OMPD_parallel) {
482  IRBuilder<>::InsertPointGuard IPG(Builder);
483  Builder.restoreIP(IP);
484  createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
485  omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
486  /* CheckCancelFlag */ false);
487  }
488  };
489 
490  // The actual cancel logic is shared with others, e.g., cancel_barriers.
491  emitCancelationCheckImpl(Result, CanceledDirective, ExitCB);
492 
493  // Update the insertion point and remove the terminator we introduced.
494  Builder.SetInsertPoint(UI->getParent());
495  UI->eraseFromParent();
496 
497  return Builder.saveIP();
498 }
499 
500 void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag,
501  omp::Directive CanceledDirective,
502  FinalizeCallbackTy ExitCB) {
503  assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
504  "Unexpected cancellation!");
505 
506  // For a cancel barrier we create two new blocks.
507  BasicBlock *BB = Builder.GetInsertBlock();
508  BasicBlock *NonCancellationBlock;
509  if (Builder.GetInsertPoint() == BB->end()) {
510  // TODO: This branch will not be needed once we moved to the
511  // OpenMPIRBuilder codegen completely.
512  NonCancellationBlock = BasicBlock::Create(
513  BB->getContext(), BB->getName() + ".cont", BB->getParent());
514  } else {
515  NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
516  BB->getTerminator()->eraseFromParent();
517  Builder.SetInsertPoint(BB);
518  }
519  BasicBlock *CancellationBlock = BasicBlock::Create(
520  BB->getContext(), BB->getName() + ".cncl", BB->getParent());
521 
522  // Jump to them based on the return value.
523  Value *Cmp = Builder.CreateIsNull(CancelFlag);
524  Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
525  /* TODO weight */ nullptr, nullptr);
526 
527  // From the cancellation block we finalize all variables and go to the
528  // post finalization block that is known to the FiniCB callback.
529  Builder.SetInsertPoint(CancellationBlock);
530  if (ExitCB)
531  ExitCB(Builder.saveIP());
532  auto &FI = FinalizationStack.back();
533  FI.FiniCB(Builder.saveIP());
534 
535  // The continuation block is where code generation continues.
536  Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
537 }
538 
539 IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
540  const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
541  BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
542  FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
543  omp::ProcBindKind ProcBind, bool IsCancellable) {
544  assert(!isConflictIP(Loc.IP, OuterAllocaIP) && "IPs must not be ambiguous");
545 
546  if (!updateToLocation(Loc))
547  return Loc.IP;
548 
549  uint32_t SrcLocStrSize;
550  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
551  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
552  Value *ThreadID = getOrCreateThreadID(Ident);
553 
554  if (NumThreads) {
555  // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
556  Value *Args[] = {
557  Ident, ThreadID,
558  Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
559  Builder.CreateCall(
560  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
561  }
562 
563  if (ProcBind != OMP_PROC_BIND_default) {
564  // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
565  Value *Args[] = {
566  Ident, ThreadID,
567  ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
568  Builder.CreateCall(
569  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
570  }
571 
572  BasicBlock *InsertBB = Builder.GetInsertBlock();
573  Function *OuterFn = InsertBB->getParent();
574 
575  // Save the outer alloca block because the insertion iterator may get
576  // invalidated and we still need this later.
577  BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
578 
579  // Vector to remember instructions we used only during the modeling but which
580  // we want to delete at the end.
581  SmallVector<Instruction *, 4> ToBeDeleted;
582 
583  // Change the location to the outer alloca insertion point to create and
584  // initialize the allocas we pass into the parallel region.
585  Builder.restoreIP(OuterAllocaIP);
586  AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
587  AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
588 
589  // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
590  // program, otherwise we only need them for modeling purposes to get the
591  // associated arguments in the outlined function. In the former case,
592  // initialize the allocas properly, in the latter case, delete them later.
593  if (IfCondition) {
594  Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
595  Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
596  } else {
597  ToBeDeleted.push_back(TIDAddr);
598  ToBeDeleted.push_back(ZeroAddr);
599  }
600 
601  // Create an artificial insertion point that will also ensure the blocks we
602  // are about to split are not degenerated.
603  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);
604 
605  Instruction *ThenTI = UI, *ElseTI = nullptr;
606  if (IfCondition)
607  SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
608 
609  BasicBlock *ThenBB = ThenTI->getParent();
610  BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
611  BasicBlock *PRegBodyBB =
612  PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
613  BasicBlock *PRegPreFiniBB =
614  PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
615  BasicBlock *PRegExitBB =
616  PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");
617 
618  auto FiniCBWrapper = [&](InsertPointTy IP) {
619  // Hide "open-ended" blocks from the given FiniCB by setting the right jump
620  // target to the region exit block.
621  if (IP.getBlock()->end() == IP.getPoint()) {
622  IRBuilder<>::InsertPointGuard IPG(Builder);
623  Builder.restoreIP(IP);
624  Instruction *I = Builder.CreateBr(PRegExitBB);
625  IP = InsertPointTy(I->getParent(), I->getIterator());
626  }
627  assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
628  IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
629  "Unexpected insertion point for finalization call!");
630  return FiniCB(IP);
631  };
632 
633  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
634 
635  // Generate the privatization allocas in the block that will become the entry
636  // of the outlined function.
637  Builder.SetInsertPoint(PRegEntryBB->getTerminator());
638  InsertPointTy InnerAllocaIP = Builder.saveIP();
639 
640  AllocaInst *PrivTIDAddr =
641  Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
642  Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr, "tid");
643 
644  // Add some fake uses for OpenMP provided arguments.
645  ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"));
646  Instruction *ZeroAddrUse =
647  Builder.CreateLoad(Int32, ZeroAddr, "zero.addr.use");
648  ToBeDeleted.push_back(ZeroAddrUse);
649 
650  // ThenBB
651  // |
652  // V
653  // PRegionEntryBB <- Privatization allocas are placed here.
654  // |
655  // V
656  // PRegionBodyBB <- BodyGen is invoked here.
657  // |
658  // V
659  // PRegPreFiniBB <- The block we will start finalization from.
660  // |
661  // V
662  // PRegionExitBB <- A common exit to simplify block collection.
663  //
664 
665  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");
666 
667  // Let the caller create the body.
668  assert(BodyGenCB && "Expected body generation callback!");
669  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
670  BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);
671 
672  LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
673 
674  FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
675  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
676  if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
677  llvm::LLVMContext &Ctx = F->getContext();
678  MDBuilder MDB(Ctx);
679  // Annotate the callback behavior of the __kmpc_fork_call:
680  // - The callback callee is argument number 2 (microtask).
681  // - The first two arguments of the callback callee are unknown (-1).
682  // - All variadic arguments to the __kmpc_fork_call are passed to the
683  // callback callee.
684  F->addMetadata(
685  llvm::LLVMContext::MD_callback,
686  *llvm::MDNode::get(
687  Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
688  /* VarArgsArePassed */ true)}));
689  }
690  }
691 
692  OutlineInfo OI;
693  OI.PostOutlineCB = [=](Function &OutlinedFn) {
694  // Add some known attributes.
695  OutlinedFn.addParamAttr(0, Attribute::NoAlias);
696  OutlinedFn.addParamAttr(1, Attribute::NoAlias);
697  OutlinedFn.addFnAttr(Attribute::NoUnwind);
698  OutlinedFn.addFnAttr(Attribute::NoRecurse);
699 
700  assert(OutlinedFn.arg_size() >= 2 &&
701  "Expected at least tid and bounded tid as arguments");
702  unsigned NumCapturedVars =
703  OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
704 
705  CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
706  CI->getParent()->setName("omp_parallel");
707  Builder.SetInsertPoint(CI);
708 
709  // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
710  Value *ForkCallArgs[] = {
711  Ident, Builder.getInt32(NumCapturedVars),
712  Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};
713 
714  SmallVector<Value *, 16> RealArgs;
715  RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
716  RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
717 
718  Builder.CreateCall(RTLFn, RealArgs);
719 
720  LLVM_DEBUG(dbgs() << "With fork_call placed: "
721  << *Builder.GetInsertBlock()->getParent() << "\n");
722 
723  InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
724 
725  // Initialize the local TID stack location with the argument value.
726  Builder.SetInsertPoint(PrivTID);
727  Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
728  Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr);
729 
730  // If no "if" clause was present we do not need the call created during
731  // outlining, otherwise we reuse it in the serialized parallel region.
732  if (!ElseTI) {
733  CI->eraseFromParent();
734  } else {
735 
736  // If an "if" clause was present we are now generating the serialized
737  // version into the "else" branch.
738  Builder.SetInsertPoint(ElseTI);
739 
740  // Build calls __kmpc_serialized_parallel(&Ident, GTid);
741  Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
742  Builder.CreateCall(
743  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel),
744  SerializedParallelCallArgs);
745 
746  // OutlinedFn(&GTid, &zero, CapturedStruct);
747  CI->removeFromParent();
748  Builder.Insert(CI);
749 
750  // __kmpc_end_serialized_parallel(&Ident, GTid);
751  Value *EndArgs[] = {Ident, ThreadID};
752  Builder.CreateCall(
753  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel),
754  EndArgs);
755 
756  LLVM_DEBUG(dbgs() << "With serialized parallel region: "
757  << *Builder.GetInsertBlock()->getParent() << "\n");
758  }
759 
760  for (Instruction *I : ToBeDeleted)
761  I->eraseFromParent();
762  };
763 
764  // Adjust the finalization stack, verify the adjustment, and call the
765  // finalize function a last time to finalize values between the pre-fini
766  // block and the exit block if we left the parallel "the normal way".
767  auto FiniInfo = FinalizationStack.pop_back_val();
768  (void)FiniInfo;
769  assert(FiniInfo.DK == OMPD_parallel &&
770  "Unexpected finalization stack state!");
771 
772  Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();
773 
774  InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
775  FiniCB(PreFiniIP);
776 
777  OI.EntryBB = PRegEntryBB;
778  OI.ExitBB = PRegExitBB;
779 
780  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
781  SmallVector<BasicBlock *, 32> Blocks;
782  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
783 
784  // Ensure a single exit node for the outlined region by creating one.
785  // We might have multiple incoming edges to the exit now due to finalizations,
786  // e.g., cancel calls that cause the control flow to leave the region.
787  BasicBlock *PRegOutlinedExitBB = PRegExitBB;
788  PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
789  PRegOutlinedExitBB->setName("omp.par.outlined.exit");
790  Blocks.push_back(PRegOutlinedExitBB);
791 
792  CodeExtractorAnalysisCache CEAC(*OuterFn);
793  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
794  /* AggregateArgs */ false,
795  /* BlockFrequencyInfo */ nullptr,
796  /* BranchProbabilityInfo */ nullptr,
797  /* AssumptionCache */ nullptr,
798  /* AllowVarArgs */ true,
799  /* AllowAlloca */ true,
800  /* Suffix */ ".omp_par");
801 
802  // Find inputs to, outputs from the code region.
803  BasicBlock *CommonExit = nullptr;
804  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
805  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
806  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
807 
808  LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");
809 
810  FunctionCallee TIDRTLFn =
811  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
812 
813  auto PrivHelper = [&](Value &V) {
814  if (&V == TIDAddr || &V == ZeroAddr)
815  return;
816 
817  SetVector<Use *> Uses;
818  for (Use &U : V.uses())
819  if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
820  if (ParallelRegionBlockSet.count(UserI->getParent()))
821  Uses.insert(&U);
822 
823  // __kmpc_fork_call expects extra arguments as pointers. If the input
824  // already has a pointer type, everything is fine. Otherwise, store the
825  // value onto stack and load it back inside the to-be-outlined region. This
826  // will ensure only the pointer will be passed to the function.
827  // FIXME: if there are more than 15 trailing arguments, they must be
828  // additionally packed in a struct.
829  Value *Inner = &V;
830  if (!V.getType()->isPointerTy()) {
831  IRBuilder<>::InsertPointGuard Guard(Builder);
832  LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");
833 
834  Builder.restoreIP(OuterAllocaIP);
835  Value *Ptr =
836  Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");
837 
838  // Store to stack at end of the block that currently branches to the entry
839  // block of the to-be-outlined region.
840  Builder.SetInsertPoint(InsertBB,
841  InsertBB->getTerminator()->getIterator());
842  Builder.CreateStore(&V, Ptr);
843 
844  // Load back next to allocations in the to-be-outlined region.
845  Builder.restoreIP(InnerAllocaIP);
846  Inner = Builder.CreateLoad(V.getType(), Ptr);
847  }
848 
849  Value *ReplacementValue = nullptr;
850  CallInst *CI = dyn_cast<CallInst>(&V);
851  if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
852  ReplacementValue = PrivTID;
853  } else {
854  Builder.restoreIP(
855  PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
856  assert(ReplacementValue &&
857  "Expected copy/create callback to set replacement value!");
858  if (ReplacementValue == &V)
859  return;
860  }
861 
862  for (Use *UPtr : Uses)
863  UPtr->set(ReplacementValue);
864  };
865 
866  // Reset the inner alloca insertion as it will be used for loading the values
867  // wrapped into pointers before passing them into the to-be-outlined region.
868  // Configure it to insert immediately after the fake use of zero address so
869  // that they are available in the generated body and so that the
870  // OpenMP-related values (thread ID and zero address pointers) remain leading
871  // in the argument list.
872  InnerAllocaIP = IRBuilder<>::InsertPoint(
873  ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());
874 
875  // Reset the outer alloca insertion point to the entry of the relevant block
876  // in case it was invalidated.
877  OuterAllocaIP = IRBuilder<>::InsertPoint(
878  OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());
879 
880  for (Value *Input : Inputs) {
881  LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
882  PrivHelper(*Input);
883  }
884  LLVM_DEBUG({
885  for (Value *Output : Outputs)
886  LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
887  });
888  assert(Outputs.empty() &&
889  "OpenMP outlining should not produce live-out values!");
890 
891  LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
892  LLVM_DEBUG({
893  for (auto *BB : Blocks)
894  dbgs() << " PBR: " << BB->getName() << "\n";
895  });
896 
897  // Register the outlined info.
898  addOutlineInfo(std::move(OI));
899 
900  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
901  UI->eraseFromParent();
902 
903  return AfterIP;
904 }
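// Illustrative sketch of how a frontend drives createParallel (the callback
// bodies and the variable names OMPBuilder, Builder, Loc and AllocaIP are
// assumptions for the example; the callback signatures follow the
// function_ref types invoked above):
//
//   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
//   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
//                        BasicBlock &ContinuationBB) {
//     Builder.restoreIP(CodeGenIP);
//     // ... emit the body of the parallel region ...
//   };
//   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
//                     Value &Orig, Value &Inner, Value *&ReplacementValue) {
//     ReplacementValue = &Inner; // share by default; alloca a copy to privatize
//     return CodeGenIP;
//   };
//   auto FiniCB = [&](InsertPointTy CodeGenIP) { /* emit cleanup, if any */ };
//   Builder.restoreIP(OMPBuilder.createParallel(
//       Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, /*IfCondition=*/nullptr,
//       /*NumThreads=*/nullptr, OMP_PROC_BIND_default, /*IsCancellable=*/false));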
905 
906 void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) {
907  // Build call void __kmpc_flush(ident_t *loc)
908  uint32_t SrcLocStrSize;
909  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
910  Value *Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
911 
912  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
913 }
914 
915 void OpenMPIRBuilder::createFlush(const LocationDescription &Loc) {
916  if (!updateToLocation(Loc))
917  return;
918  emitFlush(Loc);
919 }
920 
921 void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) {
922  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
923  // global_tid);
924  uint32_t SrcLocStrSize;
925  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
926  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
927  Value *Args[] = {Ident, getOrCreateThreadID(Ident)};
928 
929  // Ignore return result until untied tasks are supported.
930  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
931  Args);
932 }
933 
934 void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) {
935  if (!updateToLocation(Loc))
936  return;
937  emitTaskwaitImpl(Loc);
938 }
939 
940 void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) {
941  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
942  uint32_t SrcLocStrSize;
943  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
944  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
945  Constant *I32Null = ConstantInt::getNullValue(Int32);
946  Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
947 
948  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
949  Args);
950 }
951 
952 void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) {
953  if (!updateToLocation(Loc))
954  return;
955  emitTaskyieldImpl(Loc);
956 }
957 
958 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
959  const LocationDescription &Loc, InsertPointTy AllocaIP,
960  ArrayRef<StorableBodyGenCallbackTy> SectionCBs, PrivatizeCallbackTy PrivCB,
961  FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) {
962  if (!updateToLocation(Loc))
963  return Loc.IP;
964 
965  auto FiniCBWrapper = [&](InsertPointTy IP) {
966  if (IP.getBlock()->end() != IP.getPoint())
967  return FiniCB(IP);
968  // This must be done otherwise any nested constructs using FinalizeOMPRegion
969  // will fail because that function requires the Finalization Basic Block to
970  // have a terminator, which is already removed by EmitOMPRegionBody.
971  // IP is currently at the cancellation block.
972  // We need to backtrack to the condition block to fetch
973  // the exit block and create a branch from the cancellation
974  // block to the exit block.
976  Builder.restoreIP(IP);
977  auto *CaseBB = IP.getBlock()->getSinglePredecessor();
978  auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
979  auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
980  Instruction *I = Builder.CreateBr(ExitBB);
981  IP = InsertPointTy(I->getParent(), I->getIterator());
982  return FiniCB(IP);
983  };
984 
985  FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
986 
987  // Each section is emitted as a switch case
988  // Each finalization callback is handled from clang.EmitOMPSectionDirective()
989  // -> OMP.createSection() which generates the IR for each section
990  // Iterate through all sections and emit a switch construct:
991  // switch (IV) {
992  // case 0:
993  // <SectionStmt[0]>;
994  // break;
995  // ...
996  // case <NumSection> - 1:
997  // <SectionStmt[<NumSection> - 1]>;
998  // break;
999  // }
1000  // ...
1001  // section_loop.after:
1002  // <FiniCB>;
1003  auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) {
1004  auto *CurFn = CodeGenIP.getBlock()->getParent();
1005  auto *ForIncBB = CodeGenIP.getBlock()->getSingleSuccessor();
1006  auto *ForExitBB = CodeGenIP.getBlock()
1007  ->getSinglePredecessor()
1008  ->getTerminator()
1009  ->getSuccessor(1);
1010  SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, ForIncBB);
1011  Builder.restoreIP(CodeGenIP);
1012  unsigned CaseNumber = 0;
1013  for (auto SectionCB : SectionCBs) {
1014  auto *CaseBB = BasicBlock::Create(M.getContext(),
1015  "omp_section_loop.body.case", CurFn);
1016  SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
1017  Builder.SetInsertPoint(CaseBB);
1018  SectionCB(InsertPointTy(), Builder.saveIP(), *ForExitBB);
1019  CaseNumber++;
1020  }
1021  // Remove the existing terminator from the body BB since there can be no
1022  // terminators after a switch/case.
1023  CodeGenIP.getBlock()->getTerminator()->eraseFromParent();
1024  };
1025  // Loop body ends here
1026  // LowerBound, UpperBound, and Stride for createCanonicalLoop
1027  Type *I32Ty = Type::getInt32Ty(M.getContext());
1028  Value *LB = ConstantInt::get(I32Ty, 0);
1029  Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
1030  Value *ST = ConstantInt::get(I32Ty, 1);
1031  llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
1032  Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
1033  Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
1034  AllocaIP = Builder.saveIP();
1035  InsertPointTy AfterIP =
1036  applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait);
1037  BasicBlock *LoopAfterBB = AfterIP.getBlock();
1038  Instruction *SplitPos = LoopAfterBB->getTerminator();
1039  if (!isa_and_nonnull<BranchInst>(SplitPos))
1040  SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB);
1041  // ExitBB after LoopAfterBB because LoopAfterBB is used for FinalizationCB,
1042  // which requires a BB with branch
1043  BasicBlock *ExitBB =
1044  LoopAfterBB->splitBasicBlock(SplitPos, "omp_sections.end");
1045  SplitPos->eraseFromParent();
1046 
1047  // Apply the finalization callback in LoopAfterBB
1048  auto FiniInfo = FinalizationStack.pop_back_val();
1049  assert(FiniInfo.DK == OMPD_sections &&
1050  "Unexpected finalization stack state!");
1051  Builder.SetInsertPoint(LoopAfterBB->getTerminator());
1052  FiniInfo.FiniCB(Builder.saveIP());
1053  Builder.SetInsertPoint(ExitBB);
1054 
1055  return Builder.saveIP();
1056 }
1057 
1058 OpenMPIRBuilder::InsertPointTy
1059 OpenMPIRBuilder::createSection(const LocationDescription &Loc,
1060  BodyGenCallbackTy BodyGenCB,
1061  FinalizeCallbackTy FiniCB) {
1062  if (!updateToLocation(Loc))
1063  return Loc.IP;
1064 
1065  auto FiniCBWrapper = [&](InsertPointTy IP) {
1066  if (IP.getBlock()->end() != IP.getPoint())
1067  return FiniCB(IP);
1068  // This must be done otherwise any nested constructs using FinalizeOMPRegion
1069  // will fail because that function requires the Finalization Basic Block to
1070  // have a terminator, which is already removed by EmitOMPRegionBody.
1071  // IP is currently at the cancellation block.
1072  // We need to backtrack to the condition block to fetch
1073  // the exit block and create a branch from the cancellation
1074  // block to the exit block.
1076  Builder.restoreIP(IP);
1077  auto *CaseBB = Loc.IP.getBlock();
1078  auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
1079  auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
1080  Instruction *I = Builder.CreateBr(ExitBB);
1081  IP = InsertPointTy(I->getParent(), I->getIterator());
1082  return FiniCB(IP);
1083  };
1084 
1085  Directive OMPD = Directive::OMPD_sections;
1086  // Since we are using Finalization Callback here, HasFinalize
1087  // and IsCancellable have to be true
1088  return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper,
1089  /*Conditional*/ false, /*hasFinalize*/ true,
1090  /*IsCancellable*/ true);
1091 }
1092 
1093 /// Create a function with a unique name and a "void (i8*, i8*)" signature in
1094 /// the given module and return it.
1095 static Function *getFreshReductionFunc(Module &M) {
1096  Type *VoidTy = Type::getVoidTy(M.getContext());
1097  Type *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
1098  auto *FuncTy =
1099  FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy}, /* IsVarArg */ false);
1100  return Function::Create(FuncTy, GlobalVariable::InternalLinkage,
1101  M.getDataLayout().getDefaultGlobalsAddressSpace(),
1102  ".omp.reduction.func", &M);
1103 }
1104 
1105 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
1106  const LocationDescription &Loc, InsertPointTy AllocaIP,
1107  ArrayRef<ReductionInfo> ReductionInfos, bool IsNoWait) {
1108  for (const ReductionInfo &RI : ReductionInfos) {
1109  (void)RI;
1110  assert(RI.Variable && "expected non-null variable");
1111  assert(RI.PrivateVariable && "expected non-null private variable");
1112  assert(RI.ReductionGen && "expected non-null reduction generator callback");
1113  assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
1114  "expected variables and their private equivalents to have the same "
1115  "type");
1116  assert(RI.Variable->getType()->isPointerTy() &&
1117  "expected variables to be pointers");
1118  }
1119 
1120  if (!updateToLocation(Loc))
1121  return InsertPointTy();
1122 
1123  BasicBlock *InsertBlock = Loc.IP.getBlock();
1124  BasicBlock *ContinuationBlock =
1125  InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize");
1126  InsertBlock->getTerminator()->eraseFromParent();
1127 
1128  // Create and populate array of type-erased pointers to private reduction
1129  // values.
1130  unsigned NumReductions = ReductionInfos.size();
1131  Type *RedArrayTy = ArrayType::get(Builder.getInt8PtrTy(), NumReductions);
1132  Builder.restoreIP(AllocaIP);
1133  Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array");
1134 
1135  Builder.SetInsertPoint(InsertBlock, InsertBlock->end());
1136 
1137  for (auto En : enumerate(ReductionInfos)) {
1138  unsigned Index = En.index();
1139  const ReductionInfo &RI = En.value();
1140  Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
1141  RedArrayTy, RedArray, 0, Index, "red.array.elem." + Twine(Index));
1142  Value *Casted =
1143  Builder.CreateBitCast(RI.PrivateVariable, Builder.getInt8PtrTy(),
1144  "private.red.var." + Twine(Index) + ".casted");
1145  Builder.CreateStore(Casted, RedArrayElemPtr);
1146  }
1147 
1148  // Emit a call to the runtime function that orchestrates the reduction.
1149  // Declare the reduction function in the process.
1150  Function *Func = Builder.GetInsertBlock()->getParent();
1151  Module *Module = Func->getParent();
1152  Value *RedArrayPtr =
1153  Builder.CreateBitCast(RedArray, Builder.getInt8PtrTy(), "red.array.ptr");
1154  uint32_t SrcLocStrSize;
1155  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
1156  bool CanGenerateAtomic =
1157  llvm::all_of(ReductionInfos, [](const ReductionInfo &RI) {
1158  return RI.AtomicReductionGen;
1159  });
1160  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
1161  CanGenerateAtomic
1162  ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
1163  : IdentFlag(0));
1164  Value *ThreadId = getOrCreateThreadID(Ident);
1165  Constant *NumVariables = Builder.getInt32(NumReductions);
1166  const DataLayout &DL = Module->getDataLayout();
1167  unsigned RedArrayByteSize = DL.getTypeStoreSize(RedArrayTy);
1168  Constant *RedArraySize = Builder.getInt64(RedArrayByteSize);
1169  Function *ReductionFunc = getFreshReductionFunc(*Module);
1170  Value *Lock = getOMPCriticalRegionLock(".reduction");
1171  Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
1172  IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
1173  : RuntimeFunction::OMPRTL___kmpc_reduce);
1174  CallInst *ReduceCall =
1175  Builder.CreateCall(ReduceFunc,
1176  {Ident, ThreadId, NumVariables, RedArraySize,
1177  RedArrayPtr, ReductionFunc, Lock},
1178  "reduce");
1179 
1180  // Create final reduction entry blocks for the atomic and non-atomic case.
1181  // Emit IR that dispatches control flow to one of the blocks based on the
1182  // reduction supporting the atomic mode.
1183  BasicBlock *NonAtomicRedBlock =
1184  BasicBlock::Create(Module->getContext(), "reduce.switch.nonatomic", Func);
1185  BasicBlock *AtomicRedBlock =
1186  BasicBlock::Create(Module->getContext(), "reduce.switch.atomic", Func);
1187  SwitchInst *Switch =
1188  Builder.CreateSwitch(ReduceCall, ContinuationBlock, /* NumCases */ 2);
1189  Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
1190  Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
1191 
1192  // Populate the non-atomic reduction using the elementwise reduction function.
1193  // This loads the elements from the global and private variables and reduces
1194  // them before storing back the result to the global variable.
1195  Builder.SetInsertPoint(NonAtomicRedBlock);
1196  for (auto En : enumerate(ReductionInfos)) {
1197  const ReductionInfo &RI = En.value();
1198  Type *ValueType = RI.ElementType;
1199  Value *RedValue = Builder.CreateLoad(ValueType, RI.Variable,
1200  "red.value." + Twine(En.index()));
1201  Value *PrivateRedValue =
1202  Builder.CreateLoad(ValueType, RI.PrivateVariable,
1203  "red.private.value." + Twine(En.index()));
1204  Value *Reduced;
1205  Builder.restoreIP(
1206  RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced));
1207  if (!Builder.GetInsertBlock())
1208  return InsertPointTy();
1209  Builder.CreateStore(Reduced, RI.Variable);
1210  }
1211  Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
1212  IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
1213  : RuntimeFunction::OMPRTL___kmpc_end_reduce);
1214  Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
1215  Builder.CreateBr(ContinuationBlock);
1216 
1217  // Populate the atomic reduction using the atomic elementwise reduction
1218  // function. There are no loads/stores here because they will be happening
1219  // inside the atomic elementwise reduction.
1220  Builder.SetInsertPoint(AtomicRedBlock);
1221  if (CanGenerateAtomic) {
1222  for (const ReductionInfo &RI : ReductionInfos) {
1223  Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.ElementType,
1224  RI.Variable, RI.PrivateVariable));
1225  if (!Builder.GetInsertBlock())
1226  return InsertPointTy();
1227  }
1228  Builder.CreateBr(ContinuationBlock);
1229  } else {
1230  Builder.CreateUnreachable();
1231  }
1232 
1233  // Populate the outlined reduction function using the elementwise reduction
1234  // function. Partial values are extracted from the type-erased array of
1235  // pointers to private variables.
1236  BasicBlock *ReductionFuncBlock =
1237  BasicBlock::Create(Module->getContext(), "", ReductionFunc);
1238  Builder.SetInsertPoint(ReductionFuncBlock);
1239  Value *LHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(0),
1240  RedArrayTy->getPointerTo());
1241  Value *RHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(1),
1242  RedArrayTy->getPointerTo());
1243  for (auto En : enumerate(ReductionInfos)) {
1244  const ReductionInfo &RI = En.value();
1245  Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
1246  RedArrayTy, LHSArrayPtr, 0, En.index());
1247  Value *LHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), LHSI8PtrPtr);
1248  Value *LHSPtr = Builder.CreateBitCast(LHSI8Ptr, RI.Variable->getType());
1249  Value *LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
1250  Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
1251  RedArrayTy, RHSArrayPtr, 0, En.index());
1252  Value *RHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), RHSI8PtrPtr);
1253  Value *RHSPtr =
1254  Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType());
1255  Value *RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
1256  Value *Reduced;
1257  Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced));
1258  if (!Builder.GetInsertBlock())
1259  return InsertPointTy();
1260  Builder.CreateStore(Reduced, LHSPtr);
1261  }
1262  Builder.CreateRetVoid();
1263 
1264  Builder.SetInsertPoint(ContinuationBlock);
1265  return Builder.saveIP();
1266 }
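// Illustrative sketch of a caller-supplied reduction generator for a floating-
// point '+' reduction (the variable names SharedVar/PrivVar and the exact way
// ReductionInfo is constructed are assumptions for the example; the callback
// signature mirrors the RI.ReductionGen calls above):
//
//   auto SumReduction = [](OpenMPIRBuilder::InsertPointTy IP, Value *LHS,
//                          Value *RHS, Value *&Result) {
//     IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
//     Result = Builder.CreateFAdd(LHS, RHS, "red.add");
//     return Builder.saveIP();
//   };
//   OpenMPIRBuilder::ReductionInfo RI = {Builder.getFloatTy(), SharedVar,
//                                        PrivVar, SumReduction,
//                                        /*AtomicReductionGen=*/nullptr};
//   OMPBuilder.createReductions(Loc, AllocaIP, {RI}, /*IsNoWait=*/false);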
1267 
1268 OpenMPIRBuilder::InsertPointTy
1269 OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
1270  BodyGenCallbackTy BodyGenCB,
1271  FinalizeCallbackTy FiniCB) {
1272 
1273  if (!updateToLocation(Loc))
1274  return Loc.IP;
1275 
1276  Directive OMPD = Directive::OMPD_master;
1277  uint32_t SrcLocStrSize;
1278  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
1279  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1280  Value *ThreadId = getOrCreateThreadID(Ident);
1281  Value *Args[] = {Ident, ThreadId};
1282 
1283  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
1284  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
1285 
1286  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
1287  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
1288 
1289  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1290  /*Conditional*/ true, /*hasFinalize*/ true);
1291 }
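// The inlined region produced above is conceptually (sketch; block and value
// names vary):
//
//   %res  = call i32 @__kmpc_master(%struct.ident_t* @loc, i32 %tid)
//   %cond = icmp ne i32 %res, 0
//   br i1 %cond, label %omp_region.body, label %omp_region.end
//   ...                                  ; body, run by the master thread only
//   call void @__kmpc_end_master(%struct.ident_t* @loc, i32 %tid)
//   br label %omp_region.end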
1292 
1293 OpenMPIRBuilder::InsertPointTy
1294 OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
1295  BodyGenCallbackTy BodyGenCB,
1296  FinalizeCallbackTy FiniCB, Value *Filter) {
1297  if (!updateToLocation(Loc))
1298  return Loc.IP;
1299 
1300  Directive OMPD = Directive::OMPD_masked;
1301  uint32_t SrcLocStrSize;
1302  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
1303  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1304  Value *ThreadId = getOrCreateThreadID(Ident);
1305  Value *Args[] = {Ident, ThreadId, Filter};
1306  Value *ArgsEnd[] = {Ident, ThreadId};
1307 
1308  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
1309  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
1310 
1311  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
1312  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);
1313 
1314  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1315  /*Conditional*/ true, /*hasFinalize*/ true);
1316 }
1317 
1318 CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
1319  DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
1320  BasicBlock *PostInsertBefore, const Twine &Name) {
1321  Module *M = F->getParent();
1322  LLVMContext &Ctx = M->getContext();
1323  Type *IndVarTy = TripCount->getType();
1324 
1325  // Create the basic block structure.
1326  BasicBlock *Preheader =
1327  BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore);
1328  BasicBlock *Header =
1329  BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore);
1330  BasicBlock *Cond =
1331  BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore);
1332  BasicBlock *Body =
1333  BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore);
1334  BasicBlock *Latch =
1335  BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore);
1336  BasicBlock *Exit =
1337  BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore);
1338  BasicBlock *After =
1339  BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore);
1340 
1341  // Use specified DebugLoc for new instructions.
1342  Builder.SetCurrentDebugLocation(DL);
1343 
1344  Builder.SetInsertPoint(Preheader);
1345  Builder.CreateBr(Header);
1346 
1347  Builder.SetInsertPoint(Header);
1348  PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
1349  IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
1350  Builder.CreateBr(Cond);
1351 
1352  Builder.SetInsertPoint(Cond);
1353  Value *Cmp =
1354  Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
1355  Builder.CreateCondBr(Cmp, Body, Exit);
1356 
1357  Builder.SetInsertPoint(Body);
1358  Builder.CreateBr(Latch);
1359 
1360  Builder.SetInsertPoint(Latch);
1361  Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
1362  "omp_" + Name + ".next", /*HasNUW=*/true);
1363  Builder.CreateBr(Header);
1364  IndVarPHI->addIncoming(Next, Latch);
1365 
1366  Builder.SetInsertPoint(Exit);
1367  Builder.CreateBr(After);
1368 
1369  // Remember and return the canonical control flow.
1370  LoopInfos.emplace_front();
1371  CanonicalLoopInfo *CL = &LoopInfos.front();
1372 
1373  CL->Header = Header;
1374  CL->Cond = Cond;
1375  CL->Latch = Latch;
1376  CL->Exit = Exit;
1377 
1378 #ifndef NDEBUG
1379  CL->assertOK();
1380 #endif
1381  return CL;
1382 }
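// The blocks created above are wired into the following shape (sketch):
//
//   preheader -> header -> cond -> body -> latch -> header   (if iv < tripcount)
//                          cond -> exit -> after              (otherwise)
//
// The induction variable is a PHI in the header that starts at 0 and is
// incremented by 1 in the latch; the body block is left empty for the caller.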
1383 
1384 CanonicalLoopInfo *
1385 OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
1386  LoopBodyGenCallbackTy BodyGenCB,
1387  Value *TripCount, const Twine &Name) {
1388  BasicBlock *BB = Loc.IP.getBlock();
1389  BasicBlock *NextBB = BB->getNextNode();
1390 
1391  CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(),
1392  NextBB, NextBB, Name);
1393  BasicBlock *After = CL->getAfter();
1394 
1395  // If location is not set, don't connect the loop.
1396  if (updateToLocation(Loc)) {
1397  // Split the loop at the insertion point: Branch to the preheader and move
1398  // every following instruction to after the loop (the After BB). Also, the
1399  // new successor is the loop's after block.
1400  Builder.CreateBr(CL->getPreheader());
1401  After->getInstList().splice(After->begin(), BB->getInstList(),
1402  Builder.GetInsertPoint(), BB->end());
1403  After->replaceSuccessorsPhiUsesWith(BB, After);
1404  }
1405 
1406  // Emit the body content. We do it after connecting the loop to the CFG to
1407  // avoid that the callback encounters degenerate BBs.
1408  BodyGenCB(CL->getBodyIP(), CL->getIndVar());
1409 
1410 #ifndef NDEBUG
1411  CL->assertOK();
1412 #endif
1413  return CL;
1414 }
1415 
1416 CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop(
1417  const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
1418  Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
1419  InsertPointTy ComputeIP, const Twine &Name) {
1420 
1421  // Consider the following difficulties (assuming 8-bit signed integers):
1422  // * Adding \p Step to the loop counter which passes \p Stop may overflow:
1423  // DO I = 1, 100, 50
1424  // * A \p Step of INT_MIN cannot be normalized to a positive direction:
1425  // DO I = 100, 0, -128
1426 
1427  // Start, Stop and Step must be of the same integer type.
1428  auto *IndVarTy = cast<IntegerType>(Start->getType());
1429  assert(IndVarTy == Stop->getType() && "Stop type mismatch");
1430  assert(IndVarTy == Step->getType() && "Step type mismatch");
1431 
1432  LocationDescription ComputeLoc =
1433  ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
1434  updateToLocation(ComputeLoc);
1435 
1436  ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
1437  ConstantInt *One = ConstantInt::get(IndVarTy, 1);
1438 
1439  // Like Step, but always positive.
1440  Value *Incr = Step;
1441 
1442  // Distance between Start and Stop; always positive.
1443  Value *Span;
1444 
1445  // Condition indicating whether no iterations are executed at all, e.g. because
1446  // UB < LB.
1447  Value *ZeroCmp;
1448 
1449  if (IsSigned) {
1450  // Ensure that increment is positive. If not, negate and invert LB and UB.
1451  Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
1452  Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
1453  Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
1454  Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
1455  Span = Builder.CreateSub(UB, LB, "", false, true);
1456  ZeroCmp = Builder.CreateICmp(
1457  InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB);
1458  } else {
1459  Span = Builder.CreateSub(Stop, Start, "", true);
1460  ZeroCmp = Builder.CreateICmp(
1461  InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start);
1462  }
1463 
1464  Value *CountIfLooping;
1465  if (InclusiveStop) {
1466  CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
1467  } else {
1468  // Avoid incrementing past stop since it could overflow.
1469  Value *CountIfTwo = Builder.CreateAdd(
1470  Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
1471  Value *OneCmp = Builder.CreateICmp(
1472  InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr);
1473  CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
1474  }
1475  Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
1476  "omp_" + Name + ".tripcount");
1477 
1478  auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
1479  Builder.restoreIP(CodeGenIP);
1480  Value *Span = Builder.CreateMul(IV, Step);
1481  Value *IndVar = Builder.CreateAdd(Span, Start);
1482  BodyGenCB(Builder.saveIP(), IndVar);
1483  };
1484  LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
1485  return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
1486 }
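// Worked example (sketch): for the Fortran-style loop DO I = 1, 100, 50 with
// IsSigned=true and InclusiveStop=true, Incr = 50, Span = 100 - 1 = 99, and
// TripCount = 99 / 50 + 1 = 2, i.e. the iterations I = 1 and I = 51. The body
// callback then rebuilds I as Start + IV * Step from the canonical IV in [0, 2).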
1487 
1488 // Returns an LLVM function to call for initializing loop bounds using OpenMP
1489 // static scheduling depending on `type`. Only i32 and i64 are supported by the
1490 // runtime. Always interpret integers as unsigned similarly to
1491 // CanonicalLoopInfo.
1492 static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M,
1493  OpenMPIRBuilder &OMPBuilder) {
1494  unsigned Bitwidth = Ty->getIntegerBitWidth();
1495  if (Bitwidth == 32)
1496  return OMPBuilder.getOrCreateRuntimeFunction(
1497  M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
1498  if (Bitwidth == 64)
1499  return OMPBuilder.getOrCreateRuntimeFunction(
1500  M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
1501  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1502 }
1503 
1504 // Sets the number of loop iterations to the given value. This value must be
1505 // valid in the condition block (i.e., defined in the preheader) and is
1506 // interpreted as an unsigned integer.
1507 static void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount) {
1508  Instruction *CmpI = &CLI->getCond()->front();
1509  assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
1510  CmpI->setOperand(1, TripCount);
1511  CLI->assertOK();
1512 }
1513 
1514 OpenMPIRBuilder::InsertPointTy
1515 OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
1516  InsertPointTy AllocaIP,
1517  bool NeedsBarrier, Value *Chunk) {
1518  assert(CLI->isValid() && "Requires a valid canonical loop");
1519  assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
1520  "Require dedicated allocate IP");
1521 
1522  // Set up the source location value for OpenMP runtime.
1523  Builder.restoreIP(CLI->getPreheaderIP());
1524  Builder.SetCurrentDebugLocation(DL);
1525 
1526  uint32_t SrcLocStrSize;
1527  Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
1528  Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1529 
1530  // Declare useful OpenMP runtime functions.
1531  Value *IV = CLI->getIndVar();
1532  Type *IVTy = IV->getType();
1533  FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
1534  FunctionCallee StaticFini =
1535  getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
1536 
1537  // Allocate space for computed loop bounds as expected by the "init" function.
1538  Builder.restoreIP(AllocaIP);
1539  Type *I32Type = Type::getInt32Ty(M.getContext());
1540  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
1541  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
1542  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
1543  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
1544 
1545  // At the end of the preheader, prepare for calling the "init" function by
1546  // storing the current loop bounds into the allocated space. A canonical loop
1547  // always iterates from 0 to trip-count with step 1. Note that "init" expects
1548  // and produces an inclusive upper bound.
1549  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
1550  Constant *Zero = ConstantInt::get(IVTy, 0);
1551  Constant *One = ConstantInt::get(IVTy, 1);
1552  Builder.CreateStore(Zero, PLowerBound);
1553  Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
1554  Builder.CreateStore(UpperBound, PUpperBound);
1555  Builder.CreateStore(One, PStride);
1556 
1557  // FIXME: schedule(static) is NOT the same as schedule(static,1)
1558  if (!Chunk)
1559  Chunk = One;
1560 
1561  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
1562 
1563  Constant *SchedulingType =
1564  ConstantInt::get(I32Type, static_cast<int>(OMPScheduleType::Static));
1565 
1566  // Call the "init" function and update the trip count of the loop with the
1567  // value it produced.
1568  Builder.CreateCall(StaticInit,
1569  {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
1570  PUpperBound, PStride, One, Chunk});
1571  Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
1572  Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
1573  Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
1574  Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
1575  setCanonicalLoopTripCount(CLI, TripCount);
1576 
1577  // Update all uses of the induction variable except the one in the condition
1578  // block that compares it with the actual upper bound, and the increment in
1579  // the latch block.
1580  // TODO: this can eventually move to CanonicalLoopInfo or to a new
1581  // CanonicalLoopInfoUpdater interface.
1582  Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt());
1583  Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound);
1584  IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) {
1585  auto *Instr = dyn_cast<Instruction>(U.getUser());
1586  return !Instr ||
1587  (Instr->getParent() != CLI->getCond() &&
1588  Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV);
1589  });
1590 
1591  // In the "exit" block, call the "fini" function.
1592  Builder.SetInsertPoint(CLI->getExit(),
1593  CLI->getExit()->getTerminator()->getIterator());
1594  Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
1595 
1596  // Add the barrier if requested.
1597  if (NeedsBarrier)
1598  createBarrier(LocationDescription(Builder.saveIP(), DL),
1599  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
1600  /* CheckCancelFlag */ false);
1601 
1602  InsertPointTy AfterIP = CLI->getAfterIP();
1603  CLI->invalidate();
1604 
1605  return AfterIP;
1606 }
1607 
1608 OpenMPIRBuilder::InsertPointTy
1609 OpenMPIRBuilder::applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
1610  InsertPointTy AllocaIP, bool NeedsBarrier) {
1611  // Currently only supports static schedules.
1612  return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier);
1613 }
1614 
1615 /// Returns an LLVM function to call for initializing loop bounds using OpenMP
1616 /// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
1617 /// the runtime. Always interpret integers as unsigned similarly to
1618 /// CanonicalLoopInfo.
1619 static FunctionCallee
1620 getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
1621  unsigned Bitwidth = Ty->getIntegerBitWidth();
1622  if (Bitwidth == 32)
1623  return OMPBuilder.getOrCreateRuntimeFunction(
1624  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
1625  if (Bitwidth == 64)
1626  return OMPBuilder.getOrCreateRuntimeFunction(
1627  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
1628  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1629 }
1630 
1631 /// Returns an LLVM function to call for updating the next loop using OpenMP
1632 /// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
1633 /// the runtime. Always interpret integers as unsigned similarly to
1634 /// CanonicalLoopInfo.
1635 static FunctionCallee
1636 getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
1637  unsigned Bitwidth = Ty->getIntegerBitWidth();
1638  if (Bitwidth == 32)
1639  return OMPBuilder.getOrCreateRuntimeFunction(
1640  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
1641  if (Bitwidth == 64)
1642  return OMPBuilder.getOrCreateRuntimeFunction(
1643  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
1644  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1645 }
1646 
1647 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
1648  DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
1649  OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
1650  assert(CLI->isValid() && "Requires a valid canonical loop");
1651  assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
1652  "Require dedicated allocate IP");
1653 
1654  // Set up the source location value for OpenMP runtime.
1655  Builder.SetCurrentDebugLocation(DL);
1656 
1657  uint32_t SrcLocStrSize;
1658  Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
1659  Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1660 
1661  // Declare useful OpenMP runtime functions.
1662  Value *IV = CLI->getIndVar();
1663  Type *IVTy = IV->getType();
1664  FunctionCallee DynamicInit = getKmpcForDynamicInitForType(IVTy, M, *this);
1665  FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this);
1666 
1667  // Allocate space for computed loop bounds as expected by the "init" function.
1668  Builder.restoreIP(AllocaIP);
1669  Type *I32Type = Type::getInt32Ty(M.getContext());
1670  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
1671  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
1672  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
1673  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
1674 
1675  // At the end of the preheader, prepare for calling the "init" function by
1676  // storing the current loop bounds into the allocated space. A canonical loop
1677  // always iterates from 0 to trip-count with step 1. Note that "init" expects
1678  // and produces an inclusive upper bound.
1679  BasicBlock *PreHeader = CLI->getPreheader();
1680  Builder.SetInsertPoint(PreHeader->getTerminator());
1681  Constant *One = ConstantInt::get(IVTy, 1);
1682  Builder.CreateStore(One, PLowerBound);
1683  Value *UpperBound = CLI->getTripCount();
1684  Builder.CreateStore(UpperBound, PUpperBound);
1685  Builder.CreateStore(One, PStride);
1686 
1687  BasicBlock *Header = CLI->getHeader();
1688  BasicBlock *Exit = CLI->getExit();
1689  BasicBlock *Cond = CLI->getCond();
1690  InsertPointTy AfterIP = CLI->getAfterIP();
1691 
1692  // The CLI will be "broken" in the code below, as the loop is no longer
1693  // a valid canonical loop.
1694 
1695  if (!Chunk)
1696  Chunk = One;
1697 
1698  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
1699 
1700  Constant *SchedulingType =
1701  ConstantInt::get(I32Type, static_cast<int>(SchedType));
1702 
1703  // Call the "init" function.
1704  Builder.CreateCall(DynamicInit,
1705  {SrcLoc, ThreadNum, SchedulingType, /* LowerBound */ One,
1706  UpperBound, /* step */ One, Chunk});
1707 
1708  // An outer loop around the existing one.
1709  BasicBlock *OuterCond = BasicBlock::Create(
1710  PreHeader->getContext(), Twine(PreHeader->getName()) + ".outer.cond",
1711  PreHeader->getParent());
1712  // The result of __kmpc_dispatch_next is always 32-bit, so the comparison
1712  // below needs a dedicated 32-bit zero rather than an IVTy-typed constant.
1713  Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
1714  Value *Res =
1715  Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
1716  PLowerBound, PUpperBound, PStride});
1717  Constant *Zero32 = ConstantInt::get(I32Type, 0);
1718  Value *MoreWork = Builder.CreateCmp(CmpInst::ICMP_NE, Res, Zero32);
1719  Value *LowerBound =
1720  Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb");
1721  Builder.CreateCondBr(MoreWork, Header, Exit);
1722 
1723  // Change PHI-node in loop header to use outer cond rather than preheader,
1724  // and set IV to the LowerBound.
1725  Instruction *Phi = &Header->front();
1726  auto *PI = cast<PHINode>(Phi);
1727  PI->setIncomingBlock(0, OuterCond);
1728  PI->setIncomingValue(0, LowerBound);
1729 
1730  // Then set the pre-header to jump to the OuterCond
1731  Instruction *Term = PreHeader->getTerminator();
1732  auto *Br = cast<BranchInst>(Term);
1733  Br->setSuccessor(0, OuterCond);
1734 
1735  // Modify the inner condition:
1736  // * Use the UpperBound returned from the DynamicNext call.
1737  // * Jump to the outer loop when done with one pass over the inner loop.
1738  Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt());
1739  UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub");
1740  Instruction *Comp = &*Builder.GetInsertPoint();
1741  auto *CI = cast<CmpInst>(Comp);
1742  CI->setOperand(1, UpperBound);
1743  // Redirect the inner exit to branch to outer condition.
1744  Instruction *Branch = &Cond->back();
1745  auto *BI = cast<BranchInst>(Branch);
1746  assert(BI->getSuccessor(1) == Exit);
1747  BI->setSuccessor(1, OuterCond);
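 // After this rewrite the control flow is, roughly: the preheader calls
 // __kmpc_dispatch_init and falls through to OuterCond; OuterCond calls
 // __kmpc_dispatch_next to fetch the bounds of the next chunk and either
 // re-enters the (modified) inner loop for that chunk or exits; the inner
 // loop branches back to OuterCond once its chunk is finished.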
1748 
1749  // Add the barrier if requested.
1750  if (NeedsBarrier) {
1751  Builder.SetInsertPoint(&Exit->back());
1752  createBarrier(LocationDescription(Builder.saveIP(), DL),
1753  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
1754  /* CheckCancelFlag */ false);
1755  }
1756 
1757  CLI->invalidate();
1758  return AfterIP;
1759 }
1760 
1761 /// Make \p Source branch to \p Target.
1762 ///
1763 /// Handles two situations:
1764 /// * \p Source already has an unconditional branch.
1765 /// * \p Source is a degenerate block (no terminator because the BB is
1766 /// the current head of the IR construction).
1767 static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) {
1768  if (Instruction *Term = Source->getTerminator()) {
1769  auto *Br = cast<BranchInst>(Term);
1770  assert(!Br->isConditional() &&
1771  "BB's terminator must be an unconditional branch (or degenerate)");
1772  BasicBlock *Succ = Br->getSuccessor(0);
1773  Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
1774  Br->setSuccessor(0, Target);
1775  return;
1776  }
1777 
1778  auto *NewBr = BranchInst::Create(Target, Source);
1779  NewBr->setDebugLoc(DL);
1780 }
1781 
1782 /// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
1783 /// after this \p OldTarget will be orphaned.
1784 static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
1785  BasicBlock *NewTarget, DebugLoc DL) {
1786  for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget)))
1787  redirectTo(Pred, NewTarget, DL);
1788 }
1789 
1790 /// Determine which blocks in \p BBs are reachable from outside and remove the
1791 /// ones that are not reachable from the function.
1792 static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
1793  SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};
1794  auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
1795  for (Use &U : BB->uses()) {
1796  auto *UseInst = dyn_cast<Instruction>(U.getUser());
1797  if (!UseInst)
1798  continue;
1799  if (BBsToErase.count(UseInst->getParent()))
1800  continue;
1801  return true;
1802  }
1803  return false;
1804  };
1805 
1806  while (true) {
1807  bool Changed = false;
1808  for (BasicBlock *BB : make_early_inc_range(BBsToErase)) {
1809  if (HasRemainingUses(BB)) {
1810  BBsToErase.erase(BB);
1811  Changed = true;
1812  }
1813  }
1814  if (!Changed)
1815  break;
1816  }
1817 
1818  SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
1819  DeleteDeadBlocks(BBVec);
1820 }
1821 
1822 CanonicalLoopInfo *
1823 OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
1824  InsertPointTy ComputeIP) {
1825  assert(Loops.size() >= 1 && "At least one loop required");
1826  size_t NumLoops = Loops.size();
1827 
1828  // Nothing to do if there is already just one loop.
1829  if (NumLoops == 1)
1830  return Loops.front();
1831 
1832  CanonicalLoopInfo *Outermost = Loops.front();
1833  CanonicalLoopInfo *Innermost = Loops.back();
1834  BasicBlock *OrigPreheader = Outermost->getPreheader();
1835  BasicBlock *OrigAfter = Outermost->getAfter();
1836  Function *F = OrigPreheader->getParent();
1837 
1838  // Loop control blocks that may become orphaned later.
1839  SmallVector<BasicBlock *, 12> OldControlBBs;
1840  OldControlBBs.reserve(6 * Loops.size());
1841  for (CanonicalLoopInfo *Loop : Loops)
1842  Loop->collectControlBlocks(OldControlBBs);
1843 
1844  // Setup the IRBuilder for inserting the trip count computation.
1845  Builder.SetCurrentDebugLocation(DL);
1846  if (ComputeIP.isSet())
1847  Builder.restoreIP(ComputeIP);
1848  else
1849  Builder.restoreIP(Outermost->getPreheaderIP());
1850 
1851  // Derive the collapsed loop's trip count.
1852  // TODO: Find common/largest indvar type.
1853  Value *CollapsedTripCount = nullptr;
1854  for (CanonicalLoopInfo *L : Loops) {
1855  assert(L->isValid() &&
1856  "All loops to collapse must be valid canonical loops");
1857  Value *OrigTripCount = L->getTripCount();
1858  if (!CollapsedTripCount) {
1859  CollapsedTripCount = OrigTripCount;
1860  continue;
1861  }
1862 
1863  // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
1864  CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount,
1865  {}, /*HasNUW=*/true);
1866  }
1867 
1868  // Create the collapsed loop control flow.
1869  CanonicalLoopInfo *Result =
1870  createLoopSkeleton(DL, CollapsedTripCount, F,
1871  OrigPreheader->getNextNode(), OrigAfter, "collapsed");
1872 
1873  // Build the collapsed loop body code.
1874  // Start with deriving the input loop induction variables from the collapsed
1875  // one, using a divmod scheme. To preserve the original loops' order, the
1876  // innermost loop uses the least significant bits.
1877  Builder.restoreIP(Result->getBodyIP());
1878 
1879  Value *Leftover = Result->getIndVar();
1880  SmallVector<Value *> NewIndVars;
1881  NewIndVars.resize(NumLoops);
1882  for (int i = NumLoops - 1; i >= 1; --i) {
1883  Value *OrigTripCount = Loops[i]->getTripCount();
1884 
1885  Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
1886  NewIndVars[i] = NewIndVar;
1887 
1888  Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
1889  }
1890  // Outermost loop gets all the remaining bits.
1891  NewIndVars[0] = Leftover;
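 // For illustration (hypothetical trip counts): collapsing two loops with trip
 // counts 3 and 5 gives a collapsed trip count of 15; for the collapsed IV
 // value 13, the inner IV is 13 % 5 == 3 and the outer IV is 13 / 5 == 2,
 // i.e. iteration (2,3) of the original nest.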
1892 
1893  // Construct the loop body control flow.
1894  // We progressively construct the branch structure following the direction of
1895  // the control flow: first the leading in-between code, then the loop nest
1896  // body, the trailing in-between code, and finally rejoining the collapsed loop's latch.
1897  // ContinueBlock and ContinuePred keep track of the source(s) of next edge. If
1898  // the ContinueBlock is set, continue with that block. If ContinuePred, use
1899  // its predecessors as sources.
1900  BasicBlock *ContinueBlock = Result->getBody();
1901  BasicBlock *ContinuePred = nullptr;
1902  auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
1903  BasicBlock *NextSrc) {
1904  if (ContinueBlock)
1905  redirectTo(ContinueBlock, Dest, DL);
1906  else
1907  redirectAllPredecessorsTo(ContinuePred, Dest, DL);
1908 
1909  ContinueBlock = nullptr;
1910  ContinuePred = NextSrc;
1911  };
1912 
1913  // The code before the nested loop of each level.
1914  // Because we are sinking it into the nest, it will be executed more often
1915  // than the original loop. More sophisticated schemes could keep track of what
1916  // the in-between code is and instantiate it only once per thread.
1917  for (size_t i = 0; i < NumLoops - 1; ++i)
1918  ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());
1919 
1920  // Connect the loop nest body.
1921  ContinueWith(Innermost->getBody(), Innermost->getLatch());
1922 
1923  // The code after the nested loop at each level.
1924  for (size_t i = NumLoops - 1; i > 0; --i)
1925  ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());
1926 
1927  // Connect the finished loop to the collapsed loop latch.
1928  ContinueWith(Result->getLatch(), nullptr);
1929 
1930  // Replace the input loops with the new collapsed loop.
1931  redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL);
1932  redirectTo(Result->getAfter(), Outermost->getAfter(), DL);
1933 
1934  // Replace the input loop indvars with the derived ones.
1935  for (size_t i = 0; i < NumLoops; ++i)
1936  Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
1937 
1938  // Remove unused parts of the input loops.
1939  removeUnusedBlocksFromParent(OldControlBBs);
1940 
1941  for (CanonicalLoopInfo *L : Loops)
1942  L->invalidate();
1943 
1944 #ifndef NDEBUG
1945  Result->assertOK();
1946 #endif
1947  return Result;
1948 }
1949 
1950 std::vector<CanonicalLoopInfo *>
1951 OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
1952  ArrayRef<Value *> TileSizes) {
1953  assert(TileSizes.size() == Loops.size() &&
1954  "Must pass as many tile sizes as there are loops");
1955  int NumLoops = Loops.size();
1956  assert(NumLoops >= 1 && "At least one loop to tile required");
1957 
1958  CanonicalLoopInfo *OutermostLoop = Loops.front();
1959  CanonicalLoopInfo *InnermostLoop = Loops.back();
1960  Function *F = OutermostLoop->getBody()->getParent();
1961  BasicBlock *InnerEnter = InnermostLoop->getBody();
1962  BasicBlock *InnerLatch = InnermostLoop->getLatch();
1963 
1964  // Loop control blocks that may become orphaned later.
1965  SmallVector<BasicBlock *, 12> OldControlBBs;
1966  OldControlBBs.reserve(6 * Loops.size());
1967  for (CanonicalLoopInfo *Loop : Loops)
1968  Loop->collectControlBlocks(OldControlBBs);
1969 
1970  // Collect original trip counts and induction variable to be accessible by
1971  // index. Also, the structure of the original loops is not preserved during
1972  // the construction of the tiled loops, so do it before we scavenge the BBs of
1973  // any original CanonicalLoopInfo.
1974  SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
1975  for (CanonicalLoopInfo *L : Loops) {
1976  assert(L->isValid() && "All input loops must be valid canonical loops");
1977  OrigTripCounts.push_back(L->getTripCount());
1978  OrigIndVars.push_back(L->getIndVar());
1979  }
1980 
1981  // Collect the code between loop headers. These may contain SSA definitions
1982  // that are used in the loop nest body. To be usable within the innermost
1983  // body, these BasicBlocks will be sunk into the loop nest body. That is,
1984  // these instructions may be executed more often than before the tiling.
1985  // TODO: It would be sufficient to only sink them into body of the
1986  // corresponding tile loop.
1988  for (int i = 0; i < NumLoops - 1; ++i) {
1989  CanonicalLoopInfo *Surrounding = Loops[i];
1990  CanonicalLoopInfo *Nested = Loops[i + 1];
1991 
1992  BasicBlock *EnterBB = Surrounding->getBody();
1993  BasicBlock *ExitBB = Nested->getHeader();
1994  InbetweenCode.emplace_back(EnterBB, ExitBB);
1995  }
1996 
1997  // Compute the trip counts of the floor loops.
1998  Builder.SetCurrentDebugLocation(DL);
1999  Builder.restoreIP(OutermostLoop->getPreheaderIP());
2000  SmallVector<Value *, 4> FloorCount, FloorRems;
2001  for (int i = 0; i < NumLoops; ++i) {
2002  Value *TileSize = TileSizes[i];
2003  Value *OrigTripCount = OrigTripCounts[i];
2004  Type *IVType = OrigTripCount->getType();
2005 
2006  Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
2007  Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
2008 
2009  // 0 if tripcount divides the tilesize, 1 otherwise.
2010  // 1 means we need an additional iteration for a partial tile.
2011  //
2012  // Unfortunately we cannot just use the roundup-formula
2013  // (tripcount + tilesize - 1)/tilesize
2014  // because the summation might overflow. We do not want to introduce undefined
2015  // behavior when the untiled loop nest did not.
2016  Value *FloorTripOverflow =
2017  Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
2018 
2019  FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
2020  FloorTripCount =
2021  Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
2022  "omp_floor" + Twine(i) + ".tripcount", true);
2023 
2024  // Remember some values for later use.
2025  FloorCount.push_back(FloorTripCount);
2026  FloorRems.push_back(FloorTripRem);
2027  }
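 // For illustration (hypothetical values): with an original trip count of 10
 // and a tile size of 4, FloorTripCount = 10 / 4 = 2 and FloorTripRem = 2, so
 // the overflow bit is 1 and the floor loop runs 3 times; the remainder 2 is
 // remembered as the size of the partial tile.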
2028 
2029  // Generate the new loop nest, from the outermost to the innermost.
2030  std::vector<CanonicalLoopInfo *> Result;
2031  Result.reserve(NumLoops * 2);
2032 
2033  // The basic block of the surrounding loop that enters the nest generated
2034  // loop.
2035  BasicBlock *Enter = OutermostLoop->getPreheader();
2036 
2037  // The basic block of the surrounding loop where the inner code should
2038  // continue.
2039  BasicBlock *Continue = OutermostLoop->getAfter();
2040 
2041  // Where the next loop basic block should be inserted.
2042  BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
2043 
2044  auto EmbeddNewLoop =
2045  [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
2046  Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
2047  CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
2048  DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
2049  redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
2050  redirectTo(EmbeddedLoop->getAfter(), Continue, DL);
2051 
2052  // Setup the position where the next embedded loop connects to this loop.
2053  Enter = EmbeddedLoop->getBody();
2054  Continue = EmbeddedLoop->getLatch();
2055  OutroInsertBefore = EmbeddedLoop->getLatch();
2056  return EmbeddedLoop;
2057  };
2058 
2059  auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
2060  const Twine &NameBase) {
2061  for (auto P : enumerate(TripCounts)) {
2062  CanonicalLoopInfo *EmbeddedLoop =
2063  EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
2064  Result.push_back(EmbeddedLoop);
2065  }
2066  };
2067 
2068  EmbeddNewLoops(FloorCount, "floor");
2069 
2070  // Within the innermost floor loop, emit the code that computes the tile
2071  // sizes.
2072  Builder.SetInsertPoint(Enter->getTerminator());
2073  SmallVector<Value *, 4> TileCounts;
2074  for (int i = 0; i < NumLoops; ++i) {
2075  CanonicalLoopInfo *FloorLoop = Result[i];
2076  Value *TileSize = TileSizes[i];
2077 
2078  Value *FloorIsEpilogue =
2079  Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
2080  Value *TileTripCount =
2081  Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
2082 
2083  TileCounts.push_back(TileTripCount);
2084  }
2085 
2086  // Create the tile loops.
2087  EmbeddNewLoops(TileCounts, "tile");
2088 
2089  // Insert the inbetween code into the body.
2090  BasicBlock *BodyEnter = Enter;
2091  BasicBlock *BodyEntered = nullptr;
2092  for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
2093  BasicBlock *EnterBB = P.first;
2094  BasicBlock *ExitBB = P.second;
2095 
2096  if (BodyEnter)
2097  redirectTo(BodyEnter, EnterBB, DL);
2098  else
2099  redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);
2100 
2101  BodyEnter = nullptr;
2102  BodyEntered = ExitBB;
2103  }
2104 
2105  // Append the original loop nest body into the generated loop nest body.
2106  if (BodyEnter)
2107  redirectTo(BodyEnter, InnerEnter, DL);
2108  else
2109  redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
2110  redirectAllPredecessorsTo(InnerLatch, Continue, DL);
2111 
2112  // Replace the original induction variable with an induction variable computed
2113  // from the tile and floor induction variables.
2114  Builder.restoreIP(Result.back()->getBodyIP());
2115  for (int i = 0; i < NumLoops; ++i) {
2116  CanonicalLoopInfo *FloorLoop = Result[i];
2117  CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
2118  Value *OrigIndVar = OrigIndVars[i];
2119  Value *Size = TileSizes[i];
2120 
2121  Value *Scale =
2122  Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
2123  Value *Shift =
2124  Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
2125  OrigIndVar->replaceAllUsesWith(Shift);
2126  }
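 // For illustration (hypothetical values): with a tile size of 4, floor IV 2
 // and tile IV 1, the original induction variable is rebuilt as 4 * 2 + 1 == 9.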
2127 
2128  // Remove unused parts of the original loops.
2129  removeUnusedBlocksFromParent(OldControlBBs);
2130 
2131  for (CanonicalLoopInfo *L : Loops)
2132  L->invalidate();
2133 
2134 #ifndef NDEBUG
2135  for (CanonicalLoopInfo *GenL : Result)
2136  GenL->assertOK();
2137 #endif
2138  return Result;
2139 }
2140 
2141 /// Attach loop metadata \p Properties to the loop described by \p Loop. If the
2142 /// loop already has metadata, the loop properties are appended.
2143 static void addLoopMetadata(CanonicalLoopInfo *Loop,
2144  ArrayRef<Metadata *> Properties) {
2145  assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo");
2146 
2147  // Nothing to do if no property to attach.
2148  if (Properties.empty())
2149  return;
2150 
2151  LLVMContext &Ctx = Loop->getFunction()->getContext();
2152  SmallVector<Metadata *> NewLoopProperties;
2153  NewLoopProperties.push_back(nullptr);
2154 
2155  // If the loop already has metadata, prepend it to the new metadata.
2156  BasicBlock *Latch = Loop->getLatch();
2157  assert(Latch && "A valid CanonicalLoopInfo must have a unique latch");
2158  MDNode *Existing = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop);
2159  if (Existing)
2160  append_range(NewLoopProperties, drop_begin(Existing->operands(), 1));
2161 
2162  append_range(NewLoopProperties, Properties);
2163  MDNode *LoopID = MDNode::getDistinct(Ctx, NewLoopProperties);
2164  LoopID->replaceOperandWith(0, LoopID);
2165 
2166  Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
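 // The resulting IR has roughly the following shape (shown here for a single
 // "llvm.loop.unroll.enable" property):
 //   br i1 %cond, label %body, label %exit, !llvm.loop !0
 //   !0 = distinct !{!0, !1}
 //   !1 = !{!"llvm.loop.unroll.enable"}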
2167 }
2168 
2169 /// Attach llvm.access.group metadata to the memref instructions of \p Block
2170 static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup,
2171  LoopInfo &LI) {
2172  for (Instruction &I : *Block) {
2173  if (I.mayReadOrWriteMemory()) {
2174  // TODO: This instruction may already have an access group from
2175  // other pragmas, e.g. #pragma clang loop vectorize. Append
2176  // so that the existing metadata is not overwritten.
2177  I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
2178  }
2179  }
2180 }
2181 
2182 void OpenMPIRBuilder::unrollLoopFull(DebugLoc, CanonicalLoopInfo *Loop) {
2183  LLVMContext &Ctx = Builder.getContext();
2184  addLoopMetadata(
2185  Loop, {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
2186  MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.full"))});
2187 }
2188 
2189 void OpenMPIRBuilder::unrollLoopHeuristic(DebugLoc, CanonicalLoopInfo *Loop) {
2190  LLVMContext &Ctx = Builder.getContext();
2191  addLoopMetadata(
2192  Loop, {
2193  MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
2194  });
2195 }
2196 
2197 void OpenMPIRBuilder::applySimd(DebugLoc, CanonicalLoopInfo *CanonicalLoop) {
2198  LLVMContext &Ctx = Builder.getContext();
2199 
2200  Function *F = CanonicalLoop->getFunction();
2201 
2202  FunctionAnalysisManager FAM;
2203  FAM.registerPass([]() { return DominatorTreeAnalysis(); });
2204  FAM.registerPass([]() { return LoopAnalysis(); });
2205  FAM.registerPass([]() { return PassInstrumentationAnalysis(); });
2206 
2207  LoopAnalysis LIA;
2208  LoopInfo &&LI = LIA.run(*F, FAM);
2209 
2210  Loop *L = LI.getLoopFor(CanonicalLoop->getHeader());
2211 
2212  SmallSet<BasicBlock *, 8> Reachable;
2213 
2214  // Get the basic blocks from the loop in which memref instructions
2215  // can be found.
2216  // TODO: Generalize getting all blocks inside a CanonicalizeLoopInfo,
2217  // preferably without running any passes.
2218  for (BasicBlock *Block : L->getBlocks()) {
2219  if (Block == CanonicalLoop->getCond() ||
2220  Block == CanonicalLoop->getHeader())
2221  continue;
2222  Reachable.insert(Block);
2223  }
2224 
2225  // Add access group metadata to memory-access instructions.
2226  MDNode *AccessGroup = MDNode::getDistinct(Ctx, {});
2227  for (BasicBlock *BB : Reachable)
2228  addSimdMetadata(BB, AccessGroup, LI);
2229 
2230  // Use the above access group metadata to create loop level
2231  // metadata, which should be distinct for each loop.
2232  ConstantAsMetadata *BoolConst =
2233  ConstantAsMetadata::get(ConstantInt::getTrue(Type::getInt1Ty(Ctx)));
2234  // TODO: If the loop already has parallel access metadata, the two lists
2235  // have to be combined.
2236  addLoopMetadata(
2237  CanonicalLoop,
2238  {MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"),
2239  AccessGroup}),
2240  MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"),
2241  BoolConst})});
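 // The loop-level metadata created here has roughly the following shape:
 //   !0 = distinct !{!0, !1, !2}
 //   !1 = !{!"llvm.loop.parallel_accesses", !3}
 //   !2 = !{!"llvm.loop.vectorize.enable", i1 true}
 //   !3 = distinct !{}                     ; the access group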
2242 }
2243 
2244 /// Create the TargetMachine object to query the backend for optimization
2245 /// preferences.
2246 ///
2247 /// Ideally, this would be passed from the front-end to the OpenMPBuilder, but
2248 /// e.g. Clang does not pass it to its CodeGen layer and creates it only when
2249  /// needed for the LLVM pass pipeline. We use some default options to avoid
2250 /// having to pass too many settings from the frontend that probably do not
2251 /// matter.
2252 ///
2253 /// Currently, TargetMachine is only used sometimes by the unrollLoopPartial
2254 /// method. If we are going to use TargetMachine for more purposes, especially
2255 /// those that are sensitive to TargetOptions, RelocModel and CodeModel, it
2256  /// might be worth requiring front-ends to pass on their TargetMachine,
2257  /// or at least cache it between methods. Note that while frontends such as Clang
2258 /// have just a single main TargetMachine per translation unit, "target-cpu" and
2259 /// "target-features" that determine the TargetMachine are per-function and can
2260  /// be overridden using __attribute__((target("OPTIONS"))).
2261 static std::unique_ptr<TargetMachine>
2262 createTargetMachine(Function *F, CodeGenOpt::Level OptLevel) {
2263  Module *M = F->getParent();
2264 
2265  StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString();
2266  StringRef Features = F->getFnAttribute("target-features").getValueAsString();
2267  const std::string &Triple = M->getTargetTriple();
2268 
2269  std::string Error;
2270  const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
2271  if (!TheTarget)
2272  return {};
2273 
2274  llvm::TargetOptions Options;
2275  return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
2276  Triple, CPU, Features, Options, /*RelocModel=*/None, /*CodeModel=*/None,
2277  OptLevel));
2278 }
2279 
2280 /// Heuristically determine the best-performant unroll factor for \p CLI. This
2281 /// depends on the target processor. We are re-using the same heuristics as the
2282 /// LoopUnrollPass.
2283 static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
2284  Function *F = CLI->getFunction();
2285 
2286  // Assume the user requests the most aggressive unrolling, even if the rest of
2287  // the code is optimized using a lower setting.
2288  CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive;
2289  std::unique_ptr<TargetMachine> TM = createTargetMachine(F, OptLevel);
2290 
2292  FAM.registerPass([]() { return TargetLibraryAnalysis(); });
2293  FAM.registerPass([]() { return AssumptionAnalysis(); });
2294  FAM.registerPass([]() { return DominatorTreeAnalysis(); });
2295  FAM.registerPass([]() { return LoopAnalysis(); });
2296  FAM.registerPass([]() { return ScalarEvolutionAnalysis(); });
2297  FAM.registerPass([]() { return PassInstrumentationAnalysis(); });
2298  TargetIRAnalysis TIRA;
2299  if (TM)
2300  TIRA = TargetIRAnalysis(
2301  [&](const Function &F) { return TM->getTargetTransformInfo(F); });
2302  FAM.registerPass([&]() { return TIRA; });
2303 
2304  TargetIRAnalysis::Result &&TTI = TIRA.run(*F, FAM);
2305  ScalarEvolutionAnalysis SEA;
2306  ScalarEvolution &&SE = SEA.run(*F, FAM);
2307  DominatorTreeAnalysis DTA;
2308  DominatorTree &&DT = DTA.run(*F, FAM);
2309  LoopAnalysis LIA;
2310  LoopInfo &&LI = LIA.run(*F, FAM);
2311  AssumptionAnalysis ACT;
2312  AssumptionCache &&AC = ACT.run(*F, FAM);
2313  OptimizationRemarkEmitter ORE{F};
2314 
2315  Loop *L = LI.getLoopFor(CLI->getHeader());
2316  assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop");
2317 
2318  TargetTransformInfo::UnrollingPreferences UP =
2319  gatherUnrollingPreferences(L, SE, TTI,
2320  /*BlockFrequencyInfo=*/nullptr,
2321  /*ProfileSummaryInfo=*/nullptr, ORE, OptLevel,
2322  /*UserThreshold=*/None,
2323  /*UserCount=*/None,
2324  /*UserAllowPartial=*/true,
2325  /*UserAllowRuntime=*/true,
2326  /*UserUpperBound=*/None,
2327  /*UserFullUnrollMaxCount=*/None);
2328 
2329  UP.Force = true;
2330 
2331  // Account for additional optimizations taking place before the LoopUnrollPass
2332  // would unroll the loop.
2333  UP.Threshold *= UnrollThresholdFactor;
2334  UP.PartialThreshold *= UnrollThresholdFactor;
2335 
2336  // Use normal unroll factors even if the rest of the code is optimized for
2337  // size.
2338  UP.OptSizeThreshold = UP.Threshold;
2339  UP.PartialOptSizeThreshold = UP.PartialThreshold;
2340 
2341  LLVM_DEBUG(dbgs() << "Unroll heuristic thresholds:\n"
2342  << " Threshold=" << UP.Threshold << "\n"
2343  << " PartialThreshold=" << UP.PartialThreshold << "\n"
2344  << " OptSizeThreshold=" << UP.OptSizeThreshold << "\n"
2345  << " PartialOptSizeThreshold="
2346  << UP.PartialOptSizeThreshold << "\n");
2347 
2348  // Disable peeling.
2349  TargetTransformInfo::PeelingPreferences PP =
2350  gatherPeelingPreferences(L, SE, TTI,
2351  /*UserAllowPeeling=*/false,
2352  /*UserAllowProfileBasedPeeling=*/false,
2353  /*UnrollingSpecficValues=*/false);
2354 
2356  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
2357 
2358  // Assume that reads and writes to stack variables can be eliminated by
2359  // Mem2Reg, SROA or LICM. That is, don't count them towards the loop body's
2360  // size.
2361  for (BasicBlock *BB : L->blocks()) {
2362  for (Instruction &I : *BB) {
2363  Value *Ptr;
2364  if (auto *Load = dyn_cast<LoadInst>(&I)) {
2365  Ptr = Load->getPointerOperand();
2366  } else if (auto *Store = dyn_cast<StoreInst>(&I)) {
2367  Ptr = Store->getPointerOperand();
2368  } else
2369  continue;
2370 
2371  Ptr = Ptr->stripPointerCasts();
2372 
2373  if (auto *Alloca = dyn_cast<AllocaInst>(Ptr)) {
2374  if (Alloca->getParent() == &F->getEntryBlock())
2375  EphValues.insert(&I);
2376  }
2377  }
2378  }
2379 
2380  unsigned NumInlineCandidates;
2381  bool NotDuplicatable;
2382  bool Convergent;
2383  unsigned LoopSize =
2384  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
2385  TTI, EphValues, UP.BEInsns);
2386  LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSize << "\n");
2387 
2388  // Loop is not unrollable if the loop contains certain instructions.
2389  if (NotDuplicatable || Convergent) {
2390  LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n");
2391  return 1;
2392  }
2393 
2394  // TODO: Determine trip count of \p CLI if constant, computeUnrollCount might
2395  // be able to use it.
2396  int TripCount = 0;
2397  int MaxTripCount = 0;
2398  bool MaxOrZero = false;
2399  unsigned TripMultiple = 0;
2400 
2401  bool UseUpperBound = false;
2402  computeUnrollCount(L, TTI, DT, &LI, SE, EphValues, &ORE, TripCount,
2403  MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, PP,
2404  UseUpperBound);
2405  unsigned Factor = UP.Count;
2406  LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n");
2407 
2408  // This function returns 1 to signal that the loop should not be unrolled.
2409  if (Factor == 0)
2410  return 1;
2411  return Factor;
2412 }
2413 
2415  int32_t Factor,
2416  CanonicalLoopInfo **UnrolledCLI) {
2417  assert(Factor >= 0 && "Unroll factor must not be negative");
2418 
2419  Function *F = Loop->getFunction();
2420  LLVMContext &Ctx = F->getContext();
2421 
2422  // If the unrolled loop is not used for another loop-associated directive, it
2423  // is sufficient to add metadata for the LoopUnrollPass.
2424  if (!UnrolledCLI) {
2425  SmallVector<Metadata *, 2> LoopMetadata;
2426  LoopMetadata.push_back(
2427  MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")));
2428 
2429  if (Factor >= 1) {
2430  ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
2431  ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
2432  LoopMetadata.push_back(MDNode::get(
2433  Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst}));
2434  }
2435 
2436  addLoopMetadata(Loop, LoopMetadata);
2437  return;
2438  }
2439 
2440  // Heuristically determine the unroll factor.
2441  if (Factor == 0)
2442  Factor = computeHeuristicUnrollFactor(Loop);
2443 
2444  // No change required with unroll factor 1.
2445  if (Factor == 1) {
2446  *UnrolledCLI = Loop;
2447  return;
2448  }
2449 
2450  assert(Factor >= 2 &&
2451  "unrolling only makes sense with a factor of 2 or larger");
2452 
2453  Type *IndVarTy = Loop->getIndVarType();
2454 
2455  // Apply partial unrolling by tiling the loop by the unroll-factor, then fully
2456  // unroll the inner loop.
2457  Value *FactorVal =
2458  ConstantInt::get(IndVarTy, APInt(IndVarTy->getIntegerBitWidth(), Factor,
2459  /*isSigned=*/false));
2460  std::vector<CanonicalLoopInfo *> LoopNest =
2461  tileLoops(DL, {Loop}, {FactorVal});
2462  assert(LoopNest.size() == 2 && "Expect 2 loops after tiling");
2463  *UnrolledCLI = LoopNest[0];
2464  CanonicalLoopInfo *InnerLoop = LoopNest[1];
2465 
2466  // LoopUnrollPass can only fully unroll loops with constant trip count.
2467  // Unroll by the unroll factor with a fallback epilog for the remainder
2468  // iterations if necessary.
2469  ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
2470  ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
2471  addLoopMetadata(
2472  InnerLoop,
2473  {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
2474  MDNode::get(
2475  Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst})});
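 // For illustration, with Factor == 4 the inner tile loop ends up annotated
 // roughly as:
 //   !llvm.loop !0
 //   !0 = distinct !{!0, !1, !2}
 //   !1 = !{!"llvm.loop.unroll.enable"}
 //   !2 = !{!"llvm.loop.unroll.count", i32 4}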
2476 
2477 #ifndef NDEBUG
2478  (*UnrolledCLI)->assertOK();
2479 #endif
2480 }
2481 
2482 OpenMPIRBuilder::InsertPointTy
2483 OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
2484  llvm::Value *BufSize, llvm::Value *CpyBuf,
2485  llvm::Value *CpyFn, llvm::Value *DidIt) {
2486  if (!updateToLocation(Loc))
2487  return Loc.IP;
2488 
2489  uint32_t SrcLocStrSize;
2490  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
2491  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2492  Value *ThreadId = getOrCreateThreadID(Ident);
2493 
2494  llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
2495 
2496  Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
2497 
2498  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
2499  Builder.CreateCall(Fn, Args);
2500 
2501  return Builder.saveIP();
2502 }
2503 
2504 OpenMPIRBuilder::InsertPointTy
2505 OpenMPIRBuilder::createSingle(const LocationDescription &Loc,
2506  BodyGenCallbackTy BodyGenCB,
2507  FinalizeCallbackTy FiniCB, llvm::Value *DidIt) {
2508 
2509  if (!updateToLocation(Loc))
2510  return Loc.IP;
2511 
2512  // If needed (i.e. not null), initialize `DidIt` with 0
2513  if (DidIt) {
2514  Builder.CreateStore(Builder.getInt32(0), DidIt);
2515  }
2516 
2517  Directive OMPD = Directive::OMPD_single;
2518  uint32_t SrcLocStrSize;
2519  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
2520  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2521  Value *ThreadId = getOrCreateThreadID(Ident);
2522  Value *Args[] = {Ident, ThreadId};
2523 
2524  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
2525  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
2526 
2527  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
2528  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
2529 
2530  // generates the following:
2531  // if (__kmpc_single()) {
2532  // .... single region ...
2533  // __kmpc_end_single
2534  // }
2535 
2536  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
2537  /*Conditional*/ true, /*hasFinalize*/ true);
2538 }
2539 
2540 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
2541  const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
2542  FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {
2543 
2544  if (!updateToLocation(Loc))
2545  return Loc.IP;
2546 
2547  Directive OMPD = Directive::OMPD_critical;
2548  uint32_t SrcLocStrSize;
2549  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
2550  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2551  Value *ThreadId = getOrCreateThreadID(Ident);
2552  Value *LockVar = getOMPCriticalRegionLock(CriticalName);
2553  Value *Args[] = {Ident, ThreadId, LockVar};
2554 
2556  Function *RTFn = nullptr;
2557  if (HintInst) {
2558  // Add Hint to entry Args and create call
2559  EnterArgs.push_back(HintInst);
2560  RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
2561  } else {
2562  RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
2563  }
2564  Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
2565 
2566  Function *ExitRTLFn =
2567  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
2568  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
2569 
2570  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
2571  /*Conditional*/ false, /*hasFinalize*/ true);
2572 }
2573 
2574 OpenMPIRBuilder::InsertPointTy
2575 OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc,
2576  InsertPointTy AllocaIP, unsigned NumLoops,
2577  ArrayRef<llvm::Value *> StoreValues,
2578  const Twine &Name, bool IsDependSource) {
2579  for (size_t I = 0; I < StoreValues.size(); I++)
2580  assert(StoreValues[I]->getType()->isIntegerTy(64) &&
2581  "OpenMP runtime requires depend vec with i64 type");
2582 
2583  if (!updateToLocation(Loc))
2584  return Loc.IP;
2585 
2586  // Allocate space for vector and generate alloc instruction.
2587  auto *ArrI64Ty = ArrayType::get(Int64, NumLoops);
2588  Builder.restoreIP(AllocaIP);
2589  AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty, nullptr, Name);
2590  ArgsBase->setAlignment(Align(8));
2591  Builder.restoreIP(Loc.IP);
2592 
2593  // Store the index value with offset in depend vector.
2594  for (unsigned I = 0; I < NumLoops; ++I) {
2595  Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
2596  ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(I)});
2597  StoreInst *STInst = Builder.CreateStore(StoreValues[I], DependAddrGEPIter);
2598  STInst->setAlignment(Align(8));
2599  }
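 // For illustration: for "#pragma omp ordered depend(sink: i-1, j)" inside a
 // loop nest with ordered(2), the caller passes NumLoops == 2 and StoreValues
 // holding the two i64 iteration numbers {i-1, j}; they are stored into the
 // vector above and its base address is handed to __kmpc_doacross_wait below.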
2600 
2601  Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
2602  ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
2603 
2604  uint32_t SrcLocStrSize;
2605  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
2606  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2607  Value *ThreadId = getOrCreateThreadID(Ident);
2608  Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
2609 
2610  Function *RTLFn = nullptr;
2611  if (IsDependSource)
2612  RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
2613  else
2614  RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
2615  Builder.CreateCall(RTLFn, Args);
2616 
2617  return Builder.saveIP();
2618 }
2619 
2620 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd(
2621  const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
2622  FinalizeCallbackTy FiniCB, bool IsThreads) {
2623  if (!updateToLocation(Loc))
2624  return Loc.IP;
2625 
2626  Directive OMPD = Directive::OMPD_ordered;
2627  Instruction *EntryCall = nullptr;
2628  Instruction *ExitCall = nullptr;
2629 
2630  if (IsThreads) {
2631  uint32_t SrcLocStrSize;
2632  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
2633  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2634  Value *ThreadId = getOrCreateThreadID(Ident);
2635  Value *Args[] = {Ident, ThreadId};
2636 
2637  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
2638  EntryCall = Builder.CreateCall(EntryRTLFn, Args);
2639 
2640  Function *ExitRTLFn =
2641  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
2642  ExitCall = Builder.CreateCall(ExitRTLFn, Args);
2643  }
2644 
2645  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
2646  /*Conditional*/ false, /*hasFinalize*/ true);
2647 }
2648 
2649 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
2650  Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
2651  BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
2652  bool HasFinalize, bool IsCancellable) {
2653 
2654  if (HasFinalize)
2655  FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
2656 
2657  // Create inlined region's entry and body blocks, in preparation
2658  // for conditional creation
2659  BasicBlock *EntryBB = Builder.GetInsertBlock();
2660  Instruction *SplitPos = EntryBB->getTerminator();
2661  if (!isa_and_nonnull<BranchInst>(SplitPos))
2662  SplitPos = new UnreachableInst(Builder.getContext(), EntryBB);
2663  BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
2664  BasicBlock *FiniBB =
2665  EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");
2666 
2667  Builder.SetInsertPoint(EntryBB->getTerminator());
2668  emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
2669 
2670  // generate body
2671  BodyGenCB(/* AllocaIP */ InsertPointTy(),
2672  /* CodeGenIP */ Builder.saveIP(), *FiniBB);
2673 
2674  // If we didn't emit a branch to FiniBB during body generation, it means
2675  // FiniBB is unreachable (e.g. while(1);). Stop generating all the
2676  // unreachable blocks, and remove anything we are not going to use.
2677  auto SkipEmittingRegion = FiniBB->hasNPredecessors(0);
2678  if (SkipEmittingRegion) {
2679  FiniBB->eraseFromParent();
2680  ExitCall->eraseFromParent();
2681  // Discard finalization if we have it.
2682  if (HasFinalize) {
2683  assert(!FinalizationStack.empty() &&
2684  "Unexpected finalization stack state!");
2685  FinalizationStack.pop_back();
2686  }
2687  } else {
2688  // emit exit call and do any needed finalization.
2689  auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
2690  assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
2691  FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
2692  "Unexpected control flow graph state!!");
2693  emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
2694  assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
2695  "Unexpected Control Flow State!");
2696  MergeBlockIntoPredecessor(FiniBB);
2697  }
2698 
2699  // If we are skipping the region of a non-conditional, remove the exit
2700  // block, and clear the builder's insertion point.
2701  assert(SplitPos->getParent() == ExitBB &&
2702  "Unexpected Insertion point location!");
2703  if (!Conditional && SkipEmittingRegion) {
2704  ExitBB->eraseFromParent();
2705  Builder.ClearInsertionPoint();
2706  } else {
2707  auto merged = MergeBlockIntoPredecessor(ExitBB);
2708  BasicBlock *ExitPredBB = SplitPos->getParent();
2709  auto InsertBB = merged ? ExitPredBB : ExitBB;
2710  if (!isa_and_nonnull<BranchInst>(SplitPos))
2711  SplitPos->eraseFromParent();
2712  Builder.SetInsertPoint(InsertBB);
2713  }
2714 
2715  return Builder.saveIP();
2716 }
2717 
2718 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
2719  Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {
2720  // If there is nothing to do, return the current insertion point.
2721  if (!Conditional || !EntryCall)
2722  return Builder.saveIP();
2723 
2724  BasicBlock *EntryBB = Builder.GetInsertBlock();
2725  Value *CallBool = Builder.CreateIsNotNull(EntryCall);
2726  auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body");
2727  auto *UI = new UnreachableInst(Builder.getContext(), ThenBB);
2728 
2729  // Emit thenBB and set the Builder's insertion point there for
2730  // body generation next. Place the block after the current block.
2731  Function *CurFn = EntryBB->getParent();
2732  CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB);
2733 
2734  // Move Entry branch to end of ThenBB, and replace with conditional
2735  // branch (If-stmt)
2736  Instruction *EntryBBTI = EntryBB->getTerminator();
2737  Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
2738  EntryBBTI->removeFromParent();
2739  Builder.SetInsertPoint(UI);
2740  Builder.Insert(EntryBBTI);
2741  UI->eraseFromParent();
2742  Builder.SetInsertPoint(ThenBB->getTerminator());
2743 
2744  // return an insertion point to ExitBB.
2745  return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
2746 }
2747 
2748 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
2749  omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
2750  bool HasFinalize) {
2751 
2752  Builder.restoreIP(FinIP);
2753 
2754  // If there is finalization to do, emit it before the exit call
2755  if (HasFinalize) {
2756  assert(!FinalizationStack.empty() &&
2757  "Unexpected finalization stack state!");
2758 
2759  FinalizationInfo Fi = FinalizationStack.pop_back_val();
2760  assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");
2761 
2762  Fi.FiniCB(FinIP);
2763 
2764  BasicBlock *FiniBB = FinIP.getBlock();
2765  Instruction *FiniBBTI = FiniBB->getTerminator();
2766 
2767  // set Builder IP for call creation
2768  Builder.SetInsertPoint(FiniBBTI);
2769  }
2770 
2771  if (!ExitCall)
2772  return Builder.saveIP();
2773 
2774  // Place the exit call as the last instruction before the finalization block terminator.
2775  ExitCall->removeFromParent();
2776  Builder.Insert(ExitCall);
2777 
2778  return IRBuilder<>::InsertPoint(ExitCall->getParent(),
2779  ExitCall->getIterator());
2780 }
2781 
2782 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
2783  InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
2784  llvm::IntegerType *IntPtrTy, bool BranchtoEnd) {
2785  if (!IP.isSet())
2786  return IP;
2787 
2789 
2790  // creates the following CFG structure
2791  // OMP_Entry : (MasterAddr != PrivateAddr)?
2792  // F T
2793  // | \
2794  // | copyin.not.master
2795  // | /
2796  // v /
2797  // copyin.not.master.end
2798  // |
2799  // v
2800  // OMP.Entry.Next
2801 
2802  BasicBlock *OMP_Entry = IP.getBlock();
2803  Function *CurFn = OMP_Entry->getParent();
2804  BasicBlock *CopyBegin =
2805  BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn);
2806  BasicBlock *CopyEnd = nullptr;
2807 
2808  // If entry block is terminated, split to preserve the branch to following
2809  // basic block (i.e. OMP.Entry.Next), otherwise, leave everything as is.
2810  if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) {
2811  CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(),
2812  "copyin.not.master.end");
2813  OMP_Entry->getTerminator()->eraseFromParent();
2814  } else {
2815  CopyEnd =
2816  BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn);
2817  }
2818 
2819  Builder.SetInsertPoint(OMP_Entry);
2820  Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
2821  Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
2822  Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
2823  Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
2824 
2825  Builder.SetInsertPoint(CopyBegin);
2826  if (BranchtoEnd)
2827  Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
2828 
2829  return Builder.saveIP();
2830 }
2831 
2832 CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
2833  Value *Size, Value *Allocator,
2834  std::string Name) {
2835  IRBuilder<>::InsertPointGuard IPG(Builder);
2836  Builder.restoreIP(Loc.IP);
2837 
2838  uint32_t SrcLocStrSize;
2839  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
2840  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2841  Value *ThreadId = getOrCreateThreadID(Ident);
2842  Value *Args[] = {ThreadId, Size, Allocator};
2843 
2844  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
2845 
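 // The emitted IR is roughly (types shown for a typical 64-bit target):
 //   %<Name> = call i8* @__kmpc_alloc(i32 %thread_id, i64 %size, i8* %allocator)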
2846  return Builder.CreateCall(Fn, Args, Name);
2847 }
2848 
2849 CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
2850  Value *Addr, Value *Allocator,
2851  std::string Name) {
2852  IRBuilder<>::InsertPointGuard IPG(Builder);
2853  Builder.restoreIP(Loc.IP);
2854 
2855  uint32_t SrcLocStrSize;
2856  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
2857  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2858  Value *ThreadId = getOrCreateThreadID(Ident);
2859  Value *Args[] = {ThreadId, Addr, Allocator};
2860  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
2861  return Builder.CreateCall(Fn, Args, Name);
2862 }
2863 
2864 CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
2865  const LocationDescription &Loc, llvm::Value *Pointer,
2866  llvm::ConstantInt *Size, const llvm::Twine &Name) {
2867  IRBuilder<>::InsertPointGuard IPG(Builder);
2868  Builder.restoreIP(Loc.IP);
2869 
2870  uint32_t SrcLocStrSize;
2871  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
2872  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2873  Value *ThreadId = getOrCreateThreadID(Ident);
2874  Constant *ThreadPrivateCache =
2875  getOrCreateOMPInternalVariable(Int8PtrPtr, Name);
2876  llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};
2877 
2878  Function *Fn =
2879  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
2880 
2881  return Builder.CreateCall(Fn, Args);
2882 }
2883 
2884 OpenMPIRBuilder::InsertPointTy
2885 OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD,
2886  bool RequiresFullRuntime) {
2887  if (!updateToLocation(Loc))
2888  return Loc.IP;
2889 
2890  uint32_t SrcLocStrSize;
2891  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
2892  Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2893  ConstantInt *IsSPMDVal = ConstantInt::getSigned(
2894  IntegerType::getInt8Ty(Int8->getContext()),
2896  ConstantInt *UseGenericStateMachine =
2897  ConstantInt::getBool(Int32->getContext(), !IsSPMD);
2898  ConstantInt *RequiresFullRuntimeVal =
2899  ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
2900 
2901  Function *Fn = getOrCreateRuntimeFunctionPtr(
2902  omp::RuntimeFunction::OMPRTL___kmpc_target_init);
2903 
2904  CallInst *ThreadKind = Builder.CreateCall(
2905  Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
2906 
2907  Value *ExecUserCode = Builder.CreateICmpEQ(
2908  ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
2909  "exec_user_code");
2910 
2911  // ThreadKind = __kmpc_target_init(...)
2912  // if (ThreadKind == -1)
2913  // user_code
2914  // else
2915  // return;
2916 
2917  auto *UI = Builder.CreateUnreachable();
2918  BasicBlock *CheckBB = UI->getParent();
2919  BasicBlock *UserCodeEntryBB = CheckBB->splitBasicBlock(UI, "user_code.entry");
2920 
2921  BasicBlock *WorkerExitBB = BasicBlock::Create(
2922  CheckBB->getContext(), "worker.exit", CheckBB->getParent());
2923  Builder.SetInsertPoint(WorkerExitBB);
2924  Builder.CreateRetVoid();
2925 
2926  auto *CheckBBTI = CheckBB->getTerminator();
2927  Builder.SetInsertPoint(CheckBBTI);
2928  Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
2929 
2930  CheckBBTI->eraseFromParent();
2931  UI->eraseFromParent();
2932 
2933  // Continue in the "user_code" block, see diagram above and in
2934  // openmp/libomptarget/deviceRTLs/common/include/target.h .
2935  return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt());
2936 }
2937 
2938 void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
2939  bool IsSPMD,
2940  bool RequiresFullRuntime) {
2941  if (!updateToLocation(Loc))
2942  return;
2943 
2944  uint32_t SrcLocStrSize;
2945  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
2946  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2947  ConstantInt *IsSPMDVal = ConstantInt::getSigned(
2948  IntegerType::getInt8Ty(Int8->getContext()),
2950  ConstantInt *RequiresFullRuntimeVal =
2951  ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
2952 
2953  Function *Fn = getOrCreateRuntimeFunctionPtr(
2954  omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
2955 
2956  Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal});
2957 }
2958 
2959 std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
2960  StringRef FirstSeparator,
2961  StringRef Separator) {
2962  SmallString<128> Buffer;
2963  llvm::raw_svector_ostream OS(Buffer);
2964  StringRef Sep = FirstSeparator;
2965  for (StringRef Part : Parts) {
2966  OS << Sep << Part;
2967  Sep = Separator;
2968  }
2969  return OS.str().str();
2970 }
2971 
2972 Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable(
2973  llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2974  // TODO: Replace the twine arg with stringref to get rid of the conversion
2975  // logic. However, this is taken from the current implementation in clang as is.
2976  // Since this method is used in many places exclusively for OMP internal use
2977  // we will keep it as is temporarily until we move all users to the
2978  // builder and then, if possible, fix it everywhere in one go.
2979  SmallString<256> Buffer;
2980  llvm::raw_svector_ostream Out(Buffer);
2981  Out << Name;
2982  StringRef RuntimeName = Out.str();
2983  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2984  if (Elem.second) {
2985  assert(Elem.second->getType()->getPointerElementType() == Ty &&
2986  "OMP internal variable has different type than requested");
2987  } else {
2988  // TODO: investigate the appropriate linkage type used for the global
2989  // variable for possibly changing that to internal or private, or maybe
2990  // create different versions of the function for different OMP internal
2991  // variables.
2992  Elem.second = new llvm::GlobalVariable(
2993  M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage,
2994  llvm::Constant::getNullValue(Ty), Elem.first(),
2995  /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
2996  AddressSpace);
2997  }
2998 
2999  return Elem.second;
3000 }
3001 
3002 Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
3003  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3004  std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");
3005  return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name);
3006 }
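// Worked example (annotation, not part of OMPIRBuilder.cpp): for a critical
// region named "foo", the prefix becomes "gomp_critical_user_foo" and
// getNameWithSeparators produces ".gomp_critical_user_foo.var", so the lock
// is a common-linkage global of type KmpCriticalNameTy (an [8 x i32] array
// per OMPKinds.def) named ".gomp_critical_user_foo.var".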
3007 
3008 GlobalVariable *
3009 OpenMPIRBuilder::createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
3010  std::string VarName) {
3011  llvm::Constant *MaptypesArrayInit =
3012  llvm::ConstantDataArray::get(M.getContext(), Mappings);
3013  auto *MaptypesArrayGlobal = new llvm::GlobalVariable(
3014  M, MaptypesArrayInit->getType(),
3015  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MaptypesArrayInit,
3016  VarName);
3017  MaptypesArrayGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3018  return MaptypesArrayGlobal;
3019 }
3020 
3021 void OpenMPIRBuilder::createMapperAllocas(const LocationDescription &Loc,
3022  InsertPointTy AllocaIP,
3023  unsigned NumOperands,
3024  struct MapperAllocas &MapperAllocas) {
3025  if (!updateToLocation(Loc))
3026  return;
3027 
3028  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
3029  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
3030  Builder.restoreIP(AllocaIP);
3031  AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI8PtrTy);
3032  AllocaInst *Args = Builder.CreateAlloca(ArrI8PtrTy);
3033  AllocaInst *ArgSizes = Builder.CreateAlloca(ArrI64Ty);
3034  Builder.restoreIP(Loc.IP);
3035  MapperAllocas.ArgsBase = ArgsBase;
3036  MapperAllocas.Args = Args;
3037  MapperAllocas.ArgSizes = ArgSizes;
3038 }
3039 
3040 void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc,
3041  Function *MapperFunc, Value *SrcLocInfo,
3042  Value *MaptypesArg, Value *MapnamesArg,
3043  struct MapperAllocas &MapperAllocas,
3044  int64_t DeviceID, unsigned NumOperands) {
3045  if (!updateToLocation(Loc))
3046  return;
3047 
3048  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
3049  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
3050  Value *ArgsBaseGEP =
3051  Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
3052  {Builder.getInt32(0), Builder.getInt32(0)});
3053  Value *ArgsGEP =
3054  Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
3055  {Builder.getInt32(0), Builder.getInt32(0)});
3056  Value *ArgSizesGEP =
3057  Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
3058  {Builder.getInt32(0), Builder.getInt32(0)});
3059  Value *NullPtr = Constant::getNullValue(Int8Ptr->getPointerTo());
3060  Builder.CreateCall(MapperFunc,
3061  {SrcLocInfo, Builder.getInt64(DeviceID),
3062  Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
3063  ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
3064 }
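// Illustrative usage sketch (annotation, not part of OMPIRBuilder.cpp): a
// frontend lowering a target-data style construct would typically pair
// createMapperAllocas with emitMapperCall. OMPBuilder, Loc, AllocaIP,
// SrcLocInfo, MaptypesArg, MapnamesArg and DeviceID are assumed placeholders:
//
//   OpenMPIRBuilder::MapperAllocas Allocas;
//   OMPBuilder.createMapperAllocas(Loc, AllocaIP, /*NumOperands=*/2, Allocas);
//   // ... store base pointers, pointers and sizes into Allocas.ArgsBase,
//   //     Allocas.Args and Allocas.ArgSizes ...
//   Function *BeginMapper = OMPBuilder.getOrCreateRuntimeFunctionPtr(
//       omp::OMPRTL___tgt_target_data_begin_mapper);
//   OMPBuilder.emitMapperCall(Loc, BeginMapper, SrcLocInfo, MaptypesArg,
//                             MapnamesArg, Allocas, DeviceID,
//                             /*NumOperands=*/2);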
3065 
3066 bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
3067  const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) {
3070  "Unexpected Atomic Ordering.");
3071 
3072  bool Flush = false;
3073  AtomicOrdering FlushAO = AtomicOrdering::Monotonic;
3074 
3075  switch (AK) {
3076  case Read:
3077  if (AO == AtomicOrdering::Acquire || AO == AtomicOrdering::AcquireRelease ||
3078  AO == AtomicOrdering::SequentiallyConsistent) {
3079  FlushAO = AtomicOrdering::Acquire;
3080  Flush = true;
3081  }
3082  break;
3083  case Write:
3084  case Update:
3085  if (AO == AtomicOrdering::Release || AO == AtomicOrdering::AcquireRelease ||
3086  AO == AtomicOrdering::SequentiallyConsistent) {
3087  FlushAO = AtomicOrdering::Release;
3088  Flush = true;
3089  }
3090  break;
3091  case Capture:
3092  switch (AO) {
3093  case AtomicOrdering::Acquire:
3094  FlushAO = AtomicOrdering::Acquire;
3095  Flush = true;
3096  break;
3097  case AtomicOrdering::Release:
3098  FlushAO = AtomicOrdering::Release;
3099  Flush = true;
3100  break;
3101  case AtomicOrdering::AcquireRelease:
3102  case AtomicOrdering::SequentiallyConsistent:
3103  FlushAO = AtomicOrdering::AcquireRelease;
3104  Flush = true;
3105  break;
3106  default:
3107  // do nothing - leave silently.
3108  break;
3109  }
3110  }
3111 
3112  if (Flush) {
3113  // The flush runtime call does not take a memory ordering yet; for now we
3114  // only resolve which ordering the flush would need (FlushAO) and then
3115  // issue the plain flush call.
3116  // TODO: pass `FlushAO` after memory ordering support is added
3117  (void)FlushAO;
3118  emitFlush(Loc);
3119  }
3120 
3121  // for AO == AtomicOrdering::Monotonic and all other case combinations
3122  // do nothing
3123  return Flush;
3124 }
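// Worked example (annotation, not part of OMPIRBuilder.cpp): an atomic read
// with AO == Acquire selects FlushAO = Acquire and emits a flush; an atomic
// write or update with AO == Release selects FlushAO = Release and emits a
// flush; a capture with AcquireRelease or SequentiallyConsistent flushes with
// FlushAO = AcquireRelease. For AO == Monotonic no flush is emitted and the
// function returns false.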
3125 
3126 OpenMPIRBuilder::InsertPointTy
3127 OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
3128  AtomicOpValue &X, AtomicOpValue &V,
3129  AtomicOrdering AO) {
3130  if (!updateToLocation(Loc))
3131  return Loc.IP;
3132 
3133  Type *XTy = X.Var->getType();
3134  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
3135  Type *XElemTy = XTy->getPointerElementType();
3136  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3137  XElemTy->isPointerTy()) &&
3138  "OMP atomic read expected a scalar type");
3139 
3140  Value *XRead = nullptr;
3141 
3142  if (XElemTy->isIntegerTy()) {
3143  LoadInst *XLD =
3144  Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read");
3145  XLD->setAtomic(AO);
3146  XRead = cast<Value>(XLD);
3147  } else {
3148  // We need to bitcast and perform atomic op as integer
3149  unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
3150  IntegerType *IntCastTy =
3151  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3152  Value *XBCast = Builder.CreateBitCast(
3153  X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.src.int.cast");
3154  LoadInst *XLoad =
3155  Builder.CreateLoad(IntCastTy, XBCast, X.IsVolatile, "omp.atomic.load");
3156  XLoad->setAtomic(AO);
3157  if (XElemTy->isFloatingPointTy()) {
3158  XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
3159  } else {
3160  XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast");
3161  }
3162  }
3163  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
3164  Builder.CreateStore(XRead, V.Var, V.IsVolatile);
3165  return Builder.saveIP();
3166 }
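// Illustrative usage sketch (annotation, not part of OMPIRBuilder.cpp):
// lowering "#pragma omp atomic read" for "v = x" with i32 variables, where
// XPtr and VPtr are placeholder Values holding the addresses of x and v:
//
//   OpenMPIRBuilder::AtomicOpValue X, V;
//   X.Var = XPtr;
//   V.Var = VPtr;
//   OMPBuilder.Builder.restoreIP(
//       OMPBuilder.createAtomicRead(Loc, X, V, AtomicOrdering::Monotonic));
//
// For the integer case this emits an atomic monotonic load of x followed by a
// plain store into v, as in the first branch above.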
3167 
3168 OpenMPIRBuilder::InsertPointTy
3169 OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
3170  AtomicOpValue &X, Value *Expr,
3171  AtomicOrdering AO) {
3172  if (!updateToLocation(Loc))
3173  return Loc.IP;
3174 
3175  Type *XTy = X.Var->getType();
3176  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
3177  Type *XElemTy = XTy->getPointerElementType();
3178  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3179  XElemTy->isPointerTy()) &&
3180  "OMP atomic write expected a scalar type");
3181 
3182  if (XElemTy->isIntegerTy()) {
3183  StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile);
3184  XSt->setAtomic(AO);
3185  } else {
3186  // We need to bitcast and perform atomic op as integers
3187  unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
3188  IntegerType *IntCastTy =
3189  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3190  Value *XBCast = Builder.CreateBitCast(
3191  X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.dst.int.cast");
3192  Value *ExprCast =
3193  Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast");
3194  StoreInst *XSt = Builder.CreateStore(ExprCast, XBCast, X.IsVolatile);
3195  XSt->setAtomic(AO);
3196  }
3197 
3198  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
3199  return Builder.saveIP();
3200 }
3201 
3202 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
3203  const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
3204  Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
3205  AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr) {
3206  if (!updateToLocation(Loc))
3207  return Loc.IP;
3208 
3209  LLVM_DEBUG({
3210  Type *XTy = X.Var->getType();
3211  assert(XTy->isPointerTy() &&
3212  "OMP Atomic expects a pointer to target memory");
3213  Type *XElemTy = XTy->getPointerElementType();
3214  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3215  XElemTy->isPointerTy()) &&
3216  "OMP atomic update expected a scalar type");
3217  assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
3218  (RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) &&
3219  "OpenMP atomic does not support LT or GT operations");
3220  });
3221 
3222  emitAtomicUpdate(AllocIP, X.Var, Expr, AO, RMWOp, UpdateOp, X.IsVolatile,
3223  IsXBinopExpr);
3224  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
3225  return Builder.saveIP();
3226 }
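// Illustrative usage sketch (annotation, not part of OMPIRBuilder.cpp):
// lowering "#pragma omp atomic update" for "x += expr" on an i32. AllocIP is
// an instruction in the entry block before which temporaries may be placed;
// X, ExprVal, Loc and OMPBuilder are assumed placeholders. The callback is
// only used on the compare-exchange path, but one mirroring the RMW operation
// is still expected:
//
//   auto UpdateOp = [&](Value *Old, IRBuilder<> &IRB) -> Value * {
//     return IRB.CreateAdd(Old, ExprVal);
//   };
//   OMPBuilder.createAtomicUpdate(Loc, AllocIP, X, ExprVal,
//                                 AtomicOrdering::Monotonic,
//                                 AtomicRMWInst::Add, UpdateOp,
//                                 /*IsXBinopExpr=*/true);
//
// Because x is an integer and Add maps directly onto atomicrmw, this lowers
// to a single "atomicrmw add" instruction rather than a cmpxchg loop.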
3227 
3228 Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
3229  AtomicRMWInst::BinOp RMWOp) {
3230  switch (RMWOp) {
3231  case AtomicRMWInst::Add:
3232  return Builder.CreateAdd(Src1, Src2);
3233  case AtomicRMWInst::Sub:
3234  return Builder.CreateSub(Src1, Src2);
3235  case AtomicRMWInst::And:
3236  return Builder.CreateAnd(Src1, Src2);
3237  case AtomicRMWInst::Nand:
3238  return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
3239  case AtomicRMWInst::Or:
3240  return Builder.CreateOr(Src1, Src2);
3241  case AtomicRMWInst::Xor:
3242  return Builder.CreateXor(Src1, Src2);
3243  case AtomicRMWInst::Xchg:
3244  case AtomicRMWInst::FAdd:
3245  case AtomicRMWInst::FSub:
3246  case AtomicRMWInst::BAD_BINOP:
3247  case AtomicRMWInst::Max:
3248  case AtomicRMWInst::Min:
3249  case AtomicRMWInst::UMax:
3250  case AtomicRMWInst::UMin:
3251  llvm_unreachable("Unsupported atomic update operation");
3252  }
3253  llvm_unreachable("Unsupported atomic update operation");
3254 }
3255 
3256 std::pair<Value *, Value *>
3257 OpenMPIRBuilder::emitAtomicUpdate(Instruction *AllocIP, Value *X, Value *Expr,
3258  AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
3259  AtomicUpdateCallbackTy &UpdateOp,
3260  bool VolatileX, bool IsXBinopExpr) {
3261  Type *XElemTy = X->getType()->getPointerElementType();
3262 
3263  bool DoCmpExch =
3264  ((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) ||
3265  (RMWOp == AtomicRMWInst::FSub) ||
3266  (RMWOp == AtomicRMWInst::Sub && !IsXBinopExpr);
3267 
3268  std::pair<Value *, Value *> Res;
3269  if (XElemTy->isIntegerTy() && !DoCmpExch) {
3270  Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
3271  // not needed except in case of postfix captures. Generate anyway for
3272  // consistency with the else part. Will be removed with any DCE pass.
3273  Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
3274  } else {
3275  unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace();
3276  IntegerType *IntCastTy =
3277  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3278  Value *XBCast =
3279  Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
3280  LoadInst *OldVal =
3281  Builder.CreateLoad(IntCastTy, XBCast, X->getName() + ".atomic.load");
3282  OldVal->setAtomic(AO);
3283  // CurBB
3284  // | /---\
3285  // ContBB |
3286  // | \---/
3287  // ExitBB
3288  BasicBlock *CurBB = Builder.GetInsertBlock();
3289  Instruction *CurBBTI = CurBB->getTerminator();
3290  CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
3291  BasicBlock *ExitBB =
3292  CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit");
3293  BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(),
3294  X->getName() + ".atomic.cont");
3295  ContBB->getTerminator()->eraseFromParent();
3296  Builder.SetInsertPoint(ContBB);
3297  llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
3298  PHI->addIncoming(OldVal, CurBB);
3299  AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
3300  NewAtomicAddr->setName(X->getName() + "x.new.val");
3301  NewAtomicAddr->moveBefore(AllocIP);
3302  IntegerType *NewAtomicCastTy =
3303  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3304  bool IsIntTy = XElemTy->isIntegerTy();
3305  Value *NewAtomicIntAddr =
3306  (IsIntTy)
3307  ? NewAtomicAddr
3308  : Builder.CreateBitCast(NewAtomicAddr,
3309  NewAtomicCastTy->getPointerTo(Addrspace));
3310  Value *OldExprVal = PHI;
3311  if (!IsIntTy) {
3312  if (XElemTy->isFloatingPointTy()) {
3313  OldExprVal = Builder.CreateBitCast(PHI, XElemTy,
3314  X->getName() + ".atomic.fltCast");
3315  } else {
3316  OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy,
3317  X->getName() + ".atomic.ptrCast");
3318  }
3319  }
3320 
3321  Value *Upd = UpdateOp(OldExprVal, Builder);
3322  Builder.CreateStore(Upd, NewAtomicAddr);
3323  LoadInst *DesiredVal = Builder.CreateLoad(XElemTy, NewAtomicIntAddr);
3324  Value *XAddr =
3325  (IsIntTy)
3326  ? X
3327  : Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
3328  AtomicOrdering Failure =
3329  llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
3330  AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg(
3331  XAddr, OldExprVal, DesiredVal, llvm::MaybeAlign(), AO, Failure);
3332  Result->setVolatile(VolatileX);
3333  Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0);
3334  Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
3335  PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
3336  Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
3337 
3338  Res.first = OldExprVal;
3339  Res.second = Upd;
3340 
3341  // set Insertion point in exit block
3342  if (UnreachableInst *ExitTI =
3343  dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
3344  CurBBTI->eraseFromParent();
3345  Builder.SetInsertPoint(ExitBB);
3346  } else {
3347  Builder.SetInsertPoint(ExitTI);
3348  }
3349  }
3350 
3351  return Res;
3352 }
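// Annotation (not part of OMPIRBuilder.cpp): for a non-integer update such as
// "x = x + 1.0" on a float, the compare-exchange path above produces IR of
// roughly this shape (names abbreviated, typed pointers as in LLVM 14):
//
//   %old  = load atomic i32, i32* %x.int monotonic, align 4
//   br label %x.atomic.cont
// x.atomic.cont:
//   %phi  = phi i32 [ %old, %entry ], [ %prev, %x.atomic.cont ]
//   %oldf = bitcast i32 %phi to float
//   %upd  = fadd float %oldf, 1.0              ; produced by UpdateOp
//   ; store %upd to the stack slot, reload it as i32 into %desired
//   %pair = cmpxchg i32* %x.int, i32 %phi, i32 %desired monotonic monotonic
//   %prev = extractvalue { i32, i1 } %pair, 0
//   %ok   = extractvalue { i32, i1 } %pair, 1
//   br i1 %ok, label %x.atomic.exit, label %x.atomic.cont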
3353 
3354 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
3355  const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
3356  AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
3357  AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
3358  bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr) {
3359  if (!updateToLocation(Loc))
3360  return Loc.IP;
3361 
3362  LLVM_DEBUG({
3363  Type *XTy = X.Var->getType();
3364  assert(XTy->isPointerTy() &&
3365  "OMP Atomic expects a pointer to target memory");
3366  Type *XElemTy = XTy->getPointerElementType();
3367  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3368  XElemTy->isPointerTy()) &&
3369  "OMP atomic capture expected a scalar type");
3370  assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
3371  "OpenMP atomic does not support LT or GT operations");
3372  });
3373 
3374  // If UpdateExpr is 'x' updated with some `expr` not based on 'x',
3375  // 'x' is simply atomically rewritten with 'expr'.
3376  AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
3377  std::pair<Value *, Value *> Result = emitAtomicUpdate(
3378  AllocIP, X.Var, Expr, AO, AtomicOp, UpdateOp, X.IsVolatile, IsXBinopExpr);
3379 
3380  Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
3381  Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
3382 
3383  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
3384  return Builder.saveIP();
3385 }
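// Annotation (not part of OMPIRBuilder.cpp): IsPostfixUpdate selects which
// value reaches v. For a postfix capture ("v = x; x = x op expr;") the stored
// value is Result.first, the value of x before the update; for a prefix
// capture ("x = x op expr; v = x;") it is Result.second, the updated value.
// When UpdateExpr is false, i.e. x is simply overwritten with expr, the
// helper uses an atomic exchange instead of RMWOp, as noted above.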
3386 
3387 GlobalVariable *
3388 OpenMPIRBuilder::createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
3389  std::string VarName) {
3390  llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
3391  llvm::ArrayType::get(
3392  llvm::Type::getInt8Ty(M.getContext())->getPointerTo(), Names.size()),
3393  Names);
3394  auto *MapNamesArrayGlobal = new llvm::GlobalVariable(
3395  M, MapNamesArrayInit->getType(),
3396  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapNamesArrayInit,
3397  VarName);
3398  return MapNamesArrayGlobal;
3399 }
3400 
3401 // Create all simple and struct types exposed by the runtime and remember
3402 // the llvm::PointerTypes of them for easy access later.
3403 void OpenMPIRBuilder::initializeTypes(Module &M) {
3404  LLVMContext &Ctx = M.getContext();
3405  StructType *T;
3406 #define OMP_TYPE(VarName, InitValue) VarName = InitValue;
3407 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
3408  VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
3409  VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
3410 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
3411  VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
3412  VarName##Ptr = PointerType::getUnqual(VarName);
3413 #define OMP_STRUCT_TYPE(VarName, StructName, ...) \
3414  T = StructType::getTypeByName(Ctx, StructName); \
3415  if (!T) \
3416  T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \
3417  VarName = T; \
3418  VarName##Ptr = PointerType::getUnqual(T);
3419 #include "llvm/Frontend/OpenMP/OMPKinds.def"
3420 }
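// Worked example (annotation, not part of OMPIRBuilder.cpp): one expansion of
// the OMP_STRUCT_TYPE macro above. For the ident_t entry in OMPKinds.def
// (roughly OMP_STRUCT_TYPE(Ident, "struct.ident_t", Int32, Int32, Int32,
// Int32, Int8Ptr)), initializeTypes looks up "struct.ident_t" in the context
// and, if absent, creates
//   %struct.ident_t = type { i32, i32, i32, i32, i8* }
// then stores the struct type in Ident and its unqualified pointer type in
// IdentPtr for use by the runtime-call builders.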
3421 
3422 void OpenMPIRBuilder::OutlineInfo::collectBlocks(
3423  SmallPtrSetImpl<BasicBlock *> &BlockSet,
3424  SmallVectorImpl<BasicBlock *> &BlockVector) {
3425  SmallVector<BasicBlock *, 32> Worklist;
3426  BlockSet.insert(EntryBB);
3427  BlockSet.insert(ExitBB);
3428 
3429  Worklist.push_back(EntryBB);
3430  while (!Worklist.empty()) {
3431  BasicBlock *BB = Worklist.pop_back_val();
3432  BlockVector.push_back(BB);
3433  for (BasicBlock *SuccBB : successors(BB))
3434  if (BlockSet.insert(SuccBB).second)
3435  Worklist.push_back(SuccBB);
3436  }
3437 }
3438 
3439 void CanonicalLoopInfo::collectControlBlocks(
3440  SmallVectorImpl<BasicBlock *> &BBs) {
3441  // We only count those BBs as control block for which we do not need to
3442  // reverse the CFG, i.e. not the loop body which can contain arbitrary control
3443  // flow. For consistency, this also means we do not add the Body block, which
3444  // is just the entry to the body code.
3445  BBs.reserve(BBs.size() + 6);
3446  BBs.append({getPreheader(), Header, Cond, Latch, Exit, getAfter()});
3447 }
3448 
3450  assert(isValid() && "Requires a valid canonical loop");
3451  for (BasicBlock *Pred : predecessors(Header)) {
3452  if (Pred != Latch)
3453  return Pred;
3454  }
3455  llvm_unreachable("Missing preheader");
3456 }
3457 
3458 void CanonicalLoopInfo::assertOK() const {
3459 #ifndef NDEBUG
3460  // No constraints if this object currently does not describe a loop.
3461  if (!isValid())
3462  return;
3463 
3464  BasicBlock *Preheader = getPreheader();
3465  BasicBlock *Body = getBody();
3466  BasicBlock *After = getAfter();
3467 
3468  // Verify standard control-flow we use for OpenMP loops.
3469  assert(Preheader);
3470  assert(isa<BranchInst>(Preheader->getTerminator()) &&
3471  "Preheader must terminate with unconditional branch");
3472  assert(Preheader->getSingleSuccessor() == Header &&
3473  "Preheader must jump to header");
3474 
3475  assert(Header);
3476  assert(isa<BranchInst>(Header->getTerminator()) &&
3477  "Header must terminate with unconditional branch");
3478  assert(Header->getSingleSuccessor() == Cond &&
3479  "Header must jump to exiting block");
3480 
3481  assert(Cond);
3482  assert(Cond->getSinglePredecessor() == Header &&
3483  "Exiting block only reachable from header");
3484 
3485  assert(isa<BranchInst>(Cond->getTerminator()) &&
3486  "Exiting block must terminate with conditional branch");
3487  assert(size(successors(Cond)) == 2 &&
3488  "Exiting block must have two successors");
3489  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body &&
3490  "Exiting block's first successor jump to the body");
3491  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit &&
3492  "Exiting block's second successor must exit the loop");
3493 
3494  assert(Body);
3495  assert(Body->getSinglePredecessor() == Cond &&
3496  "Body only reachable from exiting block");
3497  assert(!isa<PHINode>(Body->front()));
3498 
3499  assert(Latch);
3500  assert(isa<BranchInst>(Latch->getTerminator()) &&
3501  "Latch must terminate with unconditional branch");
3502  assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
3503  // TODO: To support simple redirecting of the end of the body code that has
3504  // multiple predecessors; introduce another auxiliary basic block like preheader and after.
3505  assert(Latch->getSinglePredecessor() != nullptr);
3506  assert(!isa<PHINode>(Latch->front()));
3507 
3508  assert(Exit);
3509  assert(isa<BranchInst>(Exit->getTerminator()) &&
3510  "Exit block must terminate with unconditional branch");
3511  assert(Exit->getSingleSuccessor() == After &&
3512  "Exit block must jump to after block");
3513 
3514  assert(After);
3515  assert(After->getSinglePredecessor() == Exit &&
3516  "After block only reachable from exit block");
3517  assert(After->empty() || !isa<PHINode>(After->front()));
3518 
3519  Instruction *IndVar = getIndVar();
3520  assert(IndVar && "Canonical induction variable not found?");
3521  assert(isa<IntegerType>(IndVar->getType()) &&
3522  "Induction variable must be an integer");
3523  assert(cast<PHINode>(IndVar)->getParent() == Header &&
3524  "Induction variable must be a PHI in the loop header");
3525  assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
3526  assert(
3527  cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero());
3528  assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
3529 
3530  auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
3531  assert(cast<Instruction>(NextIndVar)->getParent() == Latch);
3532  assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add);
3533  assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
3534  assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
3535  ->isOne());
3536 
3537  Value *TripCount = getTripCount();
3538  assert(TripCount && "Loop trip count not found?");
3539  assert(IndVar->getType() == TripCount->getType() &&
3540  "Trip count and induction variable must have the same type");
3541 
3542  auto *CmpI = cast<CmpInst>(&Cond->front());
3543  assert(CmpI->getPredicate() == CmpInst::ICMP_ULT &&
3544  "Exit condition must be a signed less-than comparison");
3545  assert(CmpI->getOperand(0) == IndVar &&
3546  "Exit condition must compare the induction variable");
3547  assert(CmpI->getOperand(1) == TripCount &&
3548  "Exit condition must compare with the trip count");
3549 #endif
3550 }
3551 
3552 void CanonicalLoopInfo::invalidate() {
3553  Header = nullptr;
3554  Cond = nullptr;
3555  Latch = nullptr;
3556  Exit = nullptr;
3557 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::CanonicalLoopInfo::getPreheaderIP
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
Definition: OMPIRBuilder.h:1523
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:481
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
AssumptionCache.h
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:460
llvm::OpenMPIRBuilder::createCachedThreadPrivate
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
Definition: OMPIRBuilder.cpp:2864
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
llvm::AtomicOrdering::AcquireRelease
@ AcquireRelease
Attrs
Function Attrs
Definition: README_ALTIVEC.txt:215
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2420
llvm::OpenMPIRBuilder::LocationDescription
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
Definition: OMPIRBuilder.h:150
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2118
addLoopMetadata
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
Definition: OMPIRBuilder.cpp:2143
llvm::OpenMPIRBuilder::OutlineInfo::ExitBB
BasicBlock * ExitBB
Definition: OMPIRBuilder.h:774
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:456
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:22
llvm::Type::getInt1Ty
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:238
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::OpenMPIRBuilder::createSection
InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
Definition: OMPIRBuilder.cpp:1059
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:264
llvm::OpenMPIRBuilder::createCritical
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
Definition: OMPIRBuilder.cpp:2540
llvm::CanonicalLoopInfo::getAfter
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Definition: OMPIRBuilder.h:1492
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:293
createTargetMachine
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOpt::Level OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
Definition: OMPIRBuilder.cpp:2262
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:217
llvm::TargetOptions
Definition: TargetOptions.h:124
llvm::OpenMPIRBuilder::createLoopSkeleton
CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
Definition: OMPIRBuilder.cpp:1318
llvm::cl::Prefix
@ Prefix
Definition: CommandLine.h:164
llvm::Function::empty
bool empty() const
Definition: Function.h:729
llvm::Function::getBasicBlockList
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
Definition: Function.h:700
llvm::GlobalValue::HiddenVisibility
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:64
T
llvm::CodeExtractor::findAllocas
void findAllocas(const CodeExtractorAnalysisCache &CEAC, ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const
Find the set of allocas whose life ranges are contained within the outlined region.
Definition: CodeExtractor.cpp:495
llvm::CanonicalLoopInfo::getPreheader
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Definition: OMPIRBuilder.cpp:3449
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::ConstantStruct::get
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1353
llvm::AtomicRMWInst::Xor
@ Xor
*p = old ^ v
Definition: Instructions.h:757
llvm::CodeExtractor::extractCodeRegion
Function * extractCodeRegion(const CodeExtractorAnalysisCache &CEAC)
Perform the extraction, returning the new function.
Definition: CodeExtractor.cpp:1584
Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:372
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:137
llvm::GlobalValue::NotThreadLocal
@ NotThreadLocal
Definition: GlobalValue.h:179
llvm::AtomicRMWInst::BinOp
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:743
llvm::ilist_node_with_parent::getNextNode
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:288
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1175
llvm::BasicBlock::empty
bool empty() const
Definition: BasicBlock.h:307
llvm::OpenMPIRBuilder::tileLoops
std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
Definition: OMPIRBuilder.cpp:1951
llvm::enumerate
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are are pair (A,B) such that A is the 0-based ...
Definition: STLExtras.h:2023
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:707
llvm::OpenMPIRBuilder::InsertPointTy
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
Definition: OMPIRBuilder.h:53
llvm::BasicBlock::replaceSuccessorsPhiUsesWith
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
Definition: BasicBlock.cpp:461
llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2515
llvm::OpenMPIRBuilder::getOrCreateThreadID
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
Definition: OMPIRBuilder.cpp:385
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:743
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2253
llvm::DeleteDeadBlocks
void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
Definition: BasicBlockUtils.cpp:101
llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:363
Error.h
OptimizationRemarkEmitter.h
llvm::AtomicOrdering::SequentiallyConsistent
@ SequentiallyConsistent
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::tgtok::VarName
@ VarName
Definition: TGLexer.h:71
llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Definition: TargetTransformInfo.h:465
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1580
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:321
llvm::Function::arg_size
size_t arg_size() const
Definition: Function.h:771
ScalarEvolution.h
Shift
bool Shift
Definition: README.txt:468
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::BasicBlock::eraseFromParent
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:133
llvm::OpenMPIRBuilder::AtomicOpValue
a struct to pack relevant information while generating atomic Ops
Definition: OMPIRBuilder.h:1203
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:538
llvm::CanonicalLoopInfo::getAfterIP
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
Definition: OMPIRBuilder.h:1537
getTripCount
static const SCEV * getTripCount(const SCEV *BECount, Type *IntPtr, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute trip count from the backedge taken count.
Definition: LoopIdiomRecognize.cpp:1057
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:227
llvm::BasicBlock::getSingleSuccessor
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:298
llvm::StoreInst::setAlignment
void setAlignment(Align Align)
Definition: Instructions.h:362
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
llvm::BasicBlock::splitBasicBlock
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:385
llvm::GlobalValue::UnnamedAddr::Global
@ Global
llvm::ApproximateLoopSize
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
Definition: LoopUnrollPass.cpp:667
llvm::Optional
Definition: APInt.h:33
llvm::ConstantAsMetadata::get
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:419
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::MCID::Convergent
@ Convergent
Definition: MCInstrDesc.h:182
CodeExtractor.h
llvm::OpenMPIRBuilder::ReductionInfo::Variable
Value * Variable
Reduction variable of pointer type.
Definition: OMPIRBuilder.h:570
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:31
llvm::OpenMPIRBuilder::FinalizeCallbackTy
std::function< void(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
Definition: OMPIRBuilder.h:63
llvm::CmpInst::ICMP_SLE
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:751
llvm::computeUnrollCount
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
Definition: LoopUnrollPass.cpp:890
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::CanonicalLoopInfo::getFunction
Function * getFunction() const
Definition: OMPIRBuilder.h:1543
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:268
llvm::OpenMPIRBuilder::AtomicOpValue::Var
Value * Var
Definition: OMPIRBuilder.h:1204
llvm::BasicBlock::hasNPredecessors
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:290
llvm::CallBase::arg_begin
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1318
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:642
RHS
Value * RHS
Definition: X86PartialReduction.cpp:74
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
llvm::TargetRegistry::lookupTarget
static const Target * lookupTarget(const std::string &Triple, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Definition: TargetRegistry.cpp:62
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:241
llvm::OpenMPIRBuilder::emitTaskyieldImpl
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
Definition: OMPIRBuilder.cpp:940
llvm::OpenMPIRBuilder::createReductions
InsertPointTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, bool IsNoWait=false)
Generator for '#omp reduction'.
Definition: OMPIRBuilder.cpp:1105
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::StoreInst::setAtomic
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
Definition: Instructions.h:389
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1233
llvm::OpenMPIRBuilder::collapseLoops
CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
Definition: OMPIRBuilder.cpp:1823
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::MDNode::operands
op_range operands() const
Definition: Metadata.h:1135
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1336
llvm::ConstantExpr::getPointerCast
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
Definition: Constants.cpp:2065
llvm::BasicBlock::getUniqueSuccessor
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:306
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:583
llvm::AtomicOrdering::Monotonic
@ Monotonic
llvm::CanonicalLoopInfo::getIndVar
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Definition: OMPIRBuilder.h:1509
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::gatherUnrollingPreferences
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
Definition: LoopUnrollPass.cpp:185
llvm::OpenMPIRBuilder::createOrderedDepend
InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for '#omp ordered depend (source | sink)'.
Definition: OMPIRBuilder.cpp:2575
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::Lock
static sys::Mutex Lock
Definition: NVPTXUtilities.cpp:39
llvm::OpenMPIRBuilder::finalize
void finalize(Function *Fn=nullptr, bool AllowExtractorSinking=false)
Finalize the underlying module, e.g., by outlining regions.
Definition: OMPIRBuilder.cpp:177
Mappings
Inject TLI Mappings
Definition: InjectTLIMappings.cpp:172
CommandLine.h
CodeMetrics.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:73
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:775
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1356
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1592
TargetMachine.h
llvm::OpenMPIRBuilder::emitMapperCall
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
Definition: OMPIRBuilder.cpp:3040
OMPIRBuilder.h
Constants.h
llvm::omp::OMP_TGT_EXEC_MODE_GENERIC
@ OMP_TGT_EXEC_MODE_GENERIC
Definition: OMPConstants.h:101
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:519
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:655
llvm::CanonicalLoopInfo::assertOK
void assertOK() const
Consistency self-check.
Definition: OMPIRBuilder.cpp:3458
llvm::CanonicalLoopInfo::getCond
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
Definition: OMPIRBuilder.h:1460
llvm::OpenMPIRBuilder
An interface to create LLVM-IR for OpenMP directives.
Definition: OMPIRBuilder.h:29
llvm::OpenMPIRBuilder::createOMPFree
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
Definition: OMPIRBuilder.cpp:2849
llvm::OpenMPIRBuilder::getOrCreateDefaultSrcLocStr
Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
Definition: OMPIRBuilder.cpp:357
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:496
cmp
< i32 >< i32 > cmp
Definition: README.txt:1447
llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:747
TileSize
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1398
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::OpenMPIRBuilder::createMapperAllocas
void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
Definition: OMPIRBuilder.cpp:3021
redirectAllPredecessorsTo
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
Definition: OMPIRBuilder.cpp:1784
llvm::OpenMPIRBuilder::createBarrier
InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
Definition: OMPIRBuilder.cpp:392
llvm::OpenMPIRBuilder::createAtomicRead
InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO)
Emit atomic Read for : V = X — Only Scalar data types.
Definition: OMPIRBuilder.cpp:3127
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
IP
Definition: NVPTXLowerArgs.cpp:166
TargetLibraryInfo.h
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:253
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
llvm::OpenMPIRBuilder::~OpenMPIRBuilder
~OpenMPIRBuilder()
Definition: OMPIRBuilder.cpp:262
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::CanonicalLoopInfo::isValid
bool isValid() const
Returns whether this object currently represents the IR of a loop.
Definition: OMPIRBuilder.h:1443
llvm::OpenMPIRBuilder::getOrCreateSrcLocStr
Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
Definition: OMPIRBuilder.cpp:317
llvm::CodeExtractor::findInputsOutputs
void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, const ValueSet &Allocas) const
Compute the set of input values and output values for the code.
Definition: CodeExtractor.cpp:647
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:191
llvm::pdb::Int8
@ Int8
Definition: PDBTypes.h:396
MDBuilder.h
llvm::AtomicOrdering::Acquire
@ Acquire
llvm::AtomicRMWInst::Nand
@ Nand
*p = ~(old & v)
Definition: Instructions.h:753
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:32
setCanonicalLoopTripCount
void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount)
Definition: OMPIRBuilder.cpp:1507
llvm::GlobalObject::addMetadata
void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
Definition: Metadata.cpp:1268
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:376
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:607
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:925
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:73
llvm::OpenMPIRBuilder::applySimd
void applySimd(DebugLoc DL, CanonicalLoopInfo *Loop)
Add metadata to simd-ize a loop.
Definition: OMPIRBuilder.cpp:2197
llvm::CanonicalLoopInfo::getBodyIP
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
Definition: OMPIRBuilder.h:1530
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:71
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:787
removeUnusedBlocksFromParent
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
Definition: OMPIRBuilder.cpp:1792
llvm::Instruction::removeFromParent
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:74
llvm::OpenMPIRBuilder::MapperAllocas::Args
AllocaInst * Args
Definition: OMPIRBuilder.h:813
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::omp::IdentFlag
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their defintion in openmp/runtime/src/kmp...
Definition: OMPConstants.h:66
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::AtomicRMWInst::Xchg
@ Xchg
*p = v
Definition: Instructions.h:745
llvm::OpenMPIRBuilder::unrollLoopFull
void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
Definition: OMPIRBuilder.cpp:2182
llvm::OpenMPIRBuilder::emitTaskwaitImpl
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
Definition: OMPIRBuilder.cpp:921
llvm::StringRef::str
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:245
llvm::OpenMPIRBuilder::createOMPAlloc
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_Alloc.
Definition: OMPIRBuilder.cpp:2832
llvm::SetVector::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:71
llvm::OpenMPIRBuilder::addAttributes
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Definition: OMPIRBuilder.cpp:77
llvm::None
const NoneType None
Definition: None.h:23
llvm::AtomicRMWInst::Add
@ Add
*p = old + v
Definition: Instructions.h:747
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::OpenMPIRBuilder::applyDynamicWorkshareLoop
InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, omp::OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk=nullptr)
Modifies the canonical loop to be a dynamically-scheduled workshare loop.
Definition: OMPIRBuilder.cpp:1647
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:282
llvm::SmallString< 128 >
CFG.h
llvm::Twine::str
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
llvm::OpenMPIRBuilder::OutlineInfo::EntryBB
BasicBlock * EntryBB
Definition: OMPIRBuilder.h:774
llvm::Function::getAttributes
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:316
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:190
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
llvm::omp::RuntimeFunction
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
Definition: OMPConstants.h:46
llvm::SmallString::append
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:67
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::OpenMPIRBuilder::createTaskyield
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
Definition: OMPIRBuilder.cpp:952
llvm::AtomicRMWInst::UMin
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:765
llvm::cl::opt< bool >
llvm::ClrHandlerType::Filter
@ Filter
llvm::OpenMPIRBuilder::createOffloadMaptypes
GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
Definition: OMPIRBuilder.cpp:3009
llvm::OpenMPIRBuilder::AtomicOpValue::IsVolatile
bool IsVolatile
Definition: OMPIRBuilder.h:1206
llvm::OpenMPIRBuilder::createSections
InsertPointTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
Definition: OMPIRBuilder.cpp:958
llvm::OpenMPIRBuilder::MapperAllocas::ArgsBase
AllocaInst * ArgsBase
Definition: OMPIRBuilder.h:812
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:309
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::OpenMPIRBuilder::ReductionInfo::PrivateVariable
Value * PrivateVariable
Thread-private partial reduction variable.
Definition: OMPIRBuilder.h:573
llvm::OpenMPIRBuilder::createOrderedThreadsSimd
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for '#omp ordered [threads | simd]'.
Definition: OMPIRBuilder.cpp:2620
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:182
llvm::AtomicRMWInst::Sub
@ Sub
*p = old - v
Definition: Instructions.h:749
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
uint64_t
llvm::ScalarEvolutionAnalysis::run
ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
Definition: ScalarEvolution.cpp:13459
llvm::AssumptionAnalysis::run
AssumptionCache run(Function &F, FunctionAnalysisManager &)
Definition: AssumptionCache.cpp:260
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:79
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2807
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3148
DebugInfo.h
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:431
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:970
llvm::AtomicOrdering::Unordered
@ Unordered
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::OpenMPIRBuilder::emitCancelationCheckImpl
void emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
Definition: OMPIRBuilder.cpp:500
llvm::OpenMPIRBuilder::createCanonicalLoop
CanonicalLoopInfo * createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
Definition: OMPIRBuilder.cpp:1385
llvm::AtomicRMWInst::Min
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:761
llvm::OpenMPIRBuilder::createTargetInit
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime)
The omp target interface.
Definition: OMPIRBuilder.cpp:2885
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:585
llvm::OpenMPIRBuilder::getOrCreateIdent
Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
Definition: OMPIRBuilder.cpp:277
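A hedged sketch of typical use, assuming the companion getOrCreateSrcLocStr helper of this builder (its exact overload may differ):
  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Constant *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize);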
llvm::AtomicRMWInst::Or
@ Or
*p = old | v
Definition: Instructions.h:755
addSimdMetadata
static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
Definition: OMPIRBuilder.cpp:2170
llvm::OpenMPIRBuilder::createParallel
IRBuilder ::InsertPoint createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
Definition: OMPIRBuilder.cpp:539
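A rough usage sketch, not taken from this file: OMPBuilder, Loc and AllocaIP are assumed to be provided by the caller, and the callback shapes shown are assumptions about BodyGenCallbackTy, PrivatizeCallbackTy and FinalizeCallbackTy; null IfCondition/NumThreads request an unconditional region with the default thread count.
  auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy InnerAllocaIP,
                       OpenMPIRBuilder::InsertPointTy CodeGenIP,
                       BasicBlock &ContinuationBB) {
    // Emit the parallel region body at CodeGenIP.
  };
  auto PrivCB = [&](OpenMPIRBuilder::InsertPointTy InnerAllocaIP,
                    OpenMPIRBuilder::InsertPointTy CodeGenIP, Value &Orig,
                    Value &Inner, Value *&ReplacementValue) {
    ReplacementValue = &Inner; // No privatization in this sketch.
    return CodeGenIP;
  };
  auto FiniCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP) {};
  OpenMPIRBuilder::InsertPointTy AfterIP = OMPBuilder.createParallel(
      Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, /*IfCondition=*/nullptr,
      /*NumThreads=*/nullptr, omp::ProcBindKind::OMP_PROC_BIND_default,
      /*IsCancellable=*/false);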
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:473
llvm::Function::Create
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:139
llvm::omp::OMPScheduleType::Static
@ Static
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MDBuilder::createCallbackEncoding
MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
Definition: MDBuilder.cpp:107
llvm::OpenMPIRBuilder::LocationDescription::DL
DebugLoc DL
Definition: OMPIRBuilder.h:158
isConflictIP
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
Definition: OMPIRBuilder.cpp:69
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:138
llvm::OpenMPIRBuilder::unrollLoopHeuristic
void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
Definition: OMPIRBuilder.cpp:2189
llvm::ArrayType::get
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:640
llvm::OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
Definition: OMPIRBuilder.cpp:168
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::OpenMPIRBuilder::createMaster
InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
Definition: OMPIRBuilder.cpp:1269
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
llvm::SplitBlockAndInsertIfThenElse
void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
Definition: BasicBlockUtils.cpp:1465
llvm::CodeGenOpt::Aggressive
@ Aggressive
Definition: CodeGen.h:56
llvm::OpenMPIRBuilder::createFlush
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
Definition: OMPIRBuilder.cpp:915
llvm::CallBase::arg_end
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1324
llvm::DominatorTreeAnalysis::run
DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Definition: Dominators.cpp:360
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::Function::getFunction
const Function & getFunction() const
Definition: Function.h:137
llvm::OpenMPIRBuilder::OutlineInfo
Helper that contains information about regions we need to outline during finalization.
Definition: OMPIRBuilder.h:771
llvm::CanonicalLoopInfo
Class to represent the control flow structure of an OpenMP canonical loop.
Definition: OMPIRBuilder.h:1420
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Triple.h
llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:750
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1573
llvm::OpenMPIRBuilder::createGlobalFlag
GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
Definition: OMPIRBuilder.cpp:266
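A one-line sketch; the flag name and value here are made up for illustration:
  GlobalValue *Flag = OMPBuilder.createGlobalFlag(/*Value=*/1, "__example_omp_flag");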
TargetOptions.h
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::MergeBlockIntoPredecessor
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false)
Attempts to merge a block into its predecessor, if possible.
Definition: BasicBlockUtils.cpp:180
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:746
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::Value::getNumUses
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:255
llvm::OpenMPIRBuilder::createOffloadMapnames
GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
Definition: OMPIRBuilder.cpp:3388
llvm::OpenMPIRBuilder::createAtomicCapture
InsertPointTy createAtomicCapture(const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr)
Emit atomic update for constructs (only scalar data types): V = X; X = X BinOp Expr, ...
Definition: OMPIRBuilder.cpp:3354
llvm::ConstantDataArray::get
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:691
llvm::BasicBlock::getUniquePredecessor
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:276
llvm::GlobalValue::WeakODRLinkage
@ WeakODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:53
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:870
uint32_t
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1789
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::ConstantDataArray::getString
static Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
Definition: Constants.cpp:3086
llvm::GlobalObject::hasMetadata
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition: Value.h:588
llvm::CodeExtractor
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
getKmpcForDynamicNextForType
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
Definition: OMPIRBuilder.cpp:1636
llvm::OpenMPIRBuilder::MapperAllocas::ArgSizes
AllocaInst * ArgSizes
Definition: OMPIRBuilder.h:814
llvm::OpenMPIRBuilder::OutlineInfo::PostOutlineCB
PostOutlineCBTy PostOutlineCB
Definition: OMPIRBuilder.h:773
llvm::LoadInst::setAtomic
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Definition: Instructions.h:258
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
llvm::MDNode::getDistinct
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1241
llvm::OpenMPIRBuilder::unrollLoopPartial
void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
Definition: OMPIRBuilder.cpp:2414
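Illustrative sketch only: CLI is assumed to come from createCanonicalLoop and Loc.DL supplies the debug location; passing a non-null UnrolledCLI keeps a handle to the resulting outer canonical loop so it can still be workshared afterwards.
  CanonicalLoopInfo *UnrolledCLI = nullptr;
  OMPBuilder.unrollLoopPartial(Loc.DL, CLI, /*Factor=*/4, &UnrolledCLI);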
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:180
llvm::OpenMPIRBuilder::createMasked
InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for '#omp masked'.
Definition: OMPIRBuilder.cpp:1294
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
llvm::OpenMPIRBuilder::applyWorkshareLoop
InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier)
Modifies the canonical loop to be a workshare loop.
Definition: OMPIRBuilder.cpp:1609
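A hedged sketch of turning a canonical loop into a workshare loop; CLI and AllocaIP are assumed to exist as in the createCanonicalLoop sketch above.
  OMPBuilder.applyWorkshareLoop(Loc.DL, CLI, AllocaIP, /*NeedsBarrier=*/true);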
llvm::GlobalValue::CommonLinkage
@ CommonLinkage
Tentative definitions.
Definition: GlobalValue.h:58
llvm::LoopAnalysis::run
LoopInfo run(Function &F, FunctionAnalysisManager &AM)
Definition: LoopInfo.cpp:961
computeHeuristicUnrollFactor
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performing unroll factor for CLI.
Definition: OMPIRBuilder.cpp:2283
llvm::PointerUnion< const Value *, const PseudoSourceValue * >
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:685
llvm::FunctionCallee::getCallee
Value * getCallee()
Definition: DerivedTypes.h:184
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:308
llvm::CanonicalLoopInfo::getExit
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
Definition: OMPIRBuilder.h:1482
llvm::AtomicOrdering::Release
@ Release
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:36
llvm::StringRef::size
constexpr LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition: StringRef.h:157
llvm::ConstantAsMetadata
Definition: Metadata.h:412
llvm::OpenMPIRBuilder::createTaskwait
void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
Definition: OMPIRBuilder.cpp:934
redirectTo
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
Definition: OMPIRBuilder.cpp:1767
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::OpenMPIRBuilder::ReductionInfo::ReductionGen
ReductionGenTy ReductionGen
Callback for generating the reduction body.
Definition: OMPIRBuilder.h:578
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:873
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:348
llvm::omp::AddressSpace
AddressSpace
Definition: OMPConstants.h:108
llvm::AtomicRMWInst::FSub
@ FSub
*p = old - v
Definition: Instructions.h:771
llvm::OpenMPIRBuilder::ReductionInfo::ElementType
Type * ElementType
Reduction element type, must match pointee type of variable.
Definition: OMPIRBuilder.h:567
llvm::OpenMPIRBuilder::createAtomicWrite
InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO)
Emit atomic write for: X = Expr (only scalar data types).
Definition: OMPIRBuilder.cpp:3169
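Sketch of emitting 'X = Expr' with monotonic (relaxed) ordering; XAddr and ExprVal are assumed values, and the AtomicOpValue field name Var is an assumption about this builder's header.
  OpenMPIRBuilder::AtomicOpValue X;
  X.Var = XAddr; // Address of the shared scalar (assumed field name).
  OMPBuilder.createAtomicWrite(Loc, X, ExprVal, AtomicOrdering::Monotonic);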
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:151
llvm::ConstantInt::getBool
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:887
llvm::Function::getArg
Argument * getArg(unsigned i) const
Definition: Function.h:756
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:155
GlobalVariable.h
llvm::AtomicRMWInst::And
@ And
*p = old & v
Definition: Instructions.h:751
llvm::CanonicalLoopInfo::invalidate
void invalidate()
Invalidate this loop.
Definition: OMPIRBuilder.cpp:3552
llvm::MCID::Branch
@ Branch
Definition: MCInstrDesc.h:156
llvm::Module::getContext
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:259
llvm::ConstantInt::getSigned
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.cpp:939
llvm::OpenMPIRBuilder::createCancel
InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
Definition: OMPIRBuilder.cpp:449
llvm::omp::GV
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...
Definition: OMPGridValues.h:57
PassManager.h
OptimisticAttributes
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:776
llvm::Function::getFunctionType
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:177
llvm::ConstantArray::get
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1288
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:579
llvm::TargetIRAnalysis::run
Result run(const Function &F, FunctionAnalysisManager &)
Definition: TargetTransformInfo.cpp:1166
llvm::CanonicalLoopInfo::getHeader
BasicBlock * getHeader() const
The header is the entry for each iteration.
Definition: OMPIRBuilder.h:1453
llvm::CodeExtractorAnalysisCache
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:46
llvm::AnalysisManager::registerPass
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
Definition: PassManager.h:845
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
UnrollThresholdFactor
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
llvm::omp::OMP_TGT_EXEC_MODE_SPMD
@ OMP_TGT_EXEC_MODE_SPMD
Definition: OMPConstants.h:102
llvm::SmallString::str
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:259
getKmpcForDynamicInitForType
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
Definition: OMPIRBuilder.cpp:1620
llvm::MDNode::replaceOperandWith
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
Definition: Metadata.cpp:877
llvm::OpenMPIRBuilder::createTargetDeinit
void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime)
Create a runtime call for kmpc_target_deinit.
Definition: OMPIRBuilder.cpp:2938
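A sketch of how createTargetInit/createTargetDeinit might bracket a target region; the flags are chosen arbitrarily for illustration.
  OpenMPIRBuilder::InsertPointTy BodyIP = OMPBuilder.createTargetInit(
      Loc, /*IsSPMD=*/true, /*RequiresFullRuntime=*/false);
  // ... emit the target region body starting at BodyIP ...
  OMPBuilder.createTargetDeinit(Loc, /*IsSPMD=*/true, /*RequiresFullRuntime=*/false);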
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::OpenMPIRBuilder::ReductionInfo
Information about an OpenMP reduction.
Definition: OMPIRBuilder.h:555
llvm::AllocaInst::setAlignment
void setAlignment(Align Align)
Definition: Instructions.h:129
llvm::BasicBlock::back
const Instruction & back() const
Definition: BasicBlock.h:310
llvm::OpenMPIRBuilder::initialize
void initialize()
Initialize the internal state; this will put structure types and potentially other helpers into the ...
Definition: OMPIRBuilder.cpp:175
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:224
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition: RustDemangle.cpp:217
llvm::GlobalValue::PrivateLinkage
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
Definition: Constants.cpp:2080
llvm::OpenMPIRBuilder::createAtomicUpdate
InsertPointTy createAtomicUpdate(const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr)
Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X. For complex operations: X = ...
Definition: OMPIRBuilder.cpp:3202
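Sketch of 'X = X + Expr'; AllocaInsertPt (an Instruction * marking where temporaries may go), XAddr and ExprVal are assumed to exist, and the update-callback shape is an assumption about AtomicUpdateCallbackTy.
  OpenMPIRBuilder::AtomicOpValue X;
  X.Var = XAddr; // Assumed field name for the variable address.
  auto UpdateOp = [&](Value *XOld, IRBuilder<> &IRB) -> Value * {
    return IRB.CreateAdd(XOld, ExprVal); // New value computed from the old one.
  };
  OMPBuilder.createAtomicUpdate(Loc, AllocaInsertPt, X, ExprVal,
                                AtomicOrdering::Monotonic, AtomicRMWInst::Add,
                                UpdateOp, /*IsXBinopExpr=*/true);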
llvm::CanonicalLoopInfo::getLatch
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
Definition: OMPIRBuilder.h:1476
Allocator
Basic Register Allocator
Definition: RegAllocBasic.cpp:146
llvm::Type::getPointerElementType
Type * getPointerElementType() const
This method is deprecated without replacement.
Definition: Type.h:371
ModuleUtils.h
llvm::TargetTransformInfo::UnrollingPreferences::Threshold
unsigned Threshold
The cost threshold for the unrolled loop.
Definition: TargetTransformInfo.h:439
UnrollLoop.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::remarks::Type::Failure
@ Failure
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:86
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
getFreshReductionFunc
Function * getFreshReductionFunc(Module &M)
Create a function with a unique name and a "void (i8*, i8*)" signature in the given module and return...
Definition: OMPIRBuilder.cpp:1095
llvm::AtomicRMWInst::FAdd
@ FAdd
*p = old + v
Definition: Instructions.h:768
llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:662
llvm::OpenMPIRBuilder::emitBarrierImpl
InsertPointTy emitBarrierImpl(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall, bool CheckCancelFlag)
Generate a barrier runtime call.
Definition: OMPIRBuilder.cpp:400
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
llvm::PHINode
Definition: Instructions.h:2657
llvm::BasicBlock::removePredecessor
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:325
llvm::Function::removeFromParent
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
Definition: Function.cpp:361
llvm::SmallVectorImpl< uint64_t >
llvm::AtomicRMWInst::BAD_BINOP
@ BAD_BINOP
Definition: Instructions.h:775
llvm::CodeExtractor::isEligible
bool isEligible() const
Test whether this code extractor is eligible.
Definition: CodeExtractor.cpp:620
llvm::CanonicalLoopInfo::getTripCount
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
Definition: OMPIRBuilder.h:1500
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::PassInstrumentationAnalysis
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Definition: PassManager.h:603
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:313
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:453
llvm::OpenMPIRBuilder::emitFlush
void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
Definition: OMPIRBuilder.cpp:906
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1478
llvm::OpenMPIRBuilder::OutlineInfo::collectBlocks
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
Definition: OMPIRBuilder.cpp:3422
llvm::OpenMPIRBuilder::MapperAllocas
Definition: OMPIRBuilder.h:811
llvm::OpenMPIRBuilder::applyStaticWorkshareLoop
InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk=nullptr)
Modifies the canonical loop to be a statically-scheduled workshare loop.
Definition: OMPIRBuilder.cpp:1515
llvm::UnreachableInst
This function has undefined behavior.
Definition: Instructions.h:4733
llvm::Value::replaceUsesWithIf
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:540
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3236
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:62
llvm::SwitchInst::addCase
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Definition: Instructions.cpp:4330
llvm::cl::desc
Definition: CommandLine.h:412
llvm::OpenMPIRBuilder::createSingle
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, llvm::Value *DidIt)
Generator for '#omp single'.
Definition: OMPIRBuilder.cpp:2505
llvm::AtomicRMWInst::UMax
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:763
llvm::OpenMPIRBuilder::createCopyinClauseBlocks
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
Definition: OMPIRBuilder.cpp:2782
llvm::OpenMPIRBuilder::createCopyPrivate
InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
Definition: OMPIRBuilder.cpp:2483
llvm::SetVector< Value * >
llvm::omp::OMPScheduleType
OMPScheduleType
Definition: OMPConstants.h:78
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:632
llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction
FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
Definition: OMPIRBuilder.cpp:110
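Illustrative sketch: look up (or declare) a runtime entry point and call it; M is the module, Builder an IRBuilder, and Ident/ThreadID previously materialized values (all assumed to exist here).
  FunctionCallee BarrierFn =
      OMPBuilder.getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_barrier);
  Builder.CreateCall(BarrierFn, {Ident, ThreadID});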
LoopPeel.h
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:839
Value.h
llvm::OpenMPIRBuilder::ReductionInfo::AtomicReductionGen
AtomicReductionGenTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
Definition: OMPIRBuilder.h:584
TargetRegistry.h
llvm::CanonicalLoopInfo::getBody
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
Definition: OMPIRBuilder.h:1468
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
llvm::AtomicCmpXchgInst::getStrongestFailureOrdering
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Definition: Instructions.h:679
llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:28
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::AtomicCmpXchgInst
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:526
getKmpcForStaticInitForType
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Definition: OMPIRBuilder.cpp:1492
llvm::Directive
Definition: DirectiveEmitter.h:100
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:440
llvm::gatherPeelingPreferences
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:696
llvm::ArrayRef::end
iterator end() const
Definition: ArrayRef.h:152
llvm::AtomicOrdering::NotAtomic
@ NotAtomic
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1246
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:97
llvm::DIFile
File.
Definition: DebugInfoMetadata.h:530
llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:103
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:915
llvm::Target::createTargetMachine
TargetMachine * createTargetMachine(StringRef TT, StringRef CPU, StringRef Features, const TargetOptions &Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM=None, CodeGenOpt::Level OL=CodeGenOpt::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Definition: TargetRegistry.h:449
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::OpenMPIRBuilder::LocationDescription::IP
InsertPointTy IP
Definition: OMPIRBuilder.h:157
llvm::AtomicRMWInst::Max
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:759