23#include <unordered_set> 
   26#define DEBUG_TYPE "sample-profile-inference" 
   32    cl::desc(
"Try to evenly distribute flow when there are multiple equally " 
   37    cl::desc(
"Evenly re-distribute flow among unknown subgraphs."));
 
   41    cl::desc(
"Join isolated components having positive flow."));
 
   45    cl::desc(
"The cost of increasing a block's count by one."));
 
   49    cl::desc(
"The cost of decreasing a block's count by one."));
 
   53    cl::desc(
"The cost of increasing the entry block's count by one."));
 
   57    cl::desc(
"The cost of decreasing the entry block's count by one."));
 
   61    cl::desc(
"The cost of increasing a count of zero-weight block by one."));
 
   65    cl::desc(
"The cost of increasing an unknown block's count by one."));
 
   70static constexpr int64_t 
INF = ((int64_t)1) << 50;
 
   88  MinCostMaxFlow(
const ProfiParams &Params) : Params(Params) {}
 
   95    Nodes = std::vector<Node>(NodeCount);
 
   96    Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
 
   99          std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
 
  104    LLVM_DEBUG(
dbgs() << 
"Starting profi for " << Nodes.size() << 
" nodes\n");
 
  108    size_t AugmentationIters = applyFlowAugmentation();
 
  111    int64_t TotalCost = 0;
 
  112    int64_t TotalFlow = 0;
 
  113    for (
uint64_t Src = 0; Src < Nodes.size(); Src++) {
 
  114      for (
auto &Edge : Edges[Src]) {
 
  116          TotalCost += Edge.Cost * Edge.Flow;
 
  118            TotalFlow += Edge.Flow;
 
  122    LLVM_DEBUG(
dbgs() << 
"Completed profi after " << AugmentationIters
 
  123                      << 
" iterations with " << TotalFlow << 
" total flow" 
  124                      << 
" of " << TotalCost << 
" cost\n");
 
  126    (void)AugmentationIters;
 
  134    assert(Capacity > 0 && 
"adding an edge of zero capacity");
 
  135    assert(Src != Dst && 
"loop edge are not supported");
 
  140    SrcEdge.Capacity = Capacity;
 
  142    SrcEdge.RevEdgeIndex = Edges[Dst].size();
 
  146    DstEdge.Cost = -Cost;
 
  147    DstEdge.Capacity = 0;
 
  149    DstEdge.RevEdgeIndex = Edges[Src].size();
 
  151    Edges[Src].push_back(SrcEdge);
 
  152    Edges[Dst].push_back(DstEdge);
 
  162  std::vector<std::pair<uint64_t, int64_t>> getFlow(
uint64_t Src)
 const {
 
  163    std::vector<std::pair<uint64_t, int64_t>> 
Flow;
 
  164    for (
const auto &Edge : Edges[Src]) {
 
  166        Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
 
  174    for (
const auto &Edge : Edges[Src]) {
 
  175      if (Edge.Dst == Dst) {
 
  185  size_t applyFlowAugmentation() {
 
  186    size_t AugmentationIters = 0;
 
  187    while (findAugmentingPath()) {
 
  188      uint64_t PathCapacity = computeAugmentingPathCapacity();
 
  189      while (PathCapacity > 0) {
 
  190        bool Progress = 
false;
 
  193          identifyShortestEdges(PathCapacity);
 
  196          auto AugmentingOrder = findAugmentingDAG();
 
  199          Progress = augmentFlowAlongDAG(AugmentingOrder);
 
  200          PathCapacity = computeAugmentingPathCapacity();
 
  204          augmentFlowAlongPath(PathCapacity);
 
  211    return AugmentationIters;
 
  216  uint64_t computeAugmentingPathCapacity() {
 
  219    while (Now != Source) {
 
  220      uint64_t Pred = Nodes[Now].ParentNode;
 
  221      auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
 
  223      assert(Edge.Capacity >= Edge.Flow && 
"incorrect edge flow");
 
  225      PathCapacity = std::min(PathCapacity, EdgeCapacity);
 
  233  bool findAugmentingPath() {
 
  235    for (
auto &
Node : Nodes) {
 
  242    std::queue<uint64_t> Queue;
 
  244    Nodes[Source].Distance = 0;
 
  245    Nodes[Source].Taken = 
true;
 
  246    while (!Queue.empty()) {
 
  249      Nodes[Src].Taken = 
false;
 
  266      if (Nodes[Src].Distance > Nodes[
Target].Distance)
 
  270      for (
uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
 
  271        auto &Edge = Edges[Src][EdgeIdx];
 
  272        if (Edge.Flow < Edge.Capacity) {
 
  274          int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
 
  275          if (Nodes[Dst].Distance > NewDistance) {
 
  277            Nodes[Dst].Distance = NewDistance;
 
  278            Nodes[Dst].ParentNode = Src;
 
  279            Nodes[Dst].ParentEdgeIndex = EdgeIdx;
 
  281            if (!Nodes[Dst].Taken) {
 
  283              Nodes[Dst].Taken = 
true;
 
  294  void augmentFlowAlongPath(
uint64_t PathCapacity) {
 
  295    assert(PathCapacity > 0 && 
"found an incorrect augmenting path");
 
  297    while (Now != Source) {
 
  298      uint64_t Pred = Nodes[Now].ParentNode;
 
  299      auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
 
  300      auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
 
  302      Edge.Flow += PathCapacity;
 
  303      RevEdge.Flow -= PathCapacity;
 
  316  std::vector<uint64_t> findAugmentingDAG() {
 
  322    typedef std::pair<uint64_t, uint64_t> StackItemType;
 
  323    std::stack<StackItemType> Stack;
 
  324    std::vector<uint64_t> AugmentingOrder;
 
  327    for (
auto &
Node : Nodes) {
 
  336    Nodes[
Target].Taken = 
true;
 
  339    Stack.emplace(Source, 0);
 
  340    Nodes[Source].Discovery = ++Time;
 
  341    while (!Stack.empty()) {
 
  342      auto NodeIdx = Stack.top().first;
 
  343      auto EdgeIdx = Stack.top().second;
 
  346      if (EdgeIdx < Edges[NodeIdx].
size()) {
 
  347        auto &Edge = Edges[NodeIdx][EdgeIdx];
 
  348        auto &Dst = Nodes[Edge.Dst];
 
  349        Stack.top().second++;
 
  351        if (Edge.OnShortestPath) {
 
  353          if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {
 
  354            Dst.Discovery = ++Time;
 
  355            Stack.emplace(Edge.Dst, 0);
 
  357          } 
else if (Dst.Taken && Dst.Finish != 0) {
 
  359            Nodes[NodeIdx].Taken = 
true;
 
  366        if (!Nodes[NodeIdx].Taken) {
 
  367          Nodes[NodeIdx].Discovery = 0;
 
  371          Nodes[NodeIdx].Finish = ++Time;
 
  373          if (NodeIdx != Source) {
 
  374            assert(!Stack.empty() && 
"empty stack while running dfs");
 
  375            Nodes[Stack.top().first].Taken = 
true;
 
  377          AugmentingOrder.push_back(NodeIdx);
 
  382    std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
 
  385    for (
size_t Src : AugmentingOrder) {
 
  386      AugmentingEdges[Src].clear();
 
  387      for (
auto &Edge : Edges[Src]) {
 
  389        if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
 
  390            Nodes[Dst].Finish < Nodes[Src].Finish) {
 
  391          AugmentingEdges[Src].push_back(&Edge);
 
  394      assert((Src == 
Target || !AugmentingEdges[Src].empty()) &&
 
  395             "incorrectly constructed augmenting edges");
 
  398    return AugmentingOrder;
 
  405  bool augmentFlowAlongDAG(
const std::vector<uint64_t> &AugmentingOrder) {
 
  407    for (
uint64_t Src : AugmentingOrder) {
 
  408      Nodes[Src].FracFlow = 0;
 
  409      Nodes[Src].IntFlow = 0;
 
  410      for (
auto &Edge : AugmentingEdges[Src]) {
 
  411        Edge->AugmentedFlow = 0;
 
  417    Nodes[Source].FracFlow = 1.0;
 
  418    for (
uint64_t Src : AugmentingOrder) {
 
  420             "incorrectly computed fractional flow");
 
  422      uint64_t Degree = AugmentingEdges[Src].size();
 
  423      for (
auto &Edge : AugmentingEdges[Src]) {
 
  424        double EdgeFlow = Nodes[Src].FracFlow / Degree;
 
  425        Nodes[Edge->Dst].FracFlow += EdgeFlow;
 
  426        if (Edge->Capacity == 
INF)
 
  428        uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
 
  429        MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
 
  433    if (MaxFlowAmount == 0)
 
  437    Nodes[Source].IntFlow = MaxFlowAmount;
 
  438    for (
uint64_t Src : AugmentingOrder) {
 
  443      uint64_t Degree = AugmentingEdges[Src].size();
 
  445      uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
 
  446      for (
auto &Edge : AugmentingEdges[Src]) {
 
  448        uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
 
  449        EdgeFlow = std::min(EdgeFlow, 
uint64_t(Edge->Capacity - Edge->Flow));
 
  450        Nodes[Dst].IntFlow += EdgeFlow;
 
  451        Nodes[Src].IntFlow -= EdgeFlow;
 
  452        Edge->AugmentedFlow += EdgeFlow;
 
  456    Nodes[
Target].IntFlow = 0;
 
  461    for (
size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
 
  462      uint64_t Src = AugmentingOrder[Idx - 1];
 
  465      for (
auto &Edge : AugmentingEdges[Src]) {
 
  467        if (Nodes[Dst].IntFlow == 0)
 
  469        uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
 
  470        Nodes[Dst].IntFlow -= EdgeFlow;
 
  471        Nodes[Src].IntFlow += EdgeFlow;
 
  472        Edge->AugmentedFlow -= EdgeFlow;
 
  477    bool HasSaturatedEdges = 
false;
 
  478    for (
uint64_t Src : AugmentingOrder) {
 
  480      assert(Src == Source || Nodes[Src].IntFlow == 0);
 
  481      for (
auto &Edge : AugmentingEdges[Src]) {
 
  482        assert(
uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
 
  484        auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
 
  485        Edge->Flow += Edge->AugmentedFlow;
 
  486        RevEdge.Flow -= Edge->AugmentedFlow;
 
  487        if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
 
  488          HasSaturatedEdges = 
true;
 
  493    return HasSaturatedEdges;
 
  497  void identifyShortestEdges(
uint64_t PathCapacity) {
 
  498    assert(PathCapacity > 0 && 
"found an incorrect augmenting DAG");
 
  506    for (
size_t Src = 0; Src < Nodes.size(); Src++) {
 
  508      if (Nodes[Src].Distance > Nodes[
Target].Distance)
 
  511      for (
auto &Edge : Edges[Src]) {
 
  513        Edge.OnShortestPath =
 
  514            Src != 
Target && Dst != Source &&
 
  515            Nodes[Dst].Distance <= Nodes[
Target].Distance &&
 
  516            Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
 
  517            Edge.Capacity > Edge.Flow &&
 
  518            uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
 
  524  static constexpr uint64_t MaxDfsCalls = 10;
 
  571  std::vector<Node> Nodes;
 
  573  std::vector<std::vector<Edge>> Edges;
 
  579  std::vector<std::vector<Edge *>> AugmentingEdges;
 
  603      : Params(Params), Func(Func) {}
 
  609      joinIsolatedComponents();
 
  614      rebalanceUnknownSubgraphs();
 
  619  void joinIsolatedComponents() {
 
  621    auto Visited = 
BitVector(NumBlocks(), 
false);
 
  622    findReachable(Func.Entry, Visited);
 
  626      auto &
Block = Func.Blocks[
I];
 
  627      if (
Block.Flow > 0 && !Visited[
I]) {
 
  629        auto Path = findShortestPath(
I);
 
  631        assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
 
  632               "incorrectly computed path adjusting control flow");
 
  633        Func.Blocks[Func.Entry].Flow += 1;
 
  634        for (
auto &Jump : Path) {
 
  636          Func.Blocks[Jump->Target].Flow += 1;
 
  638          findReachable(Jump->Target, Visited);
 
  649    std::queue<uint64_t> Queue;
 
  652    while (!Queue.empty()) {
 
  655      for (
auto *Jump : Func.Blocks[Src].SuccJumps) {
 
  657        if (Jump->Flow > 0 && !Visited[Dst]) {
 
  667  std::vector<FlowJump *> findShortestPath(
uint64_t BlockIdx) {
 
  669    auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
 
  671    auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
 
  674    std::vector<FlowJump *> Result;
 
  686      return std::vector<FlowJump *>();
 
  687    if (Func.Blocks[Source].isExit() && 
Target == AnyExitBlock)
 
  688      return std::vector<FlowJump *>();
 
  691    auto Distance = std::vector<int64_t>(NumBlocks(), 
INF);
 
  692    auto Parent = std::vector<FlowJump *>(NumBlocks(), 
nullptr);
 
  693    Distance[Source] = 0;
 
  694    std::set<std::pair<uint64_t, uint64_t>> Queue;
 
  695    Queue.insert(std::make_pair(Distance[Source], Source));
 
  698    while (!Queue.empty()) {
 
  699      uint64_t Src = Queue.begin()->second;
 
  700      Queue.erase(Queue.begin());
 
  703          (Func.Blocks[Src].isExit() && 
Target == AnyExitBlock))
 
  706      for (
auto *Jump : Func.Blocks[Src].SuccJumps) {
 
  708        int64_t JumpDist = jumpDistance(Jump);
 
  709        if (Distance[Dst] > Distance[Src] + JumpDist) {
 
  710          Queue.erase(std::make_pair(Distance[Dst], Dst));
 
  712          Distance[Dst] = Distance[Src] + JumpDist;
 
  715          Queue.insert(std::make_pair(Distance[Dst], Dst));
 
  720    if (
Target == AnyExitBlock) {
 
  722        if (Func.Blocks[
I].isExit() && Parent[
I] != 
nullptr) {
 
  723          if (
Target == AnyExitBlock || Distance[
Target] > Distance[
I]) {
 
  729    assert(Parent[
Target] != 
nullptr && 
"a path does not exist");
 
  732    std::vector<FlowJump *> Result;
 
  734    while (Now != Source) {
 
  735      assert(Now == Parent[Now]->
Target && 
"incorrect parent jump");
 
  736      Result.push_back(Parent[Now]);
 
  737      Now = Parent[Now]->Source;
 
  740    std::reverse(Result.begin(), Result.end());
 
  753  int64_t jumpDistance(
FlowJump *Jump)
 const {
 
  757        std::max(FlowAdjuster::MinBaseDistance,
 
  758                 std::min(Func.Blocks[Func.Entry].Flow,
 
  761      return BaseDistance + BaseDistance / Jump->
Flow;
 
  762    return 2 * BaseDistance * (NumBlocks() + 1);
 
  765  uint64_t NumBlocks()
 const { 
return Func.Blocks.size(); }
 
  771  void rebalanceUnknownSubgraphs() {
 
  773    for (
const FlowBlock &SrcBlock : Func.Blocks) {
 
  775      if (!canRebalanceAtRoot(&SrcBlock))
 
  780      std::vector<FlowBlock *> UnknownBlocks;
 
  781      std::vector<FlowBlock *> KnownDstBlocks;
 
  782      findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);
 
  787      if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,
 
  792      if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))
 
  796      rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);
 
  801  bool canRebalanceAtRoot(
const FlowBlock *SrcBlock) {
 
  808    bool HasUnknownSuccs = 
false;
 
  810      if (Func.Blocks[Jump->
Target].HasUnknownWeight) {
 
  811        HasUnknownSuccs = 
true;
 
  815    if (!HasUnknownSuccs)
 
  823  void findUnknownSubgraph(
const FlowBlock *SrcBlock,
 
  824                           std::vector<FlowBlock *> &KnownDstBlocks,
 
  825                           std::vector<FlowBlock *> &UnknownBlocks) {
 
  828    auto Visited = 
BitVector(NumBlocks(), 
false);
 
  829    std::queue<uint64_t> Queue;
 
  831    Queue.push(SrcBlock->
Index);
 
  832    Visited[SrcBlock->
Index] = 
true;
 
  833    while (!Queue.empty()) {
 
  834      auto &
Block = Func.Blocks[Queue.front()];
 
  837      for (
auto *Jump : 
Block.SuccJumps) {
 
  839        if (ignoreJump(SrcBlock, 
nullptr, Jump))
 
  848        if (!Func.Blocks[Dst].HasUnknownWeight) {
 
  849          KnownDstBlocks.push_back(&Func.Blocks[Dst]);
 
  852          UnknownBlocks.push_back(&Func.Blocks[Dst]);
 
  860  bool canRebalanceSubgraph(
const FlowBlock *SrcBlock,
 
  861                            const std::vector<FlowBlock *> &KnownDstBlocks,
 
  862                            const std::vector<FlowBlock *> &UnknownBlocks,
 
  865    if (UnknownBlocks.empty())
 
  869    if (KnownDstBlocks.size() > 1)
 
  871    DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
 
  874    for (
auto *
Block : UnknownBlocks) {
 
  875      if (
Block->SuccJumps.empty()) {
 
  877        if (DstBlock != 
nullptr)
 
  881      size_t NumIgnoredJumps = 0;
 
  882      for (
auto *Jump : 
Block->SuccJumps) {
 
  883        if (ignoreJump(SrcBlock, DstBlock, Jump))
 
  888      if (NumIgnoredJumps == 
Block->SuccJumps.size())
 
  903    auto JumpSource = &Func.Blocks[Jump->
Source];
 
  904    auto JumpTarget = &Func.Blocks[Jump->
Target];
 
  907    if (DstBlock != 
nullptr && JumpTarget == DstBlock)
 
  911    if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
 
  915    if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
 
  924                         std::vector<FlowBlock *> &UnknownBlocks) {
 
  926    auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
 
  928      for (
auto *Jump : 
Block->SuccJumps) {
 
  929        if (ignoreJump(SrcBlock, DstBlock, Jump))
 
  931        LocalInDegree[Jump->
Target]++;
 
  934    fillInDegree(SrcBlock);
 
  935    for (
auto *
Block : UnknownBlocks) {
 
  939    if (LocalInDegree[SrcBlock->
Index] > 0)
 
  942    std::vector<FlowBlock *> AcyclicOrder;
 
  943    std::queue<uint64_t> Queue;
 
  944    Queue.push(SrcBlock->
Index);
 
  945    while (!Queue.empty()) {
 
  949      if (DstBlock != 
nullptr && 
Block == DstBlock)
 
  953      if (
Block->HasUnknownWeight && 
Block != SrcBlock)
 
  954        AcyclicOrder.push_back(
Block);
 
  957      for (
auto *Jump : 
Block->SuccJumps) {
 
  958        if (ignoreJump(SrcBlock, DstBlock, Jump))
 
  961        LocalInDegree[Dst]--;
 
  962        if (LocalInDegree[Dst] == 0) {
 
  970    if (UnknownBlocks.size() != AcyclicOrder.size())
 
  972    UnknownBlocks = AcyclicOrder;
 
  978  void rebalanceUnknownSubgraph(
const FlowBlock *SrcBlock,
 
  980                                const std::vector<FlowBlock *> &UnknownBlocks) {
 
  981    assert(SrcBlock->
Flow > 0 && 
"zero-flow block in unknown subgraph");
 
  987      if (ignoreJump(SrcBlock, DstBlock, Jump))
 
  989      BlockFlow += Jump->
Flow;
 
  991    rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
 
  994    for (
auto *
Block : UnknownBlocks) {
 
  995      assert(
Block->HasUnknownWeight && 
"incorrect unknown subgraph");
 
  998      for (
auto *Jump : 
Block->PredJumps) {
 
  999        BlockFlow += Jump->
Flow;
 
 1001      Block->Flow = BlockFlow;
 
 1002      rebalanceBlock(SrcBlock, DstBlock, 
Block, BlockFlow);
 
 1011    size_t BlockDegree = 0;
 
 1012    for (
auto *Jump : 
Block->SuccJumps) {
 
 1013      if (ignoreJump(SrcBlock, DstBlock, Jump))
 
 1018    if (DstBlock == 
nullptr && BlockDegree == 0)
 
 1020    assert(BlockDegree > 0 && 
"all outgoing jumps are ignored");
 
 1024    uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
 
 1025    for (
auto *Jump : 
Block->SuccJumps) {
 
 1026      if (ignoreJump(SrcBlock, DstBlock, Jump))
 
 1032    assert(BlockFlow == 0 && 
"not all flow is propagated");
 
 1038  static constexpr uint64_t MinBaseDistance = 10000;
 
 1046std::pair<int64_t, int64_t> assignBlockCosts(
const ProfiParams &Params,
 
 1048std::pair<int64_t, int64_t> assignJumpCosts(
const ProfiParams &Params,
 
 1056void initializeNetwork(
const ProfiParams &Params, MinCostMaxFlow &Network,
 
 1058  uint64_t NumBlocks = Func.Blocks.size();
 
 1059  assert(NumBlocks > 1 && 
"Too few blocks in a function");
 
 1060  uint64_t NumJumps = Func.Jumps.size();
 
 1061  assert(NumJumps > 0 && 
"Too few jumps in a function");
 
 1072  Network.initialize(2 * NumBlocks + 4, 
S1, 
T1);
 
 1076    auto &
Block = Func.Blocks[
B];
 
 1084    if (
Block.isEntry()) {
 
 1085      Network.addEdge(S, 
Bin, 0);
 
 1086    } 
else if (
Block.isExit()) {
 
 1087      Network.addEdge(Bout, 
T, 0);
 
 1091    auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, 
Block);
 
 1094    Network.addEdge(
Bin, Bout, AuxCostInc);
 
 1095    if (
Block.Weight > 0) {
 
 1096      Network.addEdge(Bout, 
Bin, 
Block.Weight, AuxCostDec);
 
 1097      Network.addEdge(
S1, Bout, 
Block.Weight, 0);
 
 1103  for (
uint64_t J = 0; J < NumJumps; J++) {
 
 1104    auto &Jump = Func.Jumps[J];
 
 1111    auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);
 
 1114    Network.addEdge(Jin, Jout, AuxCostInc);
 
 1116      Network.addEdge(Jout, Jin, Jump.
Weight, AuxCostDec);
 
 1117      Network.addEdge(
S1, Jout, Jump.
Weight, 0);
 
 1118      Network.addEdge(Jin, 
T1, Jump.
Weight, 0);
 
 1123  Network.addEdge(
T, S, 0);
 
 1127std::pair<int64_t, int64_t> assignBlockCosts(
const ProfiParams &Params,
 
 1130  if (
Block.IsUnlikely)
 
 1137  if (
Block.HasUnknownWeight) {
 
 1143    if (
Block.Weight == 0)
 
 1146    if (
Block.isEntry()) {
 
 1151  return std::make_pair(CostInc, CostDec);
 
 1155std::pair<int64_t, int64_t> assignJumpCosts(
const ProfiParams &Params,
 
 1178    assert(Jump.
Weight > 0 && 
"found zero-weight jump with a positive weight");
 
 1180  return std::make_pair(CostInc, CostDec);
 
 1184void extractWeights(
const ProfiParams &Params, MinCostMaxFlow &Network,
 
 1186  uint64_t NumBlocks = Func.Blocks.size();
 
 1187  uint64_t NumJumps = Func.Jumps.size();
 
 1190  for (
uint64_t J = 0; J < NumJumps; J++) {
 
 1191    auto &Jump = Func.Jumps[J];
 
 1196    int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);
 
 1200      Flow = int64_t(Jump.
Weight) + (AuxFlow > 0 ? AuxFlow : 0);
 
 1207  auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
 
 1208  auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
 
 1209  for (
auto &Jump : Func.Jumps) {
 
 1214    auto &
Block = Func.Blocks[
B];
 
 1215    Block.Flow = std::max(OutFlow[
B], InFlow[
B]);
 
 1223  assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
 
 1224  size_t NumExitBlocks = 0;
 
 1225  for (
size_t I = 1; 
I < Func.Blocks.size(); 
I++) {
 
 1226    assert(!Func.Blocks[
I].isEntry() && 
"multiple entry blocks");
 
 1227    if (Func.Blocks[
I].isExit())
 
 1230  assert(NumExitBlocks > 0 && 
"cannot find exit blocks");
 
 1233  for (
auto &
Block : Func.Blocks) {
 
 1234    std::unordered_set<uint64_t> UniqueSuccs;
 
 1235    for (
auto &Jump : 
Block.SuccJumps) {
 
 1236      auto It = UniqueSuccs.insert(Jump->
Target);
 
 1237      assert(It.second && 
"input CFG contains parallel edges");
 
 1241  for (
auto &
Block : Func.Blocks) {
 
 1243           "a block cannot be an entry and an exit");
 
 1246  for (
auto &
Block : Func.Blocks) {
 
 1248           "non-zero weight of a block w/o weight except for an entry");
 
 1251  for (
auto &Jump : Func.Jumps) {
 
 1253           "non-zero weight of a jump w/o weight");
 
 1259  const uint64_t NumBlocks = Func.Blocks.size();
 
 1260  auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
 
 1261  auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
 
 1262  for (
const auto &Jump : Func.Jumps) {
 
 1270    auto &
Block = Func.Blocks[
I];
 
 1271    if (
Block.isEntry()) {
 
 1272      TotalInFlow += 
Block.Flow;
 
 1273      assert(
Block.Flow == OutFlow[
I] && 
"incorrectly computed control flow");
 
 1274    } 
else if (
Block.isExit()) {
 
 1275      TotalOutFlow += 
Block.Flow;
 
 1276      assert(
Block.Flow == InFlow[
I] && 
"incorrectly computed control flow");
 
 1278      assert(
Block.Flow == OutFlow[
I] && 
"incorrectly computed control flow");
 
 1279      assert(
Block.Flow == InFlow[
I] && 
"incorrectly computed control flow");
 
 1282  assert(TotalInFlow == TotalOutFlow && 
"incorrectly computed control flow");
 
 1287  auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
 
 1288  for (
const auto &Jump : Func.Jumps) {
 
 1289    if (Jump.
Flow > 0) {
 
 1295  std::queue<uint64_t> Queue;
 
 1296  auto Visited = 
BitVector(NumBlocks, 
false);
 
 1297  Queue.push(Func.Entry);
 
 1298  Visited[Func.Entry] = 
true;
 
 1299  while (!Queue.empty()) {
 
 1302    for (
uint64_t Dst : PositiveFlowEdges[Src]) {
 
 1303      if (!Visited[Dst]) {
 
 1305        Visited[Dst] = 
true;
 
 1313    auto &
Block = Func.Blocks[
I];
 
 1314    assert((Visited[
I] || 
Block.Flow == 0) && 
"an isolated flow component");
 
 1325  bool HasSamples = 
false;
 
 1327    if (
Block.Weight > 0)
 
 1331  for (
FlowJump &Jump : Func.Jumps) {
 
 1338  if (Func.Blocks.size() <= 1 || !HasSamples)
 
 1347  auto InferenceNetwork = MinCostMaxFlow(Params);
 
 1348  initializeNetwork(Params, InferenceNetwork, Func);
 
 1349  InferenceNetwork.run();
 
 1352  extractWeights(Params, InferenceNetwork, Func);
 
 1355  auto Adjuster = FlowAdjuster(Params, Func);
 
 
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)
This file provides the interface for the profile inference algorithm, profi.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Target - Wrapper for Target specific information.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void applyFlowInference(const ProfiParams &Params, FlowFunction &Func)
Apply the profile inference algorithm for a given function and provided profi options.
A wrapper of a binary basic block.
std::vector< FlowJump * > SuccJumps
A wrapper of binary function with basic blocks and jumps.
A wrapper of a jump between two basic blocks.
Various thresholds and options controlling the behavior of the profile inference algorithm.
unsigned CostJumpUnknownFTInc
The cost of increasing an unknown fall-through jump's count by one.
unsigned CostBlockInc
The cost of increasing a block's count by one.
unsigned CostJumpFTInc
The cost of increasing a fall-through jump's count by one.
bool RebalanceUnknown
Evenly re-distribute flow among unknown subgraphs.
const int64_t CostUnlikely
The cost of taking an unlikely block/jump.
unsigned CostJumpDec
The cost of decreasing a jump's count by one.
bool JoinIslands
Join isolated components having positive flow.
unsigned CostBlockZeroInc
The cost of increasing a count of zero-weight block by one.
unsigned CostBlockEntryDec
The cost of decreasing the entry block's count by one.
unsigned CostJumpInc
The cost of increasing a jump's count by one.
unsigned CostJumpUnknownInc
The cost of increasing an unknown jump's count by one.
unsigned CostBlockUnknownInc
The cost of increasing an unknown block's count by one.
unsigned CostJumpFTDec
The cost of decreasing a fall-through jump's count by one.
unsigned CostBlockDec
The cost of decreasing a block's count by one.
unsigned CostBlockEntryInc
The cost of increasing the entry block's count by one.
bool EvenFlowDistribution
Evenly distribute flow when there are multiple equally likely options.