docs/doxygen/NVPTXIRPeephole_8cpp_source.html

//===------ NVPTXIRPeephole.cpp - NVPTX IR Peephole --------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This file implements IR-level peephole optimizations. These transformations

// run late in the NVPTX IR pass pipeline, just before instruction selection.

//

// Currently, it implements the following transformation(s):

// 1. FMA folding (float/double types):

//    Transforms FMUL+FADD/FSUB sequences into FMA intrinsics when the

//    'contract' fast-math flag is present. Supported patterns:

//    - fadd(fmul(a, b), c) => fma(a, b, c)

//    - fadd(c, fmul(a, b)) => fma(a, b, c)

//    - fadd(fmul(a, b), fmul(c, d)) => fma(a, b, fmul(c, d))

//    - fsub(fmul(a, b), c) => fma(a, b, fneg(c))

//    - fsub(a, fmul(b, c)) => fma(fneg(b), c, a)

//    - fsub(fmul(a, b), fmul(c, d)) => fma(a, b, fneg(fmul(c, d)))

//

//===----------------------------------------------------------------------===//


#include "NVPTXUtilities.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/InstIterator.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/Intrinsics.h"


#define DEBUG_TYPE "nvptx-ir-peephole"


using namespace llvm;


/// Try to fuse \p BI with one of its FMul operands into a single llvm.fma
/// call.
///
/// \p BI is treated as an FSub when its opcode is Instruction::FSub and as an
/// FAdd otherwise. An operand qualifies for folding when it is an FMul
/// BinaryOperator that has exactly one use and carries the 'contract'
/// fast-math flag. If both operands qualify, operand 0 is the one folded.
///
/// Rewrites performed:
///   fadd(fmul(a, b), c) => fma(a, b, c)
///   fadd(c, fmul(a, b)) => fma(a, b, c)
///   fsub(fmul(a, b), c) => fma(a, b, fneg(c))
///   fsub(a, fmul(b, c)) => fma(fneg(b), c, a)
///
/// \returns true if the fold happened; in that case all uses of \p BI have
/// been replaced by the new FMA and both \p BI and the FMul have been erased,
/// so neither may be touched by the caller afterwards. Returns false, leaving
/// the IR unchanged, when no operand qualifies.
static bool tryFoldBinaryFMul(BinaryOperator *BI) {
  Value *Op0 = BI->getOperand(0);
  Value *Op1 = BI->getOperand(1);

  // Returns V as a foldable FMul, or nullptr when V does not qualify. The
  // single-use requirement guarantees the FMul is dead once BI is replaced.
  auto AsFoldableFMul = [](Value *V) -> BinaryOperator * {
    auto *Mul = dyn_cast<BinaryOperator>(V);
    if (Mul && Mul->getOpcode() == Instruction::FMul && Mul->hasOneUse() &&
        Mul->hasAllowContract())
      return Mul;
    return nullptr;
  };

  // Either Op0 or Op1 should be a valid FMul; Op0 takes precedence.
  BinaryOperator *FMul = AsFoldableFMul(Op0);
  const bool IsFirstOperand = FMul != nullptr;
  if (!FMul)
    FMul = AsFoldableFMul(Op1);
  if (!FMul)
    return false;
  Value *OtherOperand = IsFirstOperand ? Op1 : Op0;

  const bool IsFSub = BI->getOpcode() == Instruction::FSub;
  LLVM_DEBUG({
    const char *OpName = IsFSub ? "FSub" : "FAdd";
    dbgs() << "Found " << OpName << " with FMul (single use) as "
           << (IsFirstOperand ? "first" : "second") << " operand: " << *BI
           << "\n";
  });

  Value *MulOp0 = FMul->getOperand(0);
  Value *MulOp1 = FMul->getOperand(1);
  // New instructions are inserted immediately before BI.
  IRBuilder<> Builder(BI);
  Value *FMA = nullptr;

  if (!IsFSub) {
    // fadd(fmul(a, b), c) => fma(a, b, c)
    // fadd(c, fmul(a, b)) => fma(a, b, c)
    FMA = Builder.CreateIntrinsic(Intrinsic::fma, {BI->getType()},
                                  {MulOp0, MulOp1, OtherOperand});
  } else if (IsFirstOperand) {
    // fsub(fmul(a, b), c) => fma(a, b, fneg(c))
    Value *NegOtherOp =
        Builder.CreateFNegFMF(OtherOperand, BI->getFastMathFlags());
    FMA = Builder.CreateIntrinsic(Intrinsic::fma, {BI->getType()},
                                  {MulOp0, MulOp1, NegOtherOp});
  } else {
    // fsub(a, fmul(b, c)) => fma(fneg(b), c, a)
    Value *NegMulOp0 = Builder.CreateFNegFMF(MulOp0, FMul->getFastMathFlags());
    FMA = Builder.CreateIntrinsic(Intrinsic::fma, {BI->getType()},
                                  {NegMulOp0, MulOp1, OtherOperand});
  }

  // Keep only the fast-math flags that BOTH original instructions carried.
  // Value flags such as nnan/ninf apply to every argument of the instruction
  // they sit on, so copying a flag that only one of the two instructions had
  // would make the FMA assert something about an operand the original code
  // never promised (e.g. an fmul-only 'nnan' would turn a NaN addend into
  // poison). Intersecting is the conservative choice; 'contract' survives
  // because both instructions are known to have it.
  FastMathFlags NewFMF = BI->getFastMathFlags();
  NewFMF &= FMul->getFastMathFlags();
  cast<Instruction>(FMA)->setFastMathFlags(NewFMF);

  LLVM_DEBUG({
    const char *OpName = IsFSub ? "FSub" : "FAdd";
    dbgs() << "Replacing " << OpName << " with FMA: " << *FMA << "\n";
  });

  // Erase BI first: it is the only user of FMul, so FMul has no remaining
  // uses by the time it is erased.
  BI->replaceAllUsesWith(FMA);
  BI->eraseFromParent();
  FMul->eraseFromParent();
  return true;
}


/// Walk every instruction in \p F and attempt the FMul+FAdd/FSub => FMA fold
/// on each candidate.
///
/// A candidate is a BinaryOperator that is an FAdd or FSub, carries the
/// 'contract' fast-math flag, and has type float or double.
///
/// \returns true if at least one instruction was folded.
static bool foldFMA(Function &F) {
  bool Changed = false;

  // Early-increment iteration: a successful fold erases the instruction that
  // is currently being visited.
  for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) {
    auto *BinOp = dyn_cast<BinaryOperator>(&Inst);
    if (!BinOp)
      continue;

    // Only FAdd and FSub are supported, and they must allow contraction.
    // The opcode is tested first so the flag query is only made on FP ops.
    const auto Opcode = BinOp->getOpcode();
    const bool IsAddOrSub =
        Opcode == Instruction::FAdd || Opcode == Instruction::FSub;
    if (!IsAddOrSub || !BinOp->hasAllowContract())
      continue;

    // Only float and double are supported.
    Type *Ty = BinOp->getType();
    if (!(Ty->isFloatTy() || Ty->isDoubleTy()))
      continue;

    Changed |= tryFoldBinaryFMul(BinOp);
  }

  return Changed;
}


namespace {

/// FunctionPass wrapper around the IR peephole; its runOnFunction() is
/// defined out of line further down in this file.
class NVPTXIRPeephole : public FunctionPass {
public:
  /// Pass identification; its address is handed to the FunctionPass base.
  static char ID;

  NVPTXIRPeephole() : FunctionPass(ID) {}

  /// Runs the peephole on \p F and returns whether the function was changed.
  bool runOnFunction(Function &F) override;
};

} // end anonymous namespace


// Out-of-line definition of the static pass ID declared in NVPTXIRPeephole.
char NVPTXIRPeephole::ID = 0;


// Register the pass under the argument string "nvptx-ir-peephole" with the
// description "NVPTX IR Peephole". The two trailing 'false' values are the
// macro's 'cfg' and 'analysis' parameters.
INITIALIZE_PASS(NVPTXIRPeephole, "nvptx-ir-peephole", "NVPTX IR Peephole",

                false, false)


/// FunctionPass entry point: applies the FMA folding to \p F and reports
/// whether any instruction was rewritten.
bool NVPTXIRPeephole::runOnFunction(Function &F) {
  const bool Modified = foldFMA(F);
  return Modified;
}


/// Factory for the FunctionPass-based peephole. Returns a newly allocated
/// NVPTXIRPeephole instance; nothing in this function releases it.
FunctionPass *llvm::createNVPTXIRPeepholePass() {
  FunctionPass *Pass = new NVPTXIRPeephole();
  return Pass;
}


/// Entry point taking a FunctionAnalysisManager (unused here). Applies the
/// FMA folding to \p F.
///
/// \returns PreservedAnalyses::all() when nothing was changed; otherwise a
/// set that preserves only the CFG analyses.
PreservedAnalyses NVPTXIRPeepholePass::run(Function &F,
                                           FunctionAnalysisManager &) {
  if (foldFMA(F)) {
    // Instructions were rewritten; only the CFG analyses are marked preserved.
    PreservedAnalyses Preserved;
    Preserved.preserveSet<CFGAnalyses>();
    return Preserved;
  }

  return PreservedAnalyses::all();
}


instructions
Expand Atomic instructions
Definition AtomicExpandPass.cpp:184

runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition EntryExitInstrumenter.cpp:103

IRBuilder.h

InstIterator.h

Instructions.h

Intrinsics.h

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

tryFoldBinaryFMul
static bool tryFoldBinaryFMul(BinaryOperator *BI)
Definition NVPTXIRPeephole.cpp:35

foldFMA
static bool foldFMA(Function &F)
Definition NVPTXIRPeephole.cpp:113

NVPTXUtilities.h

INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:114

bool

llvm::BinaryOperator
Definition InstrTypes.h:171

llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition InstrTypes.h:374

llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73

llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22

llvm::FastMathFlags::intersectRewrite
static FastMathFlags intersectRewrite(FastMathFlags LHS, FastMathFlags RHS)
Intersect rewrite-based flags.
Definition FMF.h:112

llvm::FastMathFlags::unionValue
static FastMathFlags unionValue(FastMathFlags LHS, FastMathFlags RHS)
Union value flags.
Definition FMF.h:120

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314

llvm::Function
Definition Function.h:64

llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788

llvm::Instruction::eraseFromParent
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition Instruction.cpp:108

llvm::Instruction::getFastMathFlags
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
Definition Instruction.cpp:683

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118

llvm::PreservedAnalyses::preserveSet
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151

llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition User.h:232

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256

llvm::Value::replaceAllUsesWith
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546

Changed
Changed
Definition ObjCARCOpts.cpp:2369

OpName
Definition R600Defines.h:62

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition AddressRanges.h:18

llvm::createNVPTXIRPeepholePass
FunctionPass * createNVPTXIRPeepholePass()
Definition NVPTXIRPeephole.cpp:155

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207

llvm::RecurKind::FMul
@ FMul
Product of floats.
Definition IVDescriptors.h:49

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::FunctionAnalysisManager
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
Definition PassManager.h:563

llvm::NVPTXIRPeepholePass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition NVPTXIRPeephole.cpp:159