LLVM 17.0.0git
|
#include "NVPTXISelLowering.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "NVPTXSubtarget.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/FPEnv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <iterator>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
Go to the source code of this file.
Macros | |
#define | DEBUG_TYPE "nvptx-lower" |
Enumerations | |
enum | ParamVectorizationFlags { PVF_INNER = 0x0 , PVF_FIRST = 0x1 , PVF_LAST = 0x2 , PVF_SCALAR = PVF_FIRST | PVF_LAST } |
enum | OperandSignedness { Signed = 0 , Unsigned , Unknown } |
Functions | |
static bool | IsPTXVectorType (MVT VT) |
static void | ComputePTXValueVTs (const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0) |
ComputePTXValueVTs - For the given Type Ty , returns the set of primitive EVTs that compose it. | |
static bool | PromoteScalarIntegerPTX (const EVT &VT, MVT *PromotedVT) |
PromoteScalarIntegerPTX Used to make sure the arguments/returns are suitable for passing and promote them to a larger size if they're not. | |
static unsigned | CanMergeParamLoadStoresStartingAt (unsigned Idx, uint32_t AccessSize, const SmallVectorImpl< EVT > &ValueVTs, const SmallVectorImpl< uint64_t > &Offsets, Align ParamAlignment) |
static SmallVector< ParamVectorizationFlags, 16 > | VectorizePTXValueVTs (const SmallVectorImpl< EVT > &ValueVTs, const SmallVectorImpl< uint64_t > &Offsets, Align ParamAlignment, bool IsVAArg=false) |
static unsigned | getOpcForTextureInstr (unsigned Intrinsic) |
static unsigned | getOpcForSurfaceInstr (unsigned Intrinsic) |
static SDValue | PerformADDCombineWithOperands (SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const NVPTXSubtarget &Subtarget, CodeGenOpt::Level OptLevel) |
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1. | |
static SDValue | PerformStoreRetvalCombine (SDNode *N) |
static SDValue | PerformADDCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const NVPTXSubtarget &Subtarget, CodeGenOpt::Level OptLevel) |
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. | |
static SDValue | PerformANDCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI) |
static SDValue | PerformREMCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOpt::Level OptLevel) |
static bool | IsMulWideOperandDemotable (SDValue Op, unsigned OptSize, OperandSignedness &S) |
IsMulWideOperandDemotable - Checks if the provided DAG node is an operand that can be demoted to OptSize bits without loss of information. | |
static bool | AreMulWideOperandsDemotable (SDValue LHS, SDValue RHS, unsigned OptSize, bool &IsSigned) |
AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can be demoted to OptSize bits without loss of information. | |
static SDValue | TryMULWIDECombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI) |
TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply of M/2 bits that produces an M-bit result (i.e. | |
static SDValue | PerformMULCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOpt::Level OptLevel) |
PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes. | |
static SDValue | PerformSHLCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOpt::Level OptLevel) |
PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes. | |
static SDValue | PerformSETCCCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI) |
static void | ReplaceLoadVector (SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) |
ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. | |
static void | ReplaceINTRINSIC_W_CHAIN (SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) |
Variables | |
static std::atomic< unsigned > | GlobalUniqueCallSite |
static cl::opt< bool > | sched4reg ("nvptx-sched4reg", cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)) |
static cl::opt< unsigned > | FMAContractLevelOpt ("nvptx-fma-level", cl::Hidden, cl::desc("NVPTX Specific: FMA contraction (0: don't do it" " 1: do it 2: do it aggressively"), cl::init(2)) |
static cl::opt< int > | UsePrecDivF32 ("nvptx-prec-divf32", cl::Hidden, cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" " IEEE Compliant F32 div.rnd if available."), cl::init(2)) |
static cl::opt< bool > | UsePrecSqrtF32 ("nvptx-prec-sqrtf32", cl::Hidden, cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), cl::init(true)) |
#define DEBUG_TYPE "nvptx-lower" |
Definition at line 65 of file NVPTXISelLowering.cpp.
enum OperandSignedness |
Enumerator | |
---|---|
Signed | |
Unsigned | |
Unknown |
Definition at line 4894 of file NVPTXISelLowering.cpp.
Enumerator | |
---|---|
PVF_INNER | |
PVF_FIRST | |
PVF_LAST | |
PVF_SCALAR |
Definition at line 303 of file NVPTXISelLowering.cpp.
|
static |
AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can be demoted to OptSize
bits without loss of information.
If the operands contain a constant, it should appear as the RHS operand. The signedness of the operands is placed in IsSigned
.
Definition at line 4930 of file NVPTXISelLowering.cpp.
References llvm::APInt::isIntN(), IsMulWideOperandDemotable(), llvm::APInt::isSignedIntN(), LHS, RHS, Signed, llvm::Unknown, and Unsigned.
Referenced by TryMULWIDECombine().
|
static |
Definition at line 257 of file NVPTXISelLowering.cpp.
References llvm::EVT::getStoreSize(), Idx, and llvm::SmallVectorBase< Size_T >::size().
Referenced by VectorizePTXValueVTs().
|
static |
ComputePTXValueVTs - For the given Type Ty
, returns the set of primitive EVTs that compose it.
Unlike ComputeValueVTs, this will break apart vectors into their primitive components. NOTE: This is a band-aid for code that expects ComputeValueVTs to return the same number of types as the Ins/Outs arrays in LowerFormalArguments, LowerCall, and LowerReturn.
Definition at line 152 of file NVPTXISelLowering.cpp.
References llvm::MVT::bf16, ComputePTXValueVTs(), llvm::ComputeValueVTs(), DL, llvm::MVT::f16, llvm::EVT::getStoreSize(), llvm::EVT::getVectorElementType(), llvm::EVT::getVectorNumElements(), llvm::MVT::i64, llvm::Type::isIntegerTy(), llvm::EVT::isVector(), llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::SmallVectorBase< Size_T >::size(), llvm::MVT::v2bf16, and llvm::MVT::v2f16.
Referenced by ComputePTXValueVTs(), llvm::NVPTXTargetLowering::LowerCall(), llvm::NVPTXTargetLowering::LowerFormalArguments(), and llvm::NVPTXTargetLowering::LowerReturn().
Definition at line 3282 of file NVPTXISelLowering.cpp.
References llvm::NVPTXISD::Suld1DArrayI16Clamp, llvm::NVPTXISD::Suld1DArrayI16Trap, llvm::NVPTXISD::Suld1DArrayI16Zero, llvm::NVPTXISD::Suld1DArrayI32Clamp, llvm::NVPTXISD::Suld1DArrayI32Trap, llvm::NVPTXISD::Suld1DArrayI32Zero, llvm::NVPTXISD::Suld1DArrayI64Clamp, llvm::NVPTXISD::Suld1DArrayI64Trap, llvm::NVPTXISD::Suld1DArrayI64Zero, llvm::NVPTXISD::Suld1DArrayI8Clamp, llvm::NVPTXISD::Suld1DArrayI8Trap, llvm::NVPTXISD::Suld1DArrayI8Zero, llvm::NVPTXISD::Suld1DArrayV2I16Clamp, llvm::NVPTXISD::Suld1DArrayV2I16Trap, llvm::NVPTXISD::Suld1DArrayV2I16Zero, llvm::NVPTXISD::Suld1DArrayV2I32Clamp, llvm::NVPTXISD::Suld1DArrayV2I32Trap, llvm::NVPTXISD::Suld1DArrayV2I32Zero, llvm::NVPTXISD::Suld1DArrayV2I64Clamp, llvm::NVPTXISD::Suld1DArrayV2I64Trap, llvm::NVPTXISD::Suld1DArrayV2I64Zero, llvm::NVPTXISD::Suld1DArrayV2I8Clamp, llvm::NVPTXISD::Suld1DArrayV2I8Trap, llvm::NVPTXISD::Suld1DArrayV2I8Zero, llvm::NVPTXISD::Suld1DArrayV4I16Clamp, llvm::NVPTXISD::Suld1DArrayV4I16Trap, llvm::NVPTXISD::Suld1DArrayV4I16Zero, llvm::NVPTXISD::Suld1DArrayV4I32Clamp, llvm::NVPTXISD::Suld1DArrayV4I32Trap, llvm::NVPTXISD::Suld1DArrayV4I32Zero, llvm::NVPTXISD::Suld1DArrayV4I8Clamp, llvm::NVPTXISD::Suld1DArrayV4I8Trap, llvm::NVPTXISD::Suld1DArrayV4I8Zero, llvm::NVPTXISD::Suld1DI16Clamp, llvm::NVPTXISD::Suld1DI16Trap, llvm::NVPTXISD::Suld1DI16Zero, llvm::NVPTXISD::Suld1DI32Clamp, llvm::NVPTXISD::Suld1DI32Trap, llvm::NVPTXISD::Suld1DI32Zero, llvm::NVPTXISD::Suld1DI64Clamp, llvm::NVPTXISD::Suld1DI64Trap, llvm::NVPTXISD::Suld1DI64Zero, llvm::NVPTXISD::Suld1DI8Clamp, llvm::NVPTXISD::Suld1DI8Trap, llvm::NVPTXISD::Suld1DI8Zero, llvm::NVPTXISD::Suld1DV2I16Clamp, llvm::NVPTXISD::Suld1DV2I16Trap, llvm::NVPTXISD::Suld1DV2I16Zero, llvm::NVPTXISD::Suld1DV2I32Clamp, llvm::NVPTXISD::Suld1DV2I32Trap, llvm::NVPTXISD::Suld1DV2I32Zero, llvm::NVPTXISD::Suld1DV2I64Clamp, llvm::NVPTXISD::Suld1DV2I64Trap, llvm::NVPTXISD::Suld1DV2I64Zero, llvm::NVPTXISD::Suld1DV2I8Clamp, llvm::NVPTXISD::Suld1DV2I8Trap, llvm::NVPTXISD::Suld1DV2I8Zero, llvm::NVPTXISD::Suld1DV4I16Clamp, llvm::NVPTXISD::Suld1DV4I16Trap, llvm::NVPTXISD::Suld1DV4I16Zero, llvm::NVPTXISD::Suld1DV4I32Clamp, llvm::NVPTXISD::Suld1DV4I32Trap, llvm::NVPTXISD::Suld1DV4I32Zero, llvm::NVPTXISD::Suld1DV4I8Clamp, llvm::NVPTXISD::Suld1DV4I8Trap, llvm::NVPTXISD::Suld1DV4I8Zero, llvm::NVPTXISD::Suld2DArrayI16Clamp, llvm::NVPTXISD::Suld2DArrayI16Trap, llvm::NVPTXISD::Suld2DArrayI16Zero, llvm::NVPTXISD::Suld2DArrayI32Clamp, llvm::NVPTXISD::Suld2DArrayI32Trap, llvm::NVPTXISD::Suld2DArrayI32Zero, llvm::NVPTXISD::Suld2DArrayI64Clamp, llvm::NVPTXISD::Suld2DArrayI64Trap, llvm::NVPTXISD::Suld2DArrayI64Zero, llvm::NVPTXISD::Suld2DArrayI8Clamp, llvm::NVPTXISD::Suld2DArrayI8Trap, llvm::NVPTXISD::Suld2DArrayI8Zero, llvm::NVPTXISD::Suld2DArrayV2I16Clamp, llvm::NVPTXISD::Suld2DArrayV2I16Trap, llvm::NVPTXISD::Suld2DArrayV2I16Zero, llvm::NVPTXISD::Suld2DArrayV2I32Clamp, llvm::NVPTXISD::Suld2DArrayV2I32Trap, llvm::NVPTXISD::Suld2DArrayV2I32Zero, llvm::NVPTXISD::Suld2DArrayV2I64Clamp, llvm::NVPTXISD::Suld2DArrayV2I64Trap, llvm::NVPTXISD::Suld2DArrayV2I64Zero, llvm::NVPTXISD::Suld2DArrayV2I8Clamp, llvm::NVPTXISD::Suld2DArrayV2I8Trap, llvm::NVPTXISD::Suld2DArrayV2I8Zero, llvm::NVPTXISD::Suld2DArrayV4I16Clamp, llvm::NVPTXISD::Suld2DArrayV4I16Trap, llvm::NVPTXISD::Suld2DArrayV4I16Zero, llvm::NVPTXISD::Suld2DArrayV4I32Clamp, llvm::NVPTXISD::Suld2DArrayV4I32Trap, llvm::NVPTXISD::Suld2DArrayV4I32Zero, llvm::NVPTXISD::Suld2DArrayV4I8Clamp, llvm::NVPTXISD::Suld2DArrayV4I8Trap, llvm::NVPTXISD::Suld2DArrayV4I8Zero, llvm::NVPTXISD::Suld2DI16Clamp, llvm::NVPTXISD::Suld2DI16Trap, llvm::NVPTXISD::Suld2DI16Zero, llvm::NVPTXISD::Suld2DI32Clamp, llvm::NVPTXISD::Suld2DI32Trap, llvm::NVPTXISD::Suld2DI32Zero, llvm::NVPTXISD::Suld2DI64Clamp, llvm::NVPTXISD::Suld2DI64Trap, llvm::NVPTXISD::Suld2DI64Zero, llvm::NVPTXISD::Suld2DI8Clamp, llvm::NVPTXISD::Suld2DI8Trap, llvm::NVPTXISD::Suld2DI8Zero, llvm::NVPTXISD::Suld2DV2I16Clamp, llvm::NVPTXISD::Suld2DV2I16Trap, llvm::NVPTXISD::Suld2DV2I16Zero, llvm::NVPTXISD::Suld2DV2I32Clamp, llvm::NVPTXISD::Suld2DV2I32Trap, llvm::NVPTXISD::Suld2DV2I32Zero, llvm::NVPTXISD::Suld2DV2I64Clamp, llvm::NVPTXISD::Suld2DV2I64Trap, llvm::NVPTXISD::Suld2DV2I64Zero, llvm::NVPTXISD::Suld2DV2I8Clamp, llvm::NVPTXISD::Suld2DV2I8Trap, llvm::NVPTXISD::Suld2DV2I8Zero, llvm::NVPTXISD::Suld2DV4I16Clamp, llvm::NVPTXISD::Suld2DV4I16Trap, llvm::NVPTXISD::Suld2DV4I16Zero, llvm::NVPTXISD::Suld2DV4I32Clamp, llvm::NVPTXISD::Suld2DV4I32Trap, llvm::NVPTXISD::Suld2DV4I32Zero, llvm::NVPTXISD::Suld2DV4I8Clamp, llvm::NVPTXISD::Suld2DV4I8Trap, llvm::NVPTXISD::Suld2DV4I8Zero, llvm::NVPTXISD::Suld3DI16Clamp, llvm::NVPTXISD::Suld3DI16Trap, llvm::NVPTXISD::Suld3DI16Zero, llvm::NVPTXISD::Suld3DI32Clamp, llvm::NVPTXISD::Suld3DI32Trap, llvm::NVPTXISD::Suld3DI32Zero, llvm::NVPTXISD::Suld3DI64Clamp, llvm::NVPTXISD::Suld3DI64Trap, llvm::NVPTXISD::Suld3DI64Zero, llvm::NVPTXISD::Suld3DI8Clamp, llvm::NVPTXISD::Suld3DI8Trap, llvm::NVPTXISD::Suld3DI8Zero, llvm::NVPTXISD::Suld3DV2I16Clamp, llvm::NVPTXISD::Suld3DV2I16Trap, llvm::NVPTXISD::Suld3DV2I16Zero, llvm::NVPTXISD::Suld3DV2I32Clamp, llvm::NVPTXISD::Suld3DV2I32Trap, llvm::NVPTXISD::Suld3DV2I32Zero, llvm::NVPTXISD::Suld3DV2I64Clamp, llvm::NVPTXISD::Suld3DV2I64Trap, llvm::NVPTXISD::Suld3DV2I64Zero, llvm::NVPTXISD::Suld3DV2I8Clamp, llvm::NVPTXISD::Suld3DV2I8Trap, llvm::NVPTXISD::Suld3DV2I8Zero, llvm::NVPTXISD::Suld3DV4I16Clamp, llvm::NVPTXISD::Suld3DV4I16Trap, llvm::NVPTXISD::Suld3DV4I16Zero, llvm::NVPTXISD::Suld3DV4I32Clamp, llvm::NVPTXISD::Suld3DV4I32Trap, llvm::NVPTXISD::Suld3DV4I32Zero, llvm::NVPTXISD::Suld3DV4I8Clamp, llvm::NVPTXISD::Suld3DV4I8Trap, and llvm::NVPTXISD::Suld3DV4I8Zero.
Referenced by llvm::NVPTXTargetLowering::getTgtMemIntrinsic().
Definition at line 2923 of file NVPTXISelLowering.cpp.
References llvm::NVPTXISD::Tex1DArrayFloatFloat, llvm::NVPTXISD::Tex1DArrayFloatFloatGrad, llvm::NVPTXISD::Tex1DArrayFloatFloatLevel, llvm::NVPTXISD::Tex1DArrayFloatS32, llvm::NVPTXISD::Tex1DArrayS32Float, llvm::NVPTXISD::Tex1DArrayS32FloatGrad, llvm::NVPTXISD::Tex1DArrayS32FloatLevel, llvm::NVPTXISD::Tex1DArrayS32S32, llvm::NVPTXISD::Tex1DArrayU32Float, llvm::NVPTXISD::Tex1DArrayU32FloatGrad, llvm::NVPTXISD::Tex1DArrayU32FloatLevel, llvm::NVPTXISD::Tex1DArrayU32S32, llvm::NVPTXISD::Tex1DFloatFloat, llvm::NVPTXISD::Tex1DFloatFloatGrad, llvm::NVPTXISD::Tex1DFloatFloatLevel, llvm::NVPTXISD::Tex1DFloatS32, llvm::NVPTXISD::Tex1DS32Float, llvm::NVPTXISD::Tex1DS32FloatGrad, llvm::NVPTXISD::Tex1DS32FloatLevel, llvm::NVPTXISD::Tex1DS32S32, llvm::NVPTXISD::Tex1DU32Float, llvm::NVPTXISD::Tex1DU32FloatGrad, llvm::NVPTXISD::Tex1DU32FloatLevel, llvm::NVPTXISD::Tex1DU32S32, llvm::NVPTXISD::Tex2DArrayFloatFloat, llvm::NVPTXISD::Tex2DArrayFloatFloatGrad, llvm::NVPTXISD::Tex2DArrayFloatFloatLevel, llvm::NVPTXISD::Tex2DArrayFloatS32, llvm::NVPTXISD::Tex2DArrayS32Float, llvm::NVPTXISD::Tex2DArrayS32FloatGrad, llvm::NVPTXISD::Tex2DArrayS32FloatLevel, llvm::NVPTXISD::Tex2DArrayS32S32, llvm::NVPTXISD::Tex2DArrayU32Float, llvm::NVPTXISD::Tex2DArrayU32FloatGrad, llvm::NVPTXISD::Tex2DArrayU32FloatLevel, llvm::NVPTXISD::Tex2DArrayU32S32, llvm::NVPTXISD::Tex2DFloatFloat, llvm::NVPTXISD::Tex2DFloatFloatGrad, llvm::NVPTXISD::Tex2DFloatFloatLevel, llvm::NVPTXISD::Tex2DFloatS32, llvm::NVPTXISD::Tex2DS32Float, llvm::NVPTXISD::Tex2DS32FloatGrad, llvm::NVPTXISD::Tex2DS32FloatLevel, llvm::NVPTXISD::Tex2DS32S32, llvm::NVPTXISD::Tex2DU32Float, llvm::NVPTXISD::Tex2DU32FloatGrad, llvm::NVPTXISD::Tex2DU32FloatLevel, llvm::NVPTXISD::Tex2DU32S32, llvm::NVPTXISD::Tex3DFloatFloat, llvm::NVPTXISD::Tex3DFloatFloatGrad, llvm::NVPTXISD::Tex3DFloatFloatLevel, llvm::NVPTXISD::Tex3DFloatS32, llvm::NVPTXISD::Tex3DS32Float, llvm::NVPTXISD::Tex3DS32FloatGrad, llvm::NVPTXISD::Tex3DS32FloatLevel, llvm::NVPTXISD::Tex3DS32S32, llvm::NVPTXISD::Tex3DU32Float, llvm::NVPTXISD::Tex3DU32FloatGrad, llvm::NVPTXISD::Tex3DU32FloatLevel, llvm::NVPTXISD::Tex3DU32S32, llvm::NVPTXISD::TexCubeArrayFloatFloat, llvm::NVPTXISD::TexCubeArrayFloatFloatLevel, llvm::NVPTXISD::TexCubeArrayS32Float, llvm::NVPTXISD::TexCubeArrayS32FloatLevel, llvm::NVPTXISD::TexCubeArrayU32Float, llvm::NVPTXISD::TexCubeArrayU32FloatLevel, llvm::NVPTXISD::TexCubeFloatFloat, llvm::NVPTXISD::TexCubeFloatFloatLevel, llvm::NVPTXISD::TexCubeS32Float, llvm::NVPTXISD::TexCubeS32FloatLevel, llvm::NVPTXISD::TexCubeU32Float, llvm::NVPTXISD::TexCubeU32FloatLevel, llvm::NVPTXISD::TexUnified1DArrayFloatFloat, llvm::NVPTXISD::TexUnified1DArrayFloatFloatGrad, llvm::NVPTXISD::TexUnified1DArrayFloatFloatLevel, llvm::NVPTXISD::TexUnified1DArrayFloatS32, llvm::NVPTXISD::TexUnified1DArrayS32Float, llvm::NVPTXISD::TexUnified1DArrayS32FloatGrad, llvm::NVPTXISD::TexUnified1DArrayS32FloatLevel, llvm::NVPTXISD::TexUnified1DArrayS32S32, llvm::NVPTXISD::TexUnified1DArrayU32Float, llvm::NVPTXISD::TexUnified1DArrayU32FloatGrad, llvm::NVPTXISD::TexUnified1DArrayU32FloatLevel, llvm::NVPTXISD::TexUnified1DArrayU32S32, llvm::NVPTXISD::TexUnified1DFloatFloat, llvm::NVPTXISD::TexUnified1DFloatFloatGrad, llvm::NVPTXISD::TexUnified1DFloatFloatLevel, llvm::NVPTXISD::TexUnified1DFloatS32, llvm::NVPTXISD::TexUnified1DS32Float, llvm::NVPTXISD::TexUnified1DS32FloatGrad, llvm::NVPTXISD::TexUnified1DS32FloatLevel, llvm::NVPTXISD::TexUnified1DS32S32, llvm::NVPTXISD::TexUnified1DU32Float, llvm::NVPTXISD::TexUnified1DU32FloatGrad, llvm::NVPTXISD::TexUnified1DU32FloatLevel, llvm::NVPTXISD::TexUnified1DU32S32, llvm::NVPTXISD::TexUnified2DArrayFloatFloat, llvm::NVPTXISD::TexUnified2DArrayFloatFloatGrad, llvm::NVPTXISD::TexUnified2DArrayFloatFloatLevel, llvm::NVPTXISD::TexUnified2DArrayFloatS32, llvm::NVPTXISD::TexUnified2DArrayS32Float, llvm::NVPTXISD::TexUnified2DArrayS32FloatGrad, llvm::NVPTXISD::TexUnified2DArrayS32FloatLevel, llvm::NVPTXISD::TexUnified2DArrayS32S32, llvm::NVPTXISD::TexUnified2DArrayU32Float, llvm::NVPTXISD::TexUnified2DArrayU32FloatGrad, llvm::NVPTXISD::TexUnified2DArrayU32FloatLevel, llvm::NVPTXISD::TexUnified2DArrayU32S32, llvm::NVPTXISD::TexUnified2DFloatFloat, llvm::NVPTXISD::TexUnified2DFloatFloatGrad, llvm::NVPTXISD::TexUnified2DFloatFloatLevel, llvm::NVPTXISD::TexUnified2DFloatS32, llvm::NVPTXISD::TexUnified2DS32Float, llvm::NVPTXISD::TexUnified2DS32FloatGrad, llvm::NVPTXISD::TexUnified2DS32FloatLevel, llvm::NVPTXISD::TexUnified2DS32S32, llvm::NVPTXISD::TexUnified2DU32Float, llvm::NVPTXISD::TexUnified2DU32FloatGrad, llvm::NVPTXISD::TexUnified2DU32FloatLevel, llvm::NVPTXISD::TexUnified2DU32S32, llvm::NVPTXISD::TexUnified3DFloatFloat, llvm::NVPTXISD::TexUnified3DFloatFloatGrad, llvm::NVPTXISD::TexUnified3DFloatFloatLevel, llvm::NVPTXISD::TexUnified3DFloatS32, llvm::NVPTXISD::TexUnified3DS32Float, llvm::NVPTXISD::TexUnified3DS32FloatGrad, llvm::NVPTXISD::TexUnified3DS32FloatLevel, llvm::NVPTXISD::TexUnified3DS32S32, llvm::NVPTXISD::TexUnified3DU32Float, llvm::NVPTXISD::TexUnified3DU32FloatGrad, llvm::NVPTXISD::TexUnified3DU32FloatLevel, llvm::NVPTXISD::TexUnified3DU32S32, llvm::NVPTXISD::TexUnifiedCubeArrayFloatFloat, llvm::NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel, llvm::NVPTXISD::TexUnifiedCubeArrayS32Float, llvm::NVPTXISD::TexUnifiedCubeArrayS32FloatLevel, llvm::NVPTXISD::TexUnifiedCubeArrayU32Float, llvm::NVPTXISD::TexUnifiedCubeArrayU32FloatLevel, llvm::NVPTXISD::TexUnifiedCubeFloatFloat, llvm::NVPTXISD::TexUnifiedCubeFloatFloatLevel, llvm::NVPTXISD::TexUnifiedCubeS32Float, llvm::NVPTXISD::TexUnifiedCubeS32FloatLevel, llvm::NVPTXISD::TexUnifiedCubeU32Float, llvm::NVPTXISD::TexUnifiedCubeU32FloatLevel, llvm::NVPTXISD::Tld4A2DFloatFloat, llvm::NVPTXISD::Tld4A2DS64Float, llvm::NVPTXISD::Tld4A2DU64Float, llvm::NVPTXISD::Tld4B2DFloatFloat, llvm::NVPTXISD::Tld4B2DS64Float, llvm::NVPTXISD::Tld4B2DU64Float, llvm::NVPTXISD::Tld4G2DFloatFloat, llvm::NVPTXISD::Tld4G2DS64Float, llvm::NVPTXISD::Tld4G2DU64Float, llvm::NVPTXISD::Tld4R2DFloatFloat, llvm::NVPTXISD::Tld4R2DS64Float, llvm::NVPTXISD::Tld4R2DU64Float, llvm::NVPTXISD::Tld4UnifiedA2DFloatFloat, llvm::NVPTXISD::Tld4UnifiedA2DS64Float, llvm::NVPTXISD::Tld4UnifiedA2DU64Float, llvm::NVPTXISD::Tld4UnifiedB2DFloatFloat, llvm::NVPTXISD::Tld4UnifiedB2DS64Float, llvm::NVPTXISD::Tld4UnifiedB2DU64Float, llvm::NVPTXISD::Tld4UnifiedG2DFloatFloat, llvm::NVPTXISD::Tld4UnifiedG2DS64Float, llvm::NVPTXISD::Tld4UnifiedG2DU64Float, llvm::NVPTXISD::Tld4UnifiedR2DFloatFloat, llvm::NVPTXISD::Tld4UnifiedR2DS64Float, and llvm::NVPTXISD::Tld4UnifiedR2DU64Float.
Referenced by llvm::NVPTXTargetLowering::getTgtMemIntrinsic().
|
static |
IsMulWideOperandDemotable - Checks if the provided DAG node is an operand that can be demoted to OptSize
bits without loss of information.
The signedness of the operand, if determinable, is placed in S
.
Definition at line 4903 of file NVPTXISelLowering.cpp.
References llvm::EVT::getFixedSizeInBits(), llvm::ISD::SIGN_EXTEND, llvm::ISD::SIGN_EXTEND_INREG, Signed, llvm::Unknown, Unsigned, and llvm::ISD::ZERO_EXTEND.
Referenced by AreMulWideOperandsDemotable().
Definition at line 120 of file NVPTXISelLowering.cpp.
References llvm::MVT::SimpleTy, llvm::MVT::v2bf16, llvm::MVT::v2f16, llvm::MVT::v2f32, llvm::MVT::v2f64, llvm::MVT::v2i1, llvm::MVT::v2i16, llvm::MVT::v2i32, llvm::MVT::v2i64, llvm::MVT::v2i8, llvm::MVT::v4bf16, llvm::MVT::v4f16, llvm::MVT::v4f32, llvm::MVT::v4i1, llvm::MVT::v4i16, llvm::MVT::v4i32, llvm::MVT::v4i8, llvm::MVT::v8bf16, and llvm::MVT::v8f16.
Referenced by llvm::NVPTXTargetLowering::NVPTXTargetLowering().
|
static |
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
Definition at line 4772 of file NVPTXISelLowering.cpp.
References N, and PerformADDCombineWithOperands().
|
static |
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1.
This is a helper for PerformADDCombine that is called with the default operands, and if that fails, with commuted operands.
Definition at line 4660 of file NVPTXISelLowering.cpp.
References assert(), llvm::TargetLowering::DAGCombinerInfo::DAG, llvm::MVT::f32, llvm::MVT::f64, llvm::ISD::FADD, llvm::ISD::FMA, llvm::ISD::FMUL, llvm::SDNode::getIROrder(), llvm::SelectionDAG::getMachineFunction(), llvm::SDValue::getNode(), llvm::SelectionDAG::getNode(), llvm::SDValue::getOpcode(), llvm::SDValue::getOperand(), llvm::SelectionDAG::getTargetLoweringInfo(), llvm::SDValue::getValueType(), llvm::SDNode::hasOneUse(), llvm::MVT::i32, llvm::NVPTXISD::IMAD, llvm::EVT::isInteger(), llvm::EVT::isVector(), llvm::ISD::MUL, N, llvm::CodeGenOpt::None, and llvm::SDNode::uses().
|
static |
Definition at line 4788 of file NVPTXISelLowering.cpp.
References llvm::ISD::ANY_EXTEND, llvm::TargetLowering::DAGCombinerInfo::CombineTo(), llvm::TargetLowering::DAGCombinerInfo::DAG, llvm::SDNode::getMachineOpcode(), llvm::MemSDNode::getMemoryVT(), llvm::SDValue::getNode(), llvm::SelectionDAG::getNode(), llvm::SDNode::getNumOperands(), llvm::SDValue::getOpcode(), llvm::SDNode::getOpcode(), llvm::SDNode::getOperand(), llvm::SDValue::getValueType(), llvm::ConstantSDNode::getZExtValue(), llvm::SDNode::isMachineOpcode(), llvm::NVPTXISD::LoadV2, llvm::NVPTXISD::LoadV4, N, llvm::ISD::SEXTLOAD, std::swap(), llvm::MVT::v2i8, llvm::MVT::v4i8, and llvm::ISD::ZERO_EXTEND.
|
static |
PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
Definition at line 5033 of file NVPTXISelLowering.cpp.
References N, and TryMULWIDECombine().
|
static |
Definition at line 4863 of file NVPTXISelLowering.cpp.
References assert(), llvm::TargetLowering::DAGCombinerInfo::DAG, llvm::CodeGenOpt::Default, DL, llvm::SelectionDAG::getNode(), llvm::ISD::MUL, N, llvm::ISD::SDIV, llvm::ISD::SREM, llvm::ISD::SUB, llvm::ISD::UDIV, llvm::ISD::UREM, and llvm::SDNode::uses().
|
static |
Definition at line 5058 of file NVPTXISelLowering.cpp.
References A, B, llvm::ISD::BUILD_VECTOR, llvm::TargetLowering::DAGCombinerInfo::DAG, DL, llvm::SelectionDAG::getNode(), llvm::SDValue::getValue(), llvm::SelectionDAG::getVTList(), llvm::MVT::i1, N, llvm::NVPTXISD::SETP_F16X2, llvm::MVT::v2f16, and llvm::MVT::v2i1.
|
static |
PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
Definition at line 5046 of file NVPTXISelLowering.cpp.
References N, and TryMULWIDECombine().
Definition at line 4759 of file NVPTXISelLowering.cpp.
PromoteScalarIntegerPTX Used to make sure the arguments/returns are suitable for passing and promote them to a larger size if they're not.
The promoted type is placed in PromoteVT
if the function returns true.
Definition at line 218 of file NVPTXISelLowering.cpp.
References llvm::EVT::getFixedSizeInBits(), llvm::MVT::i1, llvm::MVT::i16, llvm::MVT::i32, llvm::MVT::i64, llvm::MVT::i8, llvm::EVT::isScalarInteger(), llvm_unreachable, and llvm::PowerOf2Ceil().
Referenced by llvm::NVPTXTargetLowering::LowerCall(), llvm::NVPTXTargetLowering::LowerFormalArguments(), and llvm::NVPTXTargetLowering::LowerReturn().
|
static |
Definition at line 5237 of file NVPTXISelLowering.cpp.
References llvm::SmallVectorImpl< T >::append(), assert(), DL, llvm::SelectionDAG::getBuildVector(), llvm::SelectionDAG::getMemIntrinsicNode(), llvm::MemSDNode::getMemOperand(), llvm::MemSDNode::getMemoryVT(), llvm::SDValue::getNode(), llvm::SelectionDAG::getNode(), llvm::EVT::getSimpleVT(), llvm::EVT::getSizeInBits(), llvm::SDValue::getValue(), llvm::EVT::getVectorElementType(), llvm::EVT::getVectorNumElements(), llvm::SelectionDAG::getVTList(), llvm::MVT::i16, llvm::MVT::i8, llvm::ISD::INTRINSIC_W_CHAIN, llvm::EVT::isSimple(), llvm::EVT::isVector(), llvm::NVPTXISD::LDGV2, llvm::NVPTXISD::LDGV4, llvm::NVPTXISD::LDUV2, llvm::NVPTXISD::LDUV4, N, llvm::MVT::Other, llvm::SmallVectorTemplateBase< T, bool >::push_back(), Results, llvm::MVT::SimpleTy, and llvm::ISD::TRUNCATE.
|
static |
ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
Definition at line 5107 of file NVPTXISelLowering.cpp.
References assert(), llvm::MVT::bf16, DL, llvm::ISD::EXTRACT_VECTOR_ELT, llvm::MVT::f16, llvm::SelectionDAG::getBuildVector(), llvm::SelectionDAG::getContext(), llvm::SelectionDAG::getDataLayout(), llvm::SelectionDAG::getIntPtrConstant(), llvm::SelectionDAG::getMemIntrinsicNode(), llvm::SelectionDAG::getNode(), llvm::DataLayout::getPrefTypeAlign(), llvm::EVT::getSimpleVT(), llvm::EVT::getSizeInBits(), llvm::EVT::getTypeForEVT(), llvm::SDValue::getValue(), llvm::EVT::getVectorElementType(), llvm::EVT::getVectorNumElements(), llvm::SelectionDAG::getVTList(), llvm::MVT::i16, llvm::EVT::isSimple(), llvm::EVT::isVector(), llvm::NVPTXISD::LoadV2, llvm::NVPTXISD::LoadV4, N, llvm::MVT::Other, llvm::SmallVectorTemplateBase< T, bool >::push_back(), Results, llvm::MVT::SimpleTy, llvm::ISD::TRUNCATE, llvm::MVT::v2bf16, llvm::MVT::v2f16, llvm::MVT::v2f32, llvm::MVT::v2f64, llvm::MVT::v2i16, llvm::MVT::v2i32, llvm::MVT::v2i64, llvm::MVT::v2i8, llvm::MVT::v4f16, llvm::MVT::v4f32, llvm::MVT::v4i16, llvm::MVT::v4i32, llvm::MVT::v4i8, and llvm::MVT::v8f16.
|
static |
TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply of M/2 bits that produces an M-bit result (i.e.
mul.wide). This transform works on both multiply DAG nodes and SHL DAG nodes with a constant shift amount.
Definition at line 4966 of file NVPTXISelLowering.cpp.
References AreMulWideOperandsDemotable(), llvm::BitWidth, llvm::TargetLowering::DAGCombinerInfo::DAG, DL, llvm::ConstantSDNode::getAPIntValue(), llvm::SelectionDAG::getConstant(), llvm::SelectionDAG::getNode(), llvm::EVT::getSizeInBits(), llvm::MVT::i16, llvm::MVT::i32, llvm::MVT::i64, LHS, llvm::ISD::MUL, llvm::NVPTXISD::MUL_WIDE_SIGNED, llvm::NVPTXISD::MUL_WIDE_UNSIGNED, N, RHS, llvm::APInt::sge(), llvm::ISD::SHL, Signed, llvm::APInt::slt(), std::swap(), and llvm::ISD::TRUNCATE.
Referenced by PerformMULCombine(), and PerformSHLCombine().
|
static |
Definition at line 320 of file NVPTXISelLowering.cpp.
References assert(), llvm::SmallVectorImpl< T >::assign(), CanMergeParamLoadStoresStartingAt(), E, I, llvm_unreachable, PVF_FIRST, PVF_INNER, PVF_LAST, PVF_SCALAR, and llvm::SmallVectorBase< Size_T >::size().
Referenced by llvm::NVPTXTargetLowering::LowerCall(), llvm::NVPTXTargetLowering::LowerFormalArguments(), and llvm::NVPTXTargetLowering::LowerReturn().
|
static |
Referenced by llvm::NVPTXTargetLowering::allowFMA().
|
static |
Definition at line 69 of file NVPTXISelLowering.cpp.
Referenced by llvm::NVPTXTargetLowering::LowerCall().
|
static |
Referenced by llvm::NVPTXTargetLowering::NVPTXTargetLowering().
|
static |
Referenced by llvm::NVPTXTargetLowering::getDivF32Level().
|
static |
Referenced by llvm::NVPTXTargetLowering::usePrecSqrtF32().