1//===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9// This file defines the interfaces that NVPTX uses to lower LLVM code into a
10// selection DAG.
17#include "NVPTX.h"
21namespace llvm {
22namespace NVPTXISD {
23enum NodeType : unsigned {
24 // Start the numbering from where ISD NodeType finishes.
64 LDGV2, // LDG.v2
65 LDGV4, // LDG.v4
66 LDUV2, // LDU.v2
67 LDUV4, // LDU.v4
76 StoreParamS32, // to sext and store a <32bit value, not used currently
77 StoreParamU32, // to zext and store a <32bit value, not used currently
82 // Texture intrinsics
252 // Surface intrinsics
435class NVPTXSubtarget;
438// TargetLowering Implementation
443 const NVPTXSubtarget &STI);
/// Lowering hook for a single operation: takes the node value \p Op together
/// with a mutable SelectionDAG and returns an SDValue. Declared `override`, so
/// the virtual it implements lives on a base class outside this excerpt.
444 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
/// Returns a C string for \p Opcode. NOTE(review): presumably the printable
/// name of the matching NVPTXISD node -- confirm against the definition.
448 const char *getTargetNodeName(unsigned Opcode) const override;
451 MachineFunction &MF,
452 unsigned Intrinsic) const override;
454 /// getFunctionParamOptimizedAlign - since function arguments are passed via
455 /// .param space, we may want to increase their alignment in a way that
456 /// ensures that we can effectively vectorize their loads & stores. We can
457 /// increase alignment only if the function has internal or private
458 /// linkage, since for other linkage types callers may already rely on default
459 /// alignment. To allow using 128-bit vectorized loads/stores, this function
460 /// ensures that alignment is 16 or greater.
462 const DataLayout &DL) const;
464 /// Helper for computing alignment of a device function byval parameter.
466 Align InitialAlign,
467 const DataLayout &DL) const;
469 /// isLegalAddressingMode - Return true if the addressing mode represented
470 /// by AM is legal for this target, for a load/store of the specified type.
471 /// Used to guide target specific optimizations, like loop strength
472 /// reduction (LoopStrengthReduce.cpp) and memory optimization for
473 /// address mode (CodeGenPrepare.cpp).
/// \p I is optional and defaults to nullptr. NOTE(review): \p AS is
/// presumably the address space of the access -- confirm.
474 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
475 unsigned AS,
476 Instruction *I = nullptr) const override;
478 bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
479 // Truncating 64-bit to 32-bit is free in SASS.
480 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
481 return false;
482 return SrcTy->getPrimitiveSizeInBits() == 64 &&
483 DstTy->getPrimitiveSizeInBits() == 32;
484 }
487 EVT VT) const override {
488 if (VT.isVector())
490 return MVT::i1;
491 }
/// Classifies the constraint string \p Constraint as a ConstraintType.
/// NOTE(review): presumably an inline-asm constraint, given
/// LowerAsmOperandForConstraint elsewhere in this class -- confirm.
493 ConstraintType getConstraintType(StringRef Constraint) const override;
494 std::pair<unsigned, const TargetRegisterClass *>
496 StringRef Constraint, MVT VT) const override;
499 bool isVarArg,
501 const SDLoc &dl, SelectionDAG &DAG,
502 SmallVectorImpl<SDValue> &InVals) const override;
/// Lowers the call described by \p CLI and returns an SDValue. \p InVals is
/// taken by mutable reference. NOTE(review): presumably it is filled with the
/// values produced by the call -- confirm against the definition.
504 SDValue LowerCall(CallLoweringInfo &CLI,
505 SmallVectorImpl<SDValue> &InVals) const override;
/// Returns a prototype as a std::string. NOTE(review): presumably the textual
/// call prototype for the call site \p CB -- confirm against the definition.
///
/// Three parameters are unnamed in this declaration: a `Type *`, an
/// `ArgListTy`, and a list of `ISD::OutputArg`. NOTE(review): presumably the
/// return type, the call arguments, and the outgoing argument descriptors --
/// confirm.
/// \p retAlignment is a MaybeAlign, i.e. it may be unset.
/// \p VAInfo is optional. When present, its second member is a *reference* to
/// an APInt, so no copy is stored and the referenced APInt has to stay alive
/// for as long as this function uses it.
/// \p UniqueCallSite is an unsigned value supplied by the caller.
507 std::string
508 getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
509 const SmallVectorImpl<ISD::OutputArg> &, MaybeAlign retAlignment,
510 std::optional<std::pair<unsigned, const APInt &>> VAInfo,
511 const CallBase &CB, unsigned UniqueCallSite) const;
513 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
515 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
516 SelectionDAG &DAG) const override;
/// Handles operand \p Op for the constraint string \p Constraint. Both
/// \p Constraint and \p Ops are taken by mutable reference. NOTE(review):
/// presumably the lowered operand(s) are appended to \p Ops -- confirm against
/// the definition.
518 void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
519 std::vector<SDValue> &Ops,
520 SelectionDAG &DAG) const override;
524 // PTX always uses 32-bit shift amounts
// Both parameters are therefore left unnamed and ignored: the result is
// MVT::i32 regardless of the data layout or the type being shifted.
525 MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
526 return MVT::i32;
527 }
530 getPreferredVectorAction(MVT VT) const override;
532 // Get the degree of precision we want from 32-bit floating point division
533 // operations.
534 //
535 // 0 - Use ptx div.approx
536 // 1 - Use ptx div.full (approximate, but less so than div.approx)
537 // 2 - Use IEEE-compliant div instructions, if available.
// The level is reported as a plain int taking one of the values listed above.
538 int getDivF32Level() const;
540 // Get whether we should use a precise or approximate 32-bit floating point
541 // sqrt instruction.
// NOTE(review): judging by the name, `true` selects the precise form --
// confirm against the definition.
542 bool usePrecSqrtF32() const;
544 // Get whether we should use instructions that flush floating-point denormals
545 // to sign-preserving zero.
// The query takes the MachineFunction \p MF, so the answer can differ from
// one function to another.
546 bool useF32FTZ(const MachineFunction &MF) const;
549 int &ExtraSteps, bool &UseOneConst,
550 bool Reciprocal) const override;
// Always reports 2. NOTE(review): presumably the minimum number of divisions
// sharing one divisor before they are rewritten to use a reciprocal --
// confirm against the TargetLowering documentation.
552 unsigned combineRepeatedFPDivisors() const override { return 2; }
// NOTE(review): presumably reports whether FMA formation is permitted for
// \p MF at optimization level \p OptLevel -- inferred from the name and
// parameters only; confirm against the definition.
554 bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
// NOTE(review): presumably reports whether unsafe floating-point math is
// permitted for \p MF -- inferred from the name only; confirm against the
// definition.
555 bool allowUnsafeFPMath(MachineFunction &MF) const;
558 EVT) const override {
559 return true;
560 }
// Unconditionally true: \p VT is never inspected, so the answer is the same
// for every value type.
562 bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
564 // The default is to transform llvm.ctlz(x, false) (where false indicates that
565 // x == 0 is not undefined behavior) into a branch that checks whether x is 0
566 // and avoids calling ctlz in that case. We have a dedicated ctlz
567 // instruction, so we say that ctlz is cheap to speculate.
// \p Ty is ignored: the answer is true for every type.
568 bool isCheapToSpeculateCtlz(Type *Ty) const override { return true; }
572 }
576 }
579 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
// Reference member: this object does not own the subtarget, so the subtarget
// must outlive it.
582 const NVPTXSubtarget &STI; // cache the subtarget here
// Returns an SDValue for the parameter selected by index \p idx; the EVT
// argument is unnamed in this declaration. NOTE(review): presumably a symbol
// node naming the idx-th parameter -- confirm against the definition.
583 SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
// Per-operation helpers. Every one is const and has the same
// (SDValue Op, SelectionDAG &DAG) -> SDValue shape as LowerOperation.
// NOTE(review): presumably LowerOperation dispatches to the helper named
// after the opcode being lowered, and the suffixed variants (32/64, i1,
// Vector) handle specific value types -- confirm against the .cpp file.
585 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
586 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
587 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
589 SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
590 SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
591 SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
593 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
594 SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
596 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
597 SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
598 SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
600 SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
601 SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
603 SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
605 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
606 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
// Override taking node \p N, an output list \p Results (mutable reference)
// and the DAG. NOTE(review): presumably replacement values for N's results
// are appended to \p Results -- confirm against the definition.
608 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
609 SelectionDAG &DAG) const override;
// Override taking node \p N and the combiner state \p DCI; returns an
// SDValue. NOTE(review): presumably the replacement for N, or an empty value
// when no combine applies -- confirm against the definition.
610 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
// Returns the Align to use for the argument at index \p Idx, of type \p Ty,
// in a call to \p Callee, given the data layout \p DL. \p CB is passed as a
// pointer. NOTE(review): it may presumably be null when no call instruction
// is available -- confirm against the definition.
612 Align getArgumentAlignment(SDValue Callee, const CallBase *CB, Type *Ty,
613 unsigned Idx, const DataLayout &DL) const;
615} // namespace llvm
