//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//

#include "NVPTXISelDAGToDAG.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"

using namespace llvm;

#define DEBUG_TYPE "nvptx-isel"
#define PASS_NAME "NVPTX DAG->DAG Pattern Instruction Selection"

static cl::opt<bool>
    EnableRsqrtOpt("nvptx-rsqrt-approx-opt", cl::init(true), cl::Hidden,
                   cl::desc("Enable reciprocal sqrt optimization"));

/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
                                       llvm::CodeGenOptLevel OptLevel) {
  return new NVPTXDAGToDAGISel(TM, OptLevel);
}

char NVPTXDAGToDAGISel::ID = 0;

INITIALIZE_PASS(NVPTXDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)

NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
                                     CodeGenOptLevel OptLevel)
    : SelectionDAGISel(ID, tm, OptLevel), TM(tm) {
  doMulWide = (OptLevel > CodeGenOptLevel::None);
}

bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<NVPTXSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

int NVPTXDAGToDAGISel::getDivF32Level() const {
  return Subtarget->getTargetLowering()->getDivF32Level();
}

bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
  return Subtarget->getTargetLowering()->usePrecSqrtF32();
}

bool NVPTXDAGToDAGISel::useF32FTZ() const {
  return Subtarget->getTargetLowering()->useF32FTZ(*MF);
}

bool NVPTXDAGToDAGISel::allowFMA() const {
  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
  return TL->allowFMA(*MF, OptLevel);
}

bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
  return TL->allowUnsafeFPMath(*MF);
}

bool NVPTXDAGToDAGISel::doRsqrtOpt() const { return EnableRsqrtOpt; }

/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
void NVPTXDAGToDAGISel::Select(SDNode *N) {

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  switch (N->getOpcode()) {
  case ISD::LOAD:
  case ISD::ATOMIC_LOAD:
    if (tryLoad(N))
      return;
    break;
  case ISD::STORE:
  case ISD::ATOMIC_STORE:
    if (tryStore(N))
      return;
    break;
  case ISD::EXTRACT_VECTOR_ELT:
    if (tryEXTRACT_VECTOR_ELEMENT(N))
      return;
    break;
  case NVPTXISD::SETP_F16X2:
    SelectSETP_F16X2(N);
    return;
  case NVPTXISD::SETP_BF16X2:
    SelectSETP_BF16X2(N);
    return;
  case NVPTXISD::LoadV2:
  case NVPTXISD::LoadV4:
    if (tryLoadVector(N))
      return;
    break;
  case NVPTXISD::LDGV2:
  case NVPTXISD::LDGV4:
  case NVPTXISD::LDUV2:
  case NVPTXISD::LDUV4:
    if (tryLDGLDU(N))
      return;
    break;
  case NVPTXISD::StoreV2:
  case NVPTXISD::StoreV4:
    if (tryStoreVector(N))
      return;
    break;
  case NVPTXISD::LoadParam:
  case NVPTXISD::LoadParamV2:
  case NVPTXISD::LoadParamV4:
    if (tryLoadParam(N))
      return;
    break;
  case NVPTXISD::StoreRetval:
  case NVPTXISD::StoreRetvalV2:
  case NVPTXISD::StoreRetvalV4:
    if (tryStoreRetval(N))
      return;
    break;
  case NVPTXISD::StoreParam:
  case NVPTXISD::StoreParamV2:
  case NVPTXISD::StoreParamV4:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParamU32:
    if (tryStoreParam(N))
      return;
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    if (tryIntrinsicNoChain(N))
      return;
    break;
  case ISD::INTRINSIC_W_CHAIN:
    if (tryIntrinsicChain(N))
      return;
    break;
  // Texture intrinsics (the long run of NVPTXISD::Tex*/Tld4* opcodes is
  // elided from this listing):
  case NVPTXISD::Tex1DFloatS32:
    if (tryTextureIntrinsic(N))
      return;
    break;
  // Surface intrinsics (the long run of NVPTXISD::Suld* opcodes is elided
  // from this listing):
  case NVPTXISD::Suld1DI8Clamp:
    if (trySurfaceIntrinsic(N))
      return;
    break;
  case ISD::AND:
  case ISD::SRA:
  case ISD::SRL:
    // Try to select BFE
    if (tryBFE(N))
      return;
    break;
  case ISD::ADDRSPACECAST:
    SelectAddrSpaceCast(N);
    return;
  case ISD::ConstantFP:
    if (tryConstantFP(N))
      return;
    break;
  default:
    break;
  }
  SelectCode(N);
}

bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
  unsigned IID = N->getConstantOperandVal(1);
  switch (IID) {
  default:
    return false;
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_p:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_p:
    return tryLDGLDU(N);
  }
}

// There's no way to specify FP16 and BF16 immediates in .(b)f16 ops, so we
// have to load them into an .(b)f16 register first.
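// Illustrative sketch (register names are placeholders): an f16 constant
// such as 1.0 is first materialized into a 16-bit register, roughly
//   mov.b16 %rs1, 0x3C00;
// rather than appearing as an immediate operand of the consuming op.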
bool NVPTXDAGToDAGISel::tryConstantFP(SDNode *N) {
  if (N->getValueType(0) != MVT::f16 && N->getValueType(0) != MVT::bf16)
    return false;
  SDValue Val = CurDAG->getTargetConstantFP(
      cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), N->getValueType(0));
  SDNode *LoadConstF16 = CurDAG->getMachineNode(
      (N->getValueType(0) == MVT::f16 ? NVPTX::LOAD_CONST_F16
                                      : NVPTX::LOAD_CONST_BF16),
      SDLoc(N), N->getValueType(0), Val);
  ReplaceNode(N, LoadConstF16);
  return true;
}

// Map an ISD::CONDCODE value to the appropriate CmpMode expected by
// NVPTXInstPrinter::printCmpMode().
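// For instance, ISD::SETOGT maps to CmpMode::GT, which the printer emits as
// "gt" (plus ".ftz" when the FTZ flag below is set), so a comparison ends up
// as PTX roughly like "setp.gt.ftz.f32 %p1, %f1, %f2;" -- an illustrative
// sketch, not verbatim printer output.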
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
  using NVPTX::PTXCmpMode::CmpMode;
  unsigned PTXCmpMode = [](ISD::CondCode CC) {
    switch (CC) {
    default:
      llvm_unreachable("Unexpected condition code.");
    case ISD::SETOEQ:
      return CmpMode::EQ;
    case ISD::SETOGT:
      return CmpMode::GT;
    case ISD::SETOGE:
      return CmpMode::GE;
    case ISD::SETOLT:
      return CmpMode::LT;
    case ISD::SETOLE:
      return CmpMode::LE;
    case ISD::SETONE:
      return CmpMode::NE;
    case ISD::SETO:
      return CmpMode::NUM;
    case ISD::SETUO:
      return CmpMode::NotANumber;
    case ISD::SETUEQ:
      return CmpMode::EQU;
    case ISD::SETUGT:
      return CmpMode::GTU;
    case ISD::SETUGE:
      return CmpMode::GEU;
    case ISD::SETULT:
      return CmpMode::LTU;
    case ISD::SETULE:
      return CmpMode::LEU;
    case ISD::SETUNE:
      return CmpMode::NEU;
    case ISD::SETEQ:
      return CmpMode::EQ;
    case ISD::SETGT:
      return CmpMode::GT;
    case ISD::SETGE:
      return CmpMode::GE;
    case ISD::SETLT:
      return CmpMode::LT;
    case ISD::SETLE:
      return CmpMode::LE;
    case ISD::SETNE:
      return CmpMode::NE;
    }
  }(CondCode.get());

  if (FTZ)
    PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;

  return PTXCmpMode;
}

bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
  unsigned PTXCmpMode =
      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
  SDLoc DL(N);
  SDNode *SetP = CurDAG->getMachineNode(
      NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
      N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
  ReplaceNode(N, SetP);
  return true;
}

bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(SDNode *N) {
  unsigned PTXCmpMode =
      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
  SDLoc DL(N);
  SDNode *SetP = CurDAG->getMachineNode(
      NVPTX::SETP_bf16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
      N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
  ReplaceNode(N, SetP);
  return true;
}

// Find all instances of extract_vector_elt that use this v2f16 vector
// and coalesce them into a scattering move instruction.
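// Sketch: given uses (extract_vector_elt V, 0) and (extract_vector_elt V, 1)
// of one v2f16 value V, both extracts are rewired to the two results of a
// single I32toV2I16 machine node, which prints roughly as
//   mov.b32 {%rs1, %rs2}, %r1;
// (illustrative register names).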
bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
  SDValue Vector = N->getOperand(0);

  // We only care about 16x2 as it's the only real vector type we
  // need to deal with.
  MVT VT = Vector.getSimpleValueType();
  if (!Isv2x16VT(VT))
    return false;
  // Find and record all uses of this vector that extract element 0 or 1.
  SmallVector<SDNode *, 4> E0, E1;
  for (auto *U : Vector.getNode()->uses()) {
    if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      continue;
    if (U->getOperand(0) != Vector)
      continue;
    if (const ConstantSDNode *IdxConst =
            dyn_cast<ConstantSDNode>(U->getOperand(1))) {
      if (IdxConst->getZExtValue() == 0)
        E0.push_back(U);
      else if (IdxConst->getZExtValue() == 1)
        E1.push_back(U);
      else
        llvm_unreachable("Invalid vector index.");
    }
  }

  // There's no point scattering f16x2 if we only ever access one
  // element of it.
  if (E0.empty() || E1.empty())
    return false;

  // Merge (f16 extractelt(V, 0), f16 extractelt(V, 1))
  // into f16,f16 SplitF16x2(V)
  MVT EltVT = VT.getVectorElementType();
  SDNode *ScatterOp =
      CurDAG->getMachineNode(NVPTX::I32toV2I16, SDLoc(N), EltVT, EltVT, Vector);
  for (auto *Node : E0)
    ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
  for (auto *Node : E1)
    ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));

  return true;
}

static unsigned int getCodeAddrSpace(MemSDNode *N) {
  const Value *Src = N->getMemOperand()->getValue();

  if (!Src)
    return NVPTX::PTXLdStInstCode::GENERIC;

  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
    switch (PT->getAddressSpace()) {
    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
    case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
    default: break;
    }
  }
  return NVPTX::PTXLdStInstCode::GENERIC;
}

static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
                          unsigned CodeAddrSpace, MachineFunction *F) {
  // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
  // space.
  //
  // We have two ways of identifying invariant loads: Loads may be explicitly
  // marked as invariant, or we may infer them to be invariant.
  //
  // We currently infer invariance for loads from
  //  - constant global variables, and
  //  - kernel function pointer params that are noalias (i.e. __restrict) and
  //    never written to.
  //
  // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
  // not during the SelectionDAG phase).
  //
  // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
  // explicitly invariant loads because these are how clang tells us to use ldg
  // when the user uses a builtin.
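  //
  // Sketch: an IR load such as
  //   %v = load float, ptr addrspace(1) %p, !invariant.load !0
  // qualifies here and is then selected down the LDG path as
  // "ld.global.nc.f32" instead of a plain "ld.global.f32" (illustrative; the
  // operand form depends on the addressing mode chosen later).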
  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
    return false;

  if (N->isInvariant())
    return true;

  bool IsKernelFn = isKernelFunction(F->getFunction());

  // We use getUnderlyingObjects() here instead of getUnderlyingObject() mainly
  // because the former looks through phi nodes while the latter does not. We
  // need to look through phi nodes to handle pointer induction variables.
  SmallVector<const Value *, 8> Objs;
  getUnderlyingObjects(N->getMemOperand()->getValue(), Objs);

  return all_of(Objs, [&](const Value *V) {
    if (auto *A = dyn_cast<const Argument>(V))
      return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
    if (auto *GV = dyn_cast<const GlobalVariable>(V))
      return GV->isConstant();
    return false;
  });
}

bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
  unsigned IID = N->getConstantOperandVal(0);
  switch (IID) {
  default:
    return false;
  case Intrinsic::nvvm_texsurf_handle_internal:
    SelectTexSurfHandle(N);
    return true;
  }
}

void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
  // Op 0 is the intrinsic ID
  SDValue Wrapper = N->getOperand(1);
  SDValue GlobalVal = Wrapper.getOperand(0);
  ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
                                        MVT::i64, GlobalVal));
}

void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  SDValue Src = N->getOperand(0);
  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
  unsigned DstAddrSpace = CastN->getDestAddressSpace();
  assert(SrcAddrSpace != DstAddrSpace &&
         "addrspacecast must be between different address spaces");

  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
    // Specific to generic
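    // (E.g. a global-to-generic cast on a 64-bit target selects
    // NVPTX::cvta_global_64, printed roughly as "cvta.global.u64 %rd1, %rd2;"
    // -- an illustrative sketch with placeholder register names.)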
    unsigned Opc;
    switch (SrcAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_global_64 : NVPTX::cvta_global;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32
                                ? NVPTX::cvta_shared_6432
                                : NVPTX::cvta_shared_64)
                         : NVPTX::cvta_shared;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32
                                ? NVPTX::cvta_const_6432
                                : NVPTX::cvta_const_64)
                         : NVPTX::cvta_const;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32
                                ? NVPTX::cvta_local_6432
                                : NVPTX::cvta_local_64)
                         : NVPTX::cvta_local;
      break;
    }
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                                          Src));
    return;
  } else {
    // Generic to specific
    if (SrcAddrSpace != 0)
      report_fatal_error("Cannot cast between two non-generic address spaces");
    unsigned Opc;
    switch (DstAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_to_global_64 : NVPTX::cvta_to_global;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32
                                ? NVPTX::cvta_to_shared_3264
                                : NVPTX::cvta_to_shared_64)
                         : NVPTX::cvta_to_shared;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32
                                ? NVPTX::cvta_to_const_3264
                                : NVPTX::cvta_to_const_64)
                         : NVPTX::cvta_to_const;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32
                                ? NVPTX::cvta_to_local_3264
                                : NVPTX::cvta_to_local_64)
                         : NVPTX::cvta_to_local;
      break;
    case ADDRESS_SPACE_PARAM:
      Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
                         : NVPTX::nvvm_ptr_gen_to_param;
      break;
    }
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                                          Src));
    return;
  }
}

// Helper function template to reduce the amount of boilerplate code for
// opcode selection.
static std::optional<unsigned>
pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8,
                unsigned Opcode_i16, unsigned Opcode_i32,
                std::optional<unsigned> Opcode_i64, unsigned Opcode_f32,
                std::optional<unsigned> Opcode_f64) {
  switch (VT) {
  case MVT::i1:
  case MVT::i8:
    return Opcode_i8;
  case MVT::i16:
    return Opcode_i16;
  case MVT::i32:
    return Opcode_i32;
  case MVT::i64:
    return Opcode_i64;
  case MVT::f16:
  case MVT::bf16:
    return Opcode_i16;
  case MVT::v2f16:
  case MVT::v2bf16:
  case MVT::v2i16:
  case MVT::v4i8:
    return Opcode_i32;
  case MVT::f32:
    return Opcode_f32;
  case MVT::f64:
    return Opcode_f64;
  default:
    return std::nullopt;
  }
}

static int getLdStRegType(EVT VT) {
  if (VT.isFloatingPoint())
    switch (VT.getSimpleVT().SimpleTy) {
    case MVT::f16:
    case MVT::bf16:
    case MVT::v2f16:
    case MVT::v2bf16:
      return NVPTX::PTXLdStInstCode::Untyped;
    default:
      return NVPTX::PTXLdStInstCode::Float;
    }
  else
    return NVPTX::PTXLdStInstCode::Unsigned;
}

bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
  SDLoc dl(N);
  MemSDNode *LD = cast<MemSDNode>(N);
  assert(LD->readMem() && "Expected load");
  LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  SDNode *NVPTXLD = nullptr;

  // do not support pre/post inc/dec
  if (PlainLoad && PlainLoad->isIndexed())
    return false;

  if (!LoadedVT.isSimple())
    return false;

  AtomicOrdering Ordering = LD->getSuccessOrdering();
  // In order to lower atomic loads with stronger guarantees we would need to
  // use load.acquire or insert fences. However these features were only added
  // with PTX ISA 6.0 / sm_70.
  // TODO: Check if we can actually use the new instructions and implement them.
  if (isStrongerThanMonotonic(Ordering))
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
  if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
    return tryLDGLDU(N);
  }

  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  // - .volatile has the same memory synchronization semantics as .relaxed.sys
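  // (E.g. a volatile or monotonic-atomic load from the global space is emitted
  // roughly as "ld.volatile.global.u32 %r1, [%rd1];" -- an illustrative
  // sketch with placeholder register names.)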
  bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic;
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Type Setting: fromType + fromTypeWidth
  //
  // Sign   : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //          type is integer
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT SimpleVT = LoadedVT.getSimpleVT();
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned fromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
  unsigned int fromType;

  // Vector Setting
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    assert((Isv2x16VT(LoadedVT) || LoadedVT == MVT::v4i8) &&
           "Unexpected vector type");
    // v2f16/v2bf16/v2i16 is loaded using ld.b32
    fromTypeWidth = 32;
  }
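  // (Sketch: a v2f16 load therefore selects the 32-bit opcode and prints
  // roughly as "ld.global.b32 %r1, [%rd1];" for a global-space pointer.)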

  if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD))
    fromType = NVPTX::PTXLdStInstCode::Signed;
  else
    fromType = getLdStRegType(ScalarVT);

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue Addr;
  SDValue Offset, Base;
  std::optional<unsigned> Opcode;
  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;

  if (SelectDirectAddr(N1, Addr)) {
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar,
                             NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
                             NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Addr, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
                               : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
                             NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
                             NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
                               : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
                          NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
                          NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari,
                               NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
                               NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  } else {
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
                          NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64,
                          NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg,
                               NVPTX::LD_i32_areg, NVPTX::LD_i64_areg,
                               NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), N1, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  }

  if (!NVPTXLD)
    return false;

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXLD), {MemRef});

  ReplaceNode(N, NVPTXLD);
  return true;
}

bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  std::optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *LD;
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT LoadedVT = MemSD->getMemoryVT();

  if (!LoadedVT.isSimple())
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
    return tryLDGLDU(N);
  }

  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Vector Setting
  MVT SimpleVT = LoadedVT.getSimpleVT();

  // Type Setting: fromType + fromTypeWidth
  //
  // Sign   : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //          type is integer
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
  unsigned int FromType;
  // The last operand holds the original LoadSDNode::getExtensionType() value
  unsigned ExtensionType = cast<ConstantSDNode>(
      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
  if (ExtensionType == ISD::SEXTLOAD)
    FromType = NVPTX::PTXLdStInstCode::Signed;
  else
    FromType = getLdStRegType(ScalarVT);

  unsigned VecType;

  switch (N->getOpcode()) {
  case NVPTXISD::LoadV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
    break;
  case NVPTXISD::LoadV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
    break;
  default:
    return false;
  }

  EVT EltVT = N->getValueType(0);

  // v8x16 is a special case. PTX doesn't have ld.v8.16
  // instruction. Instead, we split the vector into v2x16 chunks and
  // load them with ld.v4.b32.
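  // (Sketch: a v8f16 load becomes a single ld.v4.b32, roughly
  //   ld.global.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
  // with each 32-bit result holding one v2f16 chunk; register names are
  // placeholders.)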
  if (Isv2x16VT(EltVT)) {
    assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
    EltVT = MVT::i32;
    FromType = NVPTX::PTXLdStInstCode::Untyped;
    FromTypeWidth = 32;
  }

  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::LoadV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
                               NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
                               NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
      break;
    case NVPTXISD::LoadV4:
      Opcode =
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
                          NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
                          std::nullopt, NVPTX::LDV_f32_v4_avar, std::nullopt);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Addr, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  } else if (PointerSize == 64
                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::LoadV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
                               NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
                               NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
      break;
    case NVPTXISD::LoadV4:
      Opcode =
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
                          NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
                          std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  } else if (PointerSize == 64
                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                            NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64,
                            NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64,
                            NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
            NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
            NVPTX::LDV_f32_v4_ari_64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
                                 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
                                 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
        break;
      case NVPTXISD::LoadV4:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
                            NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
                            std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };

    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  } else {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
            NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
            NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
            NVPTX::LDV_f64_v2_areg_64);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
            NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
            NVPTX::LDV_f32_v4_areg_64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
                            NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
                            NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f32_v2_areg,
                            NVPTX::LDV_f64_v2_areg);
        break;
      case NVPTXISD::LoadV4:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
                            NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
                            std::nullopt, NVPTX::LDV_f32_v4_areg, std::nullopt);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Op1, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  }

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});

  ReplaceNode(N, LD);
  return true;
}

bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1;
  MemSDNode *Mem;
  bool IsLDG = true;

  // If this is an LDG intrinsic, the address is the third operand. If it's an
  // LDG/LDU SD node (from custom vector handling), then it's the second
  // operand.
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    Op1 = N->getOperand(2);
    Mem = cast<MemIntrinsicSDNode>(N);
    unsigned IID = N->getConstantOperandVal(1);
    switch (IID) {
    default:
      return false;
    case Intrinsic::nvvm_ldg_global_f:
    case Intrinsic::nvvm_ldg_global_i:
    case Intrinsic::nvvm_ldg_global_p:
      IsLDG = true;
      break;
    case Intrinsic::nvvm_ldu_global_f:
    case Intrinsic::nvvm_ldu_global_i:
    case Intrinsic::nvvm_ldu_global_p:
      IsLDG = false;
      break;
    }
  } else {
    Op1 = N->getOperand(1);
    Mem = cast<MemSDNode>(N);
  }

  std::optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *LD;
  SDValue Base, Offset, Addr;
  EVT OrigType = N->getValueType(0);

  EVT EltVT = Mem->getMemoryVT();
  unsigned NumElts = 1;
  if (EltVT.isVector()) {
    NumElts = EltVT.getVectorNumElements();
    EltVT = EltVT.getVectorElementType();
    // Vectors of 16-bit types are loaded/stored as multiples of v2x16 elements.
    if ((EltVT == MVT::f16 && OrigType == MVT::v2f16) ||
        (EltVT == MVT::bf16 && OrigType == MVT::v2bf16) ||
        (EltVT == MVT::i16 && OrigType == MVT::v2i16)) {
      assert(NumElts % 2 == 0 && "Vector must have even number of elements");
      EltVT = OrigType;
      NumElts /= 2;
    } else if (OrigType == MVT::v4i8) {
      EltVT = OrigType;
      NumElts = 1;
    }
  }

  // Build the "promoted" result VTList for the load. If we are really loading
  // i8s, then the return type will be promoted to i16 since we do not expose
  // 8-bit registers in NVPTX.
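  // (Sketch: a scalar i8 LDG therefore produces an i16-typed result, emitted
  // roughly as "ld.global.nc.u8 %rs1, [%rd1];" into a 16-bit register;
  // register names are placeholders.)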
  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
  SmallVector<EVT, 5> InstVTs;
  for (unsigned i = 0; i != NumElts; ++i) {
    InstVTs.push_back(NodeVT);
  }
  InstVTs.push_back(MVT::Other);
  SDVTList InstVTList = CurDAG->getVTList(InstVTs);

  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case ISD::LOAD:
    case ISD::INTRINSIC_W_CHAIN:
      if (IsLDG)
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
      else
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
      break;
    case NVPTXISD::LoadV2:
    case NVPTXISD::LDGV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
      break;
    case NVPTXISD::LDUV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
      break;
    case NVPTXISD::LoadV4:
    case NVPTXISD::LDGV4:
      Opcode = pickOpcodeForVT(
          EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
      break;
    case NVPTXISD::LDUV4:
      Opcode = pickOpcodeForVT(
          EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { Addr, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                          : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (TM.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
            NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
            NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
            NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
            NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
            NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
            NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
            NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
            NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
            NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
            NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
            NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
            NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = {Base, Offset, Chain};
    LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
  } else {
    if (TM.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
            NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
            NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
            NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
            NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
            NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
            NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
            NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
            NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
            NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
            NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
            NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
            NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { Op1, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
  }

  // For automatic generation of LDG (through SelectLoad[Vector], not the
  // intrinsics), we may have an extending load like:
  //
  //   i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
  //
  // In this case, the matching logic above will select a load for the original
  // memory type (in this case, i8) and our types will not match (the node needs
  // to return an i32 in this case). Our LDG/LDU nodes do not support the
  // concept of sign-/zero-extension, so emulate it here by adding an explicit
  // CVT instruction. Ptxas should clean up any redundancies here.
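  //
  // (Sketch: for the zext-from-i8 example above, the selected i8 load is
  // followed by something like "cvt.u32.u16 %r1, %rs1;" to widen the value to
  // the i32 the original node produced -- illustrative; GetConvertOpcode
  // picks the exact CVT variant.)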

  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);

  if (OrigType != EltVT &&
      (LdNode || (OrigType.isFloatingPoint() && EltVT.isFloatingPoint()))) {
    // We have an extending-load. The instruction we selected operates on the
    // smaller type, but the SDNode we are replacing has the larger type. We
    // need to emit a CVT to make the types match.
    unsigned CvtOpc =
        GetConvertOpcode(OrigType.getSimpleVT(), EltVT.getSimpleVT(), LdNode);

    // For each output value, apply the manual sign/zero-extension and make sure
    // all users of the load go through that CVT.
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue Res(LD, i);
      SDValue OrigVal(N, i);

      SDNode *CvtNode =
          CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
                                 CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
                                                           DL, MVT::i32));
      ReplaceUses(OrigVal, SDValue(CvtNode, 0));
    }
  }

  ReplaceNode(N, LD);
  return true;
}

bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
  SDLoc dl(N);
  MemSDNode *ST = cast<MemSDNode>(N);
  assert(ST->writeMem() && "Expected store");
  StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N);
  AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N);
  assert((PlainStore || AtomicStore) && "Expected store");
  EVT StoreVT = ST->getMemoryVT();
  SDNode *NVPTXST = nullptr;

  // do not support pre/post inc/dec
  if (PlainStore && PlainStore->isIndexed())
    return false;

  if (!StoreVT.isSimple())
    return false;

  AtomicOrdering Ordering = ST->getSuccessOrdering();
  // In order to lower atomic stores with stronger guarantees we would need to
  // use store.release or insert fences. However these features were only added
  // with PTX ISA 6.0 / sm_70.
  // TODO: Check if we can actually use the new instructions and implement them.
  if (isStrongerThanMonotonic(Ordering))
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  // - .volatile has the same memory synchronization semantics as .relaxed.sys
  bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic;
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  //
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
  if (SimpleVT.isVector()) {
    assert((Isv2x16VT(StoreVT) || StoreVT == MVT::v4i8) &&
           "Unexpected vector type");
    // v2x16 is stored using st.b32
    toTypeWidth = 32;
  }

  unsigned int toType = getLdStRegType(ScalarVT);

  // Create the machine instruction DAG
  SDValue Chain = ST->getChain();
  SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Addr;
  SDValue Offset, Base;
  std::optional<unsigned> Opcode;
  MVT::SimpleValueType SourceVT =
      Value.getNode()->getSimpleValueType(0).SimpleTy;

  if (SelectDirectAddr(BasePtr, Addr)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
                             NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
                             NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Addr,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  } else if (PointerSize == 64
                 ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)
                 : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
                             NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
                             NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Base,
                     Offset,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  } else if (PointerSize == 64
                 ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
                 : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
                          NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
                          NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
                               NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
                               NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
    if (!Opcode)
      return false;

    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Base,
                     Offset,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  } else {
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
                          NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
                          NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
                               NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
                               NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     BasePtr,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  }

  if (!NVPTXST)
    return false;

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXST), {MemRef});
  ReplaceNode(N, NVPTXST);
  return true;
}

bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  std::optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *ST;
  EVT EltVT = Op1.getValueType();
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT StoreVT = MemSD->getMemoryVT();

  // Address Space Setting
  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
    report_fatal_error("Cannot store to pointer that points to constant "
                       "memory space");
  }
  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  assert(StoreVT.isSimple() && "Store value is not simple");
  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
  unsigned ToType = getLdStRegType(ScalarVT);

  SmallVector<SDValue, 12> StOps;
  SDValue N2;
  unsigned VecType;

  switch (N->getOpcode()) {
  case NVPTXISD::StoreV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    N2 = N->getOperand(3);
    break;
  case NVPTXISD::StoreV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    StOps.push_back(N->getOperand(3));
    StOps.push_back(N->getOperand(4));
    N2 = N->getOperand(5);
    break;
  default:
    return false;
  }

  // v8x16 is a special case. PTX doesn't have st.v8.x16
  // instruction. Instead, we split the vector into v2x16 chunks and
  // store them with st.v4.b32.
  if (Isv2x16VT(EltVT)) {
    assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected store opcode.");
    EltVT = MVT::i32;
    ToType = NVPTX::PTXLdStInstCode::Untyped;
    ToTypeWidth = 32;
  }

  StOps.push_back(getI32Imm(IsVolatile, DL));
  StOps.push_back(getI32Imm(CodeAddrSpace, DL));
  StOps.push_back(getI32Imm(VecType, DL));
  StOps.push_back(getI32Imm(ToType, DL));
  StOps.push_back(getI32Imm(ToTypeWidth, DL));

  if (SelectDirectAddr(N2, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::StoreV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
                               NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
                               NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
      break;
    case NVPTXISD::StoreV4:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
                               NVPTX::STV_i32_v4_avar, std::nullopt,
                               NVPTX::STV_f32_v4_avar, std::nullopt);
      break;
    }
    StOps.push_back(Addr);
  } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                               : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::StoreV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
                               NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
                               NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
      break;
    case NVPTXISD::StoreV4:
      Opcode =
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
                          NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
                          std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
      break;
    }
    StOps.push_back(Base);
    StOps.push_back(Offset);
  } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
                               : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::StoreV2:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                            NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64,
                            NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64,
                            NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64);
        break;
      case NVPTXISD::StoreV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
            NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
            NVPTX::STV_f32_v4_ari_64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::StoreV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
                                 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
                                 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
        break;
      case NVPTXISD::StoreV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
                                 NVPTX::STV_i32_v4_ari, std::nullopt,
                                 NVPTX::STV_f32_v4_ari, std::nullopt);
        break;
      }
    }
    StOps.push_back(Base);
    StOps.push_back(Offset);
  } else {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::StoreV2:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
            NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
            NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
            NVPTX::STV_f64_v2_areg_64);
        break;
      case NVPTXISD::StoreV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
            NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt,
            NVPTX::STV_f32_v4_areg_64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::StoreV2:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
                            NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
                            NVPTX::STV_i64_v2_areg, NVPTX::STV_f32_v2_areg,
                            NVPTX::STV_f64_v2_areg);
        break;
      case NVPTXISD::StoreV4:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
                            NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
                            std::nullopt, NVPTX::STV_f32_v4_areg, std::nullopt);
        break;
      }
    }
    StOps.push_back(N2);
  }

  if (!Opcode)
    return false;

  StOps.push_back(Chain);

  ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, StOps);

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});

  ReplaceNode(N, ST);
  return true;
}

bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
  SDValue Chain = Node->getOperand(0);
  SDValue Offset = Node->getOperand(2);
  SDValue Glue = Node->getOperand(3);
  SDLoc DL(Node);
  MemSDNode *Mem = cast<MemSDNode>(Node);

  unsigned VecSize;
  switch (Node->getOpcode()) {
  default:
    return false;
  case NVPTXISD::LoadParam:
    VecSize = 1;
    break;
  case NVPTXISD::LoadParamV2:
    VecSize = 2;
    break;
  case NVPTXISD::LoadParamV4:
    VecSize = 4;
    break;
  }

  EVT EltVT = Node->getValueType(0);
  EVT MemVT = Mem->getMemoryVT();

  std::optional<unsigned> Opcode;

  switch (VecSize) {
  default:
    return false;
  case 1:
    Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
                             NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
                             NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
                             NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
    break;
  case 2:
    Opcode =
        pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
                        NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
                        NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32,
                        NVPTX::LoadParamMemV2F64);
    break;
  case 4:
    Opcode =
        pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
                        NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32,
                        std::nullopt, NVPTX::LoadParamMemV4F32, std::nullopt);
    break;
  }
  if (!Opcode)
    return false;

  SDVTList VTs;
  if (VecSize == 1) {
    VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
  } else if (VecSize == 2) {
    VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
  } else {
    EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
    VTs = CurDAG->getVTList(EVTs);
  }

  unsigned OffsetVal = Offset->getAsZExtVal();

  SmallVector<SDValue, 2> Ops;
  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
  Ops.push_back(Chain);
  Ops.push_back(Glue);

  ReplaceNode(Node, CurDAG->getMachineNode(*Opcode, DL, VTs, Ops));
  return true;
}

bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
  SDLoc DL(N);
  SDValue Chain = N->getOperand(0);
  SDValue Offset = N->getOperand(1);
  unsigned OffsetVal = Offset->getAsZExtVal();
  MemSDNode *Mem = cast<MemSDNode>(N);

  // How many elements do we have?
  unsigned NumElts = 1;
  switch (N->getOpcode()) {
  default:
    return false;
2116 NumElts = 1;
2117 break;
2118 case NVPTXISD::StoreRetvalV2:
2119 NumElts = 2;
2120 break;
2121 case NVPTXISD::StoreRetvalV4:
2122 NumElts = 4;
2123 break;
2124 }
2125
2126 // Build vector of operands
2127 SmallVector<SDValue, 6> Ops;
2128 for (unsigned i = 0; i < NumElts; ++i)
2129 Ops.push_back(N->getOperand(i + 2));
2130 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2131 Ops.push_back(Chain);
2132
2133 // Determine target opcode
2134 // If we have an i1, use an 8-bit store. The lowering code in
2135 // NVPTXISelLowering will have already emitted an upcast.
2136 std::optional<unsigned> Opcode = 0;
2137 switch (NumElts) {
2138 default:
2139 return false;
2140 case 1:
2141 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2142 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2143 NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2144 NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2145 if (Opcode == NVPTX::StoreRetvalI8) {
2146 // Fine tune the opcode depending on the size of the operand.
2147 // This helps to avoid creating redundant COPY instructions in
2148 // InstrEmitter::AddRegisterOperand().
2149 switch (Ops[0].getSimpleValueType().SimpleTy) {
2150 default:
2151 break;
2152 case MVT::i32:
2153 Opcode = NVPTX::StoreRetvalI8TruncI32;
2154 break;
2155 case MVT::i64:
2156 Opcode = NVPTX::StoreRetvalI8TruncI64;
2157 break;
2158 }
2159 }
2160 break;
2161 case 2:
2162 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2163 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2164 NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2165 NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2166 break;
2167 case 4:
2168 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2169 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2170 NVPTX::StoreRetvalV4I32, std::nullopt,
2171 NVPTX::StoreRetvalV4F32, std::nullopt);
2172 break;
2173 }
2174 if (!Opcode)
2175 return false;
2176
2177 SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
2178 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2179 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
2180
2181 ReplaceNode(N, Ret);
2182 return true;
2183}
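// Editorial sketch of the i8 "fine tune" above, mirroring the switch on
// Ops[0]'s type: when the 8-bit retval is still live in a wider register,
// the Trunc variants fold the truncation into the store itself, i.e.
//   i32 value -> NVPTX::StoreRetvalI8TruncI32
//   i64 value -> NVPTX::StoreRetvalI8TruncI64
//   otherwise -> NVPTX::StoreRetvalI8
// which avoids the extra COPY that InstrEmitter would otherwise create.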
2184
2185bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2186 SDLoc DL(N);
2187 SDValue Chain = N->getOperand(0);
2188 SDValue Param = N->getOperand(1);
2189 unsigned ParamVal = Param->getAsZExtVal();
2190 SDValue Offset = N->getOperand(2);
2191 unsigned OffsetVal = Offset->getAsZExtVal();
2192 MemSDNode *Mem = cast<MemSDNode>(N);
2193 SDValue Glue = N->getOperand(N->getNumOperands() - 1);
2194
2195 // How many elements do we have?
2196 unsigned NumElts = 1;
2197 switch (N->getOpcode()) {
2198 default:
2199 return false;
2200 case NVPTXISD::StoreParamU32:
2201 case NVPTXISD::StoreParamS32:
2202 case NVPTXISD::StoreParam:
2203 NumElts = 1;
2204 break;
2205 case NVPTXISD::StoreParamV2:
2206 NumElts = 2;
2207 break;
2208 case NVPTXISD::StoreParamV4:
2209 NumElts = 4;
2210 break;
2211 }
2212
2213 // Build vector of operands
2214 SmallVector<SDValue, 8> Ops;
2215 for (unsigned i = 0; i < NumElts; ++i)
2216 Ops.push_back(N->getOperand(i + 3));
2217 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2218 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2219 Ops.push_back(Chain);
2220 Ops.push_back(Glue);
2221
2222 // Determine target opcode
2223 // If we have an i1, use an 8-bit store. The lowering code in
2224 // NVPTXISelLowering will have already emitted an upcast.
2225 std::optional<unsigned> Opcode = 0;
2226 switch (N->getOpcode()) {
2227 default:
2228 switch (NumElts) {
2229 default:
2230 return false;
2231 case 1:
2232 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2233 NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2234 NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2235 NVPTX::StoreParamF32, NVPTX::StoreParamF64);
2236 if (Opcode == NVPTX::StoreParamI8) {
2237 // Fine tune the opcode depending on the size of the operand.
2238 // This helps to avoid creating redundant COPY instructions in
2239 // InstrEmitter::AddRegisterOperand().
2240 switch (Ops[0].getSimpleValueType().SimpleTy) {
2241 default:
2242 break;
2243 case MVT::i32:
2244 Opcode = NVPTX::StoreParamI8TruncI32;
2245 break;
2246 case MVT::i64:
2247 Opcode = NVPTX::StoreParamI8TruncI64;
2248 break;
2249 }
2250 }
2251 break;
2252 case 2:
2253 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2254 NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2255 NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2256 NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
2257 break;
2258 case 4:
2259 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2260 NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2261 NVPTX::StoreParamV4I32, std::nullopt,
2262 NVPTX::StoreParamV4F32, std::nullopt);
2263 break;
2264 }
2265 if (!Opcode)
2266 return false;
2267 break;
2268 // Special case: if we have a sign-extend/zero-extend node, insert the
2269 // conversion instruction first, and use that as the value operand to
2270 // the selected StoreParam node.
2271 case NVPTXISD::StoreParamU32: {
2272 Opcode = NVPTX::StoreParamI32;
2273 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
2274 MVT::i32);
2275 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2276 MVT::i32, Ops[0], CvtNone);
2277 Ops[0] = SDValue(Cvt, 0);
2278 break;
2279 }
2280 case NVPTXISD::StoreParamS32: {
2281 Opcode = NVPTX::StoreParamI32;
2282 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
2283 MVT::i32);
2284 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2285 MVT::i32, Ops[0], CvtNone);
2286 Ops[0] = SDValue(Cvt, 0);
2287 break;
2288 }
2289 }
2290
2291 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2292 SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, RetVTs, Ops);
2293 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2294 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
2295
2296 ReplaceNode(N, Ret);
2297 return true;
2298}
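// Editorial illustration (assumed PTX rendering; register names invented):
// for the StoreParamU32 special case above, a 16-bit value passed as a
// 32-bit parameter is first widened by the inserted CVT_u32_u16, roughly
//   cvt.u32.u16  %r1, %rs1;
//   st.param.b32 [param0+0], %r1;
// StoreParamS32 is identical except that it uses the signed cvt.s32.s16.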
2299
2300bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
2301 unsigned Opc = 0;
2302
2303 switch (N->getOpcode()) {
2304 default: return false;
2306 Opc = NVPTX::TEX_1D_F32_S32_RR;
2307 break;
2309 Opc = NVPTX::TEX_1D_F32_F32_RR;
2310 break;
2312 Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
2313 break;
2315 Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
2316 break;
2318 Opc = NVPTX::TEX_1D_S32_S32_RR;
2319 break;
2321 Opc = NVPTX::TEX_1D_S32_F32_RR;
2322 break;
2324 Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
2325 break;
2327 Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
2328 break;
2330 Opc = NVPTX::TEX_1D_U32_S32_RR;
2331 break;
2333 Opc = NVPTX::TEX_1D_U32_F32_RR;
2334 break;
2336 Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
2337 break;
2339 Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
2340 break;
2342 Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
2343 break;
2345 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
2346 break;
2348 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
2349 break;
2351 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
2352 break;
2354 Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
2355 break;
2357 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
2358 break;
2360 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
2361 break;
2363 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
2364 break;
2366 Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
2367 break;
2369 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
2370 break;
2372 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
2373 break;
2375 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
2376 break;
2378 Opc = NVPTX::TEX_2D_F32_S32_RR;
2379 break;
2381 Opc = NVPTX::TEX_2D_F32_F32_RR;
2382 break;
2384 Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
2385 break;
2387 Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
2388 break;
2390 Opc = NVPTX::TEX_2D_S32_S32_RR;
2391 break;
2393 Opc = NVPTX::TEX_2D_S32_F32_RR;
2394 break;
2396 Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
2397 break;
2399 Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
2400 break;
2402 Opc = NVPTX::TEX_2D_U32_S32_RR;
2403 break;
2405 Opc = NVPTX::TEX_2D_U32_F32_RR;
2406 break;
2408 Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
2409 break;
2411 Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
2412 break;
2414 Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
2415 break;
2417 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
2418 break;
2420 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
2421 break;
2423 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
2424 break;
2426 Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
2427 break;
2429 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
2430 break;
2432 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
2433 break;
2435 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
2436 break;
2438 Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
2439 break;
2441 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
2442 break;
2444 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
2445 break;
2447 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
2448 break;
2450 Opc = NVPTX::TEX_3D_F32_S32_RR;
2451 break;
2453 Opc = NVPTX::TEX_3D_F32_F32_RR;
2454 break;
2456 Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
2457 break;
2459 Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
2460 break;
2462 Opc = NVPTX::TEX_3D_S32_S32_RR;
2463 break;
2465 Opc = NVPTX::TEX_3D_S32_F32_RR;
2466 break;
2468 Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
2469 break;
2471 Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
2472 break;
2474 Opc = NVPTX::TEX_3D_U32_S32_RR;
2475 break;
2477 Opc = NVPTX::TEX_3D_U32_F32_RR;
2478 break;
2480 Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
2481 break;
2483 Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
2484 break;
2486 Opc = NVPTX::TEX_CUBE_F32_F32_RR;
2487 break;
2489 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
2490 break;
2492 Opc = NVPTX::TEX_CUBE_S32_F32_RR;
2493 break;
2495 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
2496 break;
2498 Opc = NVPTX::TEX_CUBE_U32_F32_RR;
2499 break;
2501 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
2502 break;
2504 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
2505 break;
2507 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
2508 break;
2510 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
2511 break;
2513 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
2514 break;
2516 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
2517 break;
2519 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
2520 break;
2522 Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
2523 break;
2525 Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
2526 break;
2528 Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
2529 break;
2531 Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
2532 break;
2534 Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
2535 break;
2537 Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
2538 break;
2540 Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
2541 break;
2543 Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
2544 break;
2546 Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
2547 break;
2549 Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
2550 break;
2552 Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
2553 break;
2555 Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
2556 break;
2558 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
2559 break;
2561 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
2562 break;
2564 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
2565 break;
2567 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
2568 break;
2570 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
2571 break;
2573 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
2574 break;
2576 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
2577 break;
2579 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
2580 break;
2582 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
2583 break;
2585 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
2586 break;
2588 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
2589 break;
2591 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
2592 break;
2594 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
2595 break;
2597 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
2598 break;
2600 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
2601 break;
2603 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
2604 break;
2606 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
2607 break;
2609 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
2610 break;
2612 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
2613 break;
2615 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
2616 break;
2618 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
2619 break;
2621 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
2622 break;
2624 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
2625 break;
2627 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
2628 break;
2630 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
2631 break;
2633 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
2634 break;
2636 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
2637 break;
2639 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
2640 break;
2642 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
2643 break;
2645 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
2646 break;
2648 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
2649 break;
2651 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
2652 break;
2654 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
2655 break;
2657 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
2658 break;
2660 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
2661 break;
2663 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
2664 break;
2666 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
2667 break;
2669 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
2670 break;
2672 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
2673 break;
2675 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
2676 break;
2678 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
2679 break;
2681 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
2682 break;
2684 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
2685 break;
2687 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
2688 break;
2690 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
2691 break;
2693 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
2694 break;
2696 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
2697 break;
2699 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
2700 break;
2702 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
2703 break;
2705 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
2706 break;
2708 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
2709 break;
2711 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
2712 break;
2714 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
2715 break;
2717 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
2718 break;
2720 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
2721 break;
2723 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
2724 break;
2726 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
2727 break;
2729 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
2730 break;
2732 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
2733 break;
2735 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
2736 break;
2738 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
2739 break;
2741 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
2742 break;
2744 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
2745 break;
2747 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
2748 break;
2750 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
2751 break;
2753 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
2754 break;
2756 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
2757 break;
2759 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
2760 break;
2762 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
2763 break;
2765 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
2766 break;
2768 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
2769 break;
2771 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
2772 break;
2774 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
2775 break;
2777 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
2778 break;
2780 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
2781 break;
2783 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
2784 break;
2786 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
2787 break;
2789 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
2790 break;
2792 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
2793 break;
2795 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
2796 break;
2798 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
2799 break;
2801 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
2802 break;
2804 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
2805 break;
2807 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
2808 break;
2810 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_R;
2811 break;
2813 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_R;
2814 break;
2816 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_R;
2817 break;
2819 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_R;
2820 break;
2822 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_R;
2823 break;
2825 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_R;
2826 break;
2827 }
2828
2829 // Copy over operands
2830 SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
2831 Ops.push_back(N->getOperand(0)); // Move chain to the back.
2832
2833 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
2834 return true;
2835}
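// Editorial summary: every case above is a 1:1 mapping from a texture
// intrinsic's ISD opcode to a TEX_*/TLD4_* machine opcode; the only operand
// rewriting is moving the chain from operand 0 to the back. As an
// illustration (assumed PTX; names invented), TEX_1D_F32_S32_RR prints
// roughly as
//   tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd1, {%r1}];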
2836
2837bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
2838 unsigned Opc = 0;
2839 switch (N->getOpcode()) {
2840 default: return false;
2842 Opc = NVPTX::SULD_1D_I8_CLAMP_R;
2843 break;
2845 Opc = NVPTX::SULD_1D_I16_CLAMP_R;
2846 break;
2848 Opc = NVPTX::SULD_1D_I32_CLAMP_R;
2849 break;
2851 Opc = NVPTX::SULD_1D_I64_CLAMP_R;
2852 break;
2854 Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
2855 break;
2857 Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
2858 break;
2860 Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
2861 break;
2863 Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
2864 break;
2866 Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
2867 break;
2869 Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
2870 break;
2872 Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
2873 break;
2875 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
2876 break;
2878 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
2879 break;
2881 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
2882 break;
2884 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
2885 break;
2887 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
2888 break;
2890 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
2891 break;
2893 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
2894 break;
2896 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
2897 break;
2899 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
2900 break;
2902 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
2903 break;
2905 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
2906 break;
2908 Opc = NVPTX::SULD_2D_I8_CLAMP_R;
2909 break;
2911 Opc = NVPTX::SULD_2D_I16_CLAMP_R;
2912 break;
2914 Opc = NVPTX::SULD_2D_I32_CLAMP_R;
2915 break;
2917 Opc = NVPTX::SULD_2D_I64_CLAMP_R;
2918 break;
2920 Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
2921 break;
2923 Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
2924 break;
2926 Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
2927 break;
2929 Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
2930 break;
2932 Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
2933 break;
2935 Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
2936 break;
2938 Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
2939 break;
2941 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
2942 break;
2944 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
2945 break;
2947 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
2948 break;
2950 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
2951 break;
2953 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
2954 break;
2956 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
2957 break;
2959 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
2960 break;
2962 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
2963 break;
2965 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
2966 break;
2968 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
2969 break;
2971 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
2972 break;
2974 Opc = NVPTX::SULD_3D_I8_CLAMP_R;
2975 break;
2977 Opc = NVPTX::SULD_3D_I16_CLAMP_R;
2978 break;
2980 Opc = NVPTX::SULD_3D_I32_CLAMP_R;
2981 break;
2983 Opc = NVPTX::SULD_3D_I64_CLAMP_R;
2984 break;
2986 Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
2987 break;
2989 Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
2990 break;
2992 Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
2993 break;
2995 Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
2996 break;
2998 Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
2999 break;
3001 Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
3002 break;
3004 Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
3005 break;
3007 Opc = NVPTX::SULD_1D_I8_TRAP_R;
3008 break;
3010 Opc = NVPTX::SULD_1D_I16_TRAP_R;
3011 break;
3013 Opc = NVPTX::SULD_1D_I32_TRAP_R;
3014 break;
3016 Opc = NVPTX::SULD_1D_I64_TRAP_R;
3017 break;
3019 Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
3020 break;
3022 Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
3023 break;
3025 Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
3026 break;
3028 Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
3029 break;
3031 Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
3032 break;
3034 Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
3035 break;
3037 Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
3038 break;
3040 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
3041 break;
3043 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
3044 break;
3046 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
3047 break;
3049 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
3050 break;
3052 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
3053 break;
3055 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
3056 break;
3058 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
3059 break;
3061 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
3062 break;
3064 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
3065 break;
3067 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
3068 break;
3070 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
3071 break;
3073 Opc = NVPTX::SULD_2D_I8_TRAP_R;
3074 break;
3076 Opc = NVPTX::SULD_2D_I16_TRAP_R;
3077 break;
3079 Opc = NVPTX::SULD_2D_I32_TRAP_R;
3080 break;
3082 Opc = NVPTX::SULD_2D_I64_TRAP_R;
3083 break;
3085 Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
3086 break;
3088 Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
3089 break;
3091 Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
3092 break;
3094 Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
3095 break;
3097 Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
3098 break;
3100 Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
3101 break;
3103 Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
3104 break;
3106 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
3107 break;
3109 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
3110 break;
3112 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
3113 break;
3115 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
3116 break;
3118 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
3119 break;
3121 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
3122 break;
3124 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
3125 break;
3127 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
3128 break;
3130 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
3131 break;
3133 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
3134 break;
3136 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
3137 break;
3139 Opc = NVPTX::SULD_3D_I8_TRAP_R;
3140 break;
3142 Opc = NVPTX::SULD_3D_I16_TRAP_R;
3143 break;
3145 Opc = NVPTX::SULD_3D_I32_TRAP_R;
3146 break;
3148 Opc = NVPTX::SULD_3D_I64_TRAP_R;
3149 break;
3151 Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
3152 break;
3154 Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
3155 break;
3157 Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
3158 break;
3160 Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
3161 break;
3163 Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
3164 break;
3166 Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
3167 break;
3169 Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
3170 break;
3172 Opc = NVPTX::SULD_1D_I8_ZERO_R;
3173 break;
3175 Opc = NVPTX::SULD_1D_I16_ZERO_R;
3176 break;
3178 Opc = NVPTX::SULD_1D_I32_ZERO_R;
3179 break;
3181 Opc = NVPTX::SULD_1D_I64_ZERO_R;
3182 break;
3184 Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
3185 break;
3187 Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
3188 break;
3190 Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
3191 break;
3193 Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
3194 break;
3196 Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
3197 break;
3199 Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
3200 break;
3202 Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
3203 break;
3205 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
3206 break;
3208 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
3209 break;
3211 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
3212 break;
3214 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
3215 break;
3217 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
3218 break;
3220 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
3221 break;
3223 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
3224 break;
3226 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
3227 break;
3229 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
3230 break;
3232 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
3233 break;
3235 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
3236 break;
3238 Opc = NVPTX::SULD_2D_I8_ZERO_R;
3239 break;
3241 Opc = NVPTX::SULD_2D_I16_ZERO_R;
3242 break;
3244 Opc = NVPTX::SULD_2D_I32_ZERO_R;
3245 break;
3247 Opc = NVPTX::SULD_2D_I64_ZERO_R;
3248 break;
3250 Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
3251 break;
3253 Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
3254 break;
3256 Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
3257 break;
3259 Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
3260 break;
3262 Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
3263 break;
3265 Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
3266 break;
3268 Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
3269 break;
3271 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
3272 break;
3274 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
3275 break;
3277 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
3278 break;
3280 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
3281 break;
3283 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
3284 break;
3286 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
3287 break;
3289 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
3290 break;
3292 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
3293 break;
3295 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
3296 break;
3298 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
3299 break;
3301 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
3302 break;
3304 Opc = NVPTX::SULD_3D_I8_ZERO_R;
3305 break;
3307 Opc = NVPTX::SULD_3D_I16_ZERO_R;
3308 break;
3310 Opc = NVPTX::SULD_3D_I32_ZERO_R;
3311 break;
3313 Opc = NVPTX::SULD_3D_I64_ZERO_R;
3314 break;
3316 Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
3317 break;
3319 Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
3320 break;
3322 Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
3323 break;
3325 Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
3326 break;
3328 Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
3329 break;
3331 Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
3332 break;
3334 Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
3335 break;
3336 }
3337
3338 // Copy over operands
3339 SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
3340 Ops.push_back(N->getOperand(0)); // Move chain to the back.
3341
3342 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3343 return true;
3344}
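// Editorial summary: the surface-load mapping mirrors the texture one, with
// the _CLAMP/_TRAP/_ZERO suffix selecting the out-of-bounds mode baked into
// the suld instruction. Illustratively (assumed PTX; names invented),
// SULD_1D_I32_TRAP_R prints roughly as
//   suld.b.1d.b32.trap {%r1}, [%rd1, {%r2}];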
3345
3346
3347/// tryBFE - Look for instruction sequences that can be made more efficient
3348/// by using the 'bfe' (bit-field extract) PTX instruction
3349bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
3350 SDLoc DL(N);
3351 SDValue LHS = N->getOperand(0);
3352 SDValue RHS = N->getOperand(1);
3353 SDValue Len;
3354 SDValue Start;
3355 SDValue Val;
3356 bool IsSigned = false;
3357
3358 if (N->getOpcode() == ISD::AND) {
3359 // Canonicalize the operands
3360 // We want 'and %val, %mask'
3361 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3362 std::swap(LHS, RHS);
3363 }
3364
3365 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3366 if (!Mask) {
3367 // We need a constant mask on the RHS of the AND
3368 return false;
3369 }
3370
3371 // Extract the mask bits
3372 uint64_t MaskVal = Mask->getZExtValue();
3373 if (!isMask_64(MaskVal)) {
3374 // We *could* handle shifted masks here, but doing so would require an
3375 // 'and' operation to fix up the low-order bits, so we would trade
3376 // shr+and for bfe+and, which has the same throughput.
3377 return false;
3378 }
3379
3380 // How many bits are in our mask?
3381 int64_t NumBits = countr_one(MaskVal);
3382 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3383
3384 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3385 // We have a 'srl/and' pair, extract the effective start bit and length
3386 Val = LHS.getNode()->getOperand(0);
3387 Start = LHS.getNode()->getOperand(1);
3388 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3389 if (StartConst) {
3390 uint64_t StartVal = StartConst->getZExtValue();
3391 // How many "good" bits do we have left? "good" is defined here as bits
3392 // that exist in the original value, not shifted in.
3393 int64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3394 if (NumBits > GoodBits) {
3395 // Do not handle the case where bits have been shifted in. In theory
3396 // we could handle this, but the cost is likely higher than just
3397 // emitting the srl/and pair.
3398 return false;
3399 }
3400 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
3401 } else {
3402 // Do not handle the case where the shift amount (can be zero if no srl
3403 // was found) is not constant. We could handle this case, but it would
3404 // require run-time logic that would be more expensive than just
3405 // emitting the srl/and pair.
3406 return false;
3407 }
3408 } else {
3409 // Do not handle the case where the LHS of the and is not a shift. While
3410 // it would be trivial to handle this case, it would just transform
3411 // 'and' -> 'bfe', but 'and' has higher throughput.
3412 return false;
3413 }
3414 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3415 if (LHS->getOpcode() == ISD::AND) {
3416 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3417 if (!ShiftCnst) {
3418 // Shift amount must be constant
3419 return false;
3420 }
3421
3422 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3423
3424 SDValue AndLHS = LHS->getOperand(0);
3425 SDValue AndRHS = LHS->getOperand(1);
3426
3427 // Canonicalize the AND to have the mask on the RHS
3428 if (isa<ConstantSDNode>(AndLHS)) {
3429 std::swap(AndLHS, AndRHS);
3430 }
3431
3432 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3433 if (!MaskCnst) {
3434 // Mask must be constant
3435 return false;
3436 }
3437
3438 uint64_t MaskVal = MaskCnst->getZExtValue();
3439 uint64_t NumZeros;
3440 uint64_t NumBits;
3441 if (isMask_64(MaskVal)) {
3442 NumZeros = 0;
3443 // The number of bits in the result bitfield will be the number of
3444 // trailing ones (the AND) minus the number of bits we shift off
3445 NumBits = llvm::countr_one(MaskVal) - ShiftAmt;
3446 } else if (isShiftedMask_64(MaskVal)) {
3447 NumZeros = llvm::countr_zero(MaskVal);
3448 unsigned NumOnes = llvm::countr_one(MaskVal >> NumZeros);
3449 // The number of bits in the result bitfield will be the number of
3450 // trailing zeros plus the number of set bits in the mask minus the
3451 // number of bits we shift off
3452 NumBits = NumZeros + NumOnes - ShiftAmt;
3453 } else {
3454 // This is not a mask we can handle
3455 return false;
3456 }
3457
3458 if (ShiftAmt < NumZeros) {
3459 // Handling this case would require extra logic that would make this
3460 // transformation non-profitable
3461 return false;
3462 }
3463
3464 Val = AndLHS;
3465 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
3466 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3467 } else if (LHS->getOpcode() == ISD::SHL) {
3468 // Here, we have a pattern like:
3469 //
3470 // (sra (shl val, NN), MM)
3471 // or
3472 // (srl (shl val, NN), MM)
3473 //
3474 // If MM >= NN, we can efficiently optimize this with bfe
3475 Val = LHS->getOperand(0);
3476
3477 SDValue ShlRHS = LHS->getOperand(1);
3478 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3479 if (!ShlCnst) {
3480 // Shift amount must be constant
3481 return false;
3482 }
3483 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3484
3485 SDValue ShrRHS = RHS;
3486 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3487 if (!ShrCnst) {
3488 // Shift amount must be constant
3489 return false;
3490 }
3491 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3492
3493 // To avoid extra codegen and be profitable, we need Outer >= Inner
3494 if (OuterShiftAmt < InnerShiftAmt) {
3495 return false;
3496 }
3497
3498 // If the outer shift is more than the type size, we have no bitfield to
3499 // extract (since we also check that the inner shift is <= the outer
3500 // shift, this also implies that the inner shift is < the type size).
3501 if (OuterShiftAmt >= Val.getValueSizeInBits()) {
3502 return false;
3503 }
3504
3505 Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
3506 MVT::i32);
3507 Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
3508 DL, MVT::i32);
3509
3510 if (N->getOpcode() == ISD::SRA) {
3511 // If we have an arithmetic right shift, we need to use the signed bfe
3512 // variant
3513 IsSigned = true;
3514 }
3515 } else {
3516 // No can do...
3517 return false;
3518 }
3519 } else {
3520 // No can do...
3521 return false;
3522 }
3523
3524
3525 unsigned Opc;
3526 // For the BFE operations we form here from "and" and "srl", always use the
3527 // unsigned variants.
3528 if (Val.getValueType() == MVT::i32) {
3529 if (IsSigned) {
3530 Opc = NVPTX::BFE_S32rii;
3531 } else {
3532 Opc = NVPTX::BFE_U32rii;
3533 }
3534 } else if (Val.getValueType() == MVT::i64) {
3535 if (IsSigned) {
3536 Opc = NVPTX::BFE_S64rii;
3537 } else {
3538 Opc = NVPTX::BFE_U64rii;
3539 }
3540 } else {
3541 // We cannot handle this type
3542 return false;
3543 }
3544
3545 SDValue Ops[] = {
3546 Val, Start, Len
3547 };
3548
3549 ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
3550 return true;
3551}
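// Worked editorial example of the srl+and path above: for i32
//   (and (srl %x, 8), 255)
// the mask 255 gives NumBits = countr_one(255) = 8, the shift gives
// StartVal = 8, and GoodBits = 32 - 8 = 24 >= 8, so the pair is replaced by
// BFE_U32rii with Start = 8 and Len = 8, i.e. roughly
//   bfe.u32 %r1, %x, 8, 8;
// which extracts bits [8, 16) of %x (register names invented).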
3552
3553// SelectDirectAddr - Match a direct address for DAG.
3554// A direct address could be a globaladdress or externalsymbol.
3555bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3556 // Return true if TGA or ES.
3557 if (N.getOpcode() == ISD::TargetGlobalAddress ||
3558 N.getOpcode() == ISD::TargetExternalSymbol) {
3559 Address = N;
3560 return true;
3561 }
3562 if (N.getOpcode() == NVPTXISD::Wrapper) {
3563 Address = N.getOperand(0);
3564 return true;
3565 }
3566 // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
3567 if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
3568 if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
3569 CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
3570 CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
3571 return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
3572 }
3573 return false;
3574}
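// Editorial illustration: for a kernel argument, lowering produces
//   addrspacecast (MoveParam @arg) to addrspace(101 /* param */)
// and the fold above strips both wrappers so the access can use the direct
// symbol form (e.g. an assumed "ld.param.u32 %r1, [foo_param_0];").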
3575
3576// symbol+offset
3577bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3578 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3579 if (Addr.getOpcode() == ISD::ADD) {
3580 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3581 SDValue base = Addr.getOperand(0);
3582 if (SelectDirectAddr(base, Base)) {
3583 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3584 mvt);
3585 return true;
3586 }
3587 }
3588 }
3589 return false;
3590}
3591
3592// symbol+offset
3593bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3594 SDValue &Base, SDValue &Offset) {
3595 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3596}
3597
3598// symbol+offset
3599bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3600 SDValue &Base, SDValue &Offset) {
3601 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3602}
3603
3604// register+offset
3605bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3606 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3607 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3608 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3609 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
3610 return true;
3611 }
3612 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3613 Addr.getOpcode() == ISD::TargetGlobalAddress)
3614 return false; // direct calls.
3615
3616 if (Addr.getOpcode() == ISD::ADD) {
3617 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3618 return false;
3619 }
3620 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3621 if (FrameIndexSDNode *FIN =
3622 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
3623 // Constant offset from frame ref.
3624 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3625 else
3626 Base = Addr.getOperand(0);
3627 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3628 mvt);
3629 return true;
3630 }
3631 }
3632 return false;
3633}
3634
3635// register+offset
3636bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3637 SDValue &Base, SDValue &Offset) {
3638 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3639}
3640
3641// register+offset
3642bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3643 SDValue &Base, SDValue &Offset) {
3644 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3645}
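// Editorial illustration of the register+offset matcher: a bare FrameIndex
// selects as (frame slot, 0), and (add %rd1, 16) selects as Base = %rd1,
// Offset = 16, which a load pattern would print roughly as
//   ld.u32 %r1, [%rd1+16];
// (assumed PTX; register names invented).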
3646
3647bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3648 unsigned int spN) const {
3649 const Value *Src = nullptr;
3650 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
3651 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3652 return true;
3653 Src = mN->getMemOperand()->getValue();
3654 }
3655 if (!Src)
3656 return false;
3657 if (auto *PT = dyn_cast<PointerType>(Src->getType()))
3658 return (PT->getAddressSpace() == spN);
3659 return false;
3660}
3661
3662/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3663/// inline asm expressions.
3664 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
3665 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
3666 std::vector<SDValue> &OutOps) {
3667 SDValue Op0, Op1;
3668 switch (ConstraintID) {
3669 default:
3670 return true;
3671 case InlineAsm::ConstraintCode::m: // memory
3672 if (SelectDirectAddr(Op, Op0)) {
3673 OutOps.push_back(Op0);
3674 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
3675 return false;
3676 }
3677 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3678 OutOps.push_back(Op0);
3679 OutOps.push_back(Op1);
3680 return false;
3681 }
3682 break;
3683 }
3684 return true;
3685}
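// Editorial note: an inline-asm "m" constraint reuses the two ordinary
// addressing forms above: a direct symbol (with an implicit 0 offset
// appended) or a register+immediate pair from SelectADDRri. Note the
// inverted convention: returning false reports a successful match to the
// common SelectionDAGISel driver.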
3686
3687/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
3688/// conversion from \p SrcTy to \p DestTy.
3689unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
3690 LoadSDNode *LdNode) {
3691 bool IsSigned = LdNode && LdNode->getExtensionType() == ISD::SEXTLOAD;
3692 switch (SrcTy.SimpleTy) {
3693 default:
3694 llvm_unreachable("Unhandled source type");
3695 case MVT::i8:
3696 switch (DestTy.SimpleTy) {
3697 default:
3698 llvm_unreachable("Unhandled dest type");
3699 case MVT::i16:
3700 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3701 case MVT::i32:
3702 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3703 case MVT::i64:
3704 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3705 }
3706 case MVT::i16:
3707 switch (DestTy.SimpleTy) {
3708 default:
3709 llvm_unreachable("Unhandled dest type");
3710 case MVT::i8:
3711 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3712 case MVT::i32:
3713 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3714 case MVT::i64:
3715 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3716 }
3717 case MVT::i32:
3718 switch (DestTy.SimpleTy) {
3719 default:
3720 llvm_unreachable("Unhandled dest type");
3721 case MVT::i8:
3722 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3723 case MVT::i16:
3724 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3725 case MVT::i64:
3726 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3727 }
3728 case MVT::i64:
3729 switch (DestTy.SimpleTy) {
3730 default:
3731 llvm_unreachable("Unhandled dest type");
3732 case MVT::i8:
3733 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3734 case MVT::i16:
3735 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3736 case MVT::i32:
3737 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
3738 }
3739 case MVT::f16:
3740 switch (DestTy.SimpleTy) {
3741 default:
3742 llvm_unreachable("Unhandled dest type");
3743 case MVT::f32:
3744 return NVPTX::CVT_f32_f16;
3745 case MVT::f64:
3746 return NVPTX::CVT_f64_f16;
3747 }
3748 }
3749}
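// Editorial usage sketch: GetConvertOpcode picks the cvt.* form from the
// (source type, dest type, load signedness) triple. For example, widening
// the result of a sign-extending i16 load to i32 yields NVPTX::CVT_s32_s16
// ("cvt.s32.s16"), while the zero-/any-extending case yields
// NVPTX::CVT_u32_u16.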