//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//

#include "NVPTXISelDAGToDAG.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"

using namespace llvm;

#define DEBUG_TYPE "nvptx-isel"
#define PASS_NAME "NVPTX DAG->DAG Pattern Instruction Selection"

/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
                                       llvm::CodeGenOptLevel OptLevel) {
  return new NVPTXDAGToDAGISel(TM, OptLevel);
}

char NVPTXDAGToDAGISel::ID = 0;

INITIALIZE_PASS(NVPTXDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)

NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
                                     CodeGenOptLevel OptLevel)
    : SelectionDAGISel(ID, tm, OptLevel), TM(tm) {
  doMulWide = (OptLevel > CodeGenOptLevel::None);
}

bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<NVPTXSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

int NVPTXDAGToDAGISel::getDivF32Level() const {
  return Subtarget->getTargetLowering()->getDivF32Level();
}

bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
  return Subtarget->getTargetLowering()->usePrecSqrtF32();
}

bool NVPTXDAGToDAGISel::useF32FTZ() const {
  return Subtarget->getTargetLowering()->useF32FTZ(*MF);
}

bool NVPTXDAGToDAGISel::allowFMA() const {
  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
  return TL->allowFMA(*MF, OptLevel);
}

bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
  return TL->allowUnsafeFPMath(*MF);
}

bool NVPTXDAGToDAGISel::useShortPointers() const {
  return TM.useShortPointers();
}

/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
void NVPTXDAGToDAGISel::Select(SDNode *N) {

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  switch (N->getOpcode()) {
  case ISD::LOAD:
  case ISD::ATOMIC_LOAD:
    if (tryLoad(N))
      return;
    break;
  case ISD::STORE:
  case ISD::ATOMIC_STORE:
    if (tryStore(N))
      return;
    break;
  case ISD::EXTRACT_VECTOR_ELT:
    if (tryEXTRACT_VECTOR_ELEMENT(N))
      return;
    break;
  case NVPTXISD::SETP_F16X2:
    SelectSETP_F16X2(N);
    return;
  case NVPTXISD::SETP_BF16X2:
    SelectSETP_BF16X2(N);
    return;
  case NVPTXISD::LoadV2:
  case NVPTXISD::LoadV4:
    if (tryLoadVector(N))
      return;
    break;
  case NVPTXISD::LDGV2:
  case NVPTXISD::LDGV4:
  case NVPTXISD::LDUV2:
  case NVPTXISD::LDUV4:
    if (tryLDGLDU(N))
      return;
    break;
  case NVPTXISD::StoreV2:
  case NVPTXISD::StoreV4:
    if (tryStoreVector(N))
      return;
    break;
  case NVPTXISD::LoadParam:
  case NVPTXISD::LoadParamV2:
  case NVPTXISD::LoadParamV4:
    if (tryLoadParam(N))
      return;
    break;
  case NVPTXISD::StoreRetval:
  case NVPTXISD::StoreRetvalV2:
  case NVPTXISD::StoreRetvalV4:
    if (tryStoreRetval(N))
      return;
    break;
  case NVPTXISD::StoreParam:
  case NVPTXISD::StoreParamV2:
  case NVPTXISD::StoreParamV4:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParamU32:
    if (tryStoreParam(N))
      return;
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    if (tryIntrinsicNoChain(N))
      return;
    break;
  case ISD::INTRINSIC_W_CHAIN:
    if (tryIntrinsicChain(N))
      return;
    break;
  // (The long run of NVPTXISD::Tex*/Tld4* texture-opcode case labels is
  // elided from this listing.)
    if (tryTextureIntrinsic(N))
      return;
    break;
  // (The long run of NVPTXISD::Suld* surface-opcode case labels is elided
  // from this listing.)
    if (trySurfaceIntrinsic(N))
      return;
    break;
  case ISD::AND:
  case ISD::SRA:
  case ISD::SRL:
    // Try to select BFE
    if (tryBFE(N))
      return;
    break;
  case ISD::ADDRSPACECAST:
    SelectAddrSpaceCast(N);
    return;
  case ISD::ConstantFP:
    if (tryConstantFP(N))
      return;
    break;
  default:
    break;
  }
  SelectCode(N);
}

bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
  unsigned IID = N->getConstantOperandVal(1);
  switch (IID) {
  default:
    return false;
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_p:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_p:
    return tryLDGLDU(N);
  }
}

// There's no way to specify FP16 and BF16 immediates in .(b)f16 ops, so we
// have to load them into an .(b)f16 register first.
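// For example (illustrative PTX; the register name is invented), an f16
// immediate of 1.0 is first materialized into a register:
//   mov.b16 %h1, 0x3C00;  // 0x3C00 is 1.0 in IEEE half precision
// and only then used as an ordinary .f16 operand.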
bool NVPTXDAGToDAGISel::tryConstantFP(SDNode *N) {
  if (N->getValueType(0) != MVT::f16 && N->getValueType(0) != MVT::bf16)
    return false;
  SDValue Val = CurDAG->getTargetConstantFP(
      cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), N->getValueType(0));
  SDNode *LoadConstF16 = CurDAG->getMachineNode(
      (N->getValueType(0) == MVT::f16 ? NVPTX::LOAD_CONST_F16
                                      : NVPTX::LOAD_CONST_BF16),
      SDLoc(N), N->getValueType(0), Val);
  ReplaceNode(N, LoadConstF16);
  return true;
}

// Map an ISD::CondCode value to the appropriate CmpMode expected by
// NVPTXInstPrinter::printCmpMode().
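// For example, both ISD::SETOGT and ISD::SETGT map to CmpMode::GT (PTX
// "setp.gt"), while the unordered ISD::SETUGT maps to CmpMode::GTU
// ("setp.gtu", which is also true on NaN inputs); the FTZ_FLAG bit is OR'ed
// in separately below when denormals are flushed.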
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
  using NVPTX::PTXCmpMode::CmpMode;
  unsigned PTXCmpMode = [](ISD::CondCode CC) {
    switch (CC) {
    default:
      llvm_unreachable("Unexpected condition code.");
    case ISD::SETOEQ:
      return CmpMode::EQ;
    case ISD::SETOGT:
      return CmpMode::GT;
    case ISD::SETOGE:
      return CmpMode::GE;
    case ISD::SETOLT:
      return CmpMode::LT;
    case ISD::SETOLE:
      return CmpMode::LE;
    case ISD::SETONE:
      return CmpMode::NE;
    case ISD::SETO:
      return CmpMode::NUM;
    case ISD::SETUO:
      return CmpMode::NotANumber;
    case ISD::SETUEQ:
      return CmpMode::EQU;
    case ISD::SETUGT:
      return CmpMode::GTU;
    case ISD::SETUGE:
      return CmpMode::GEU;
    case ISD::SETULT:
      return CmpMode::LTU;
    case ISD::SETULE:
      return CmpMode::LEU;
    case ISD::SETUNE:
      return CmpMode::NEU;
    case ISD::SETEQ:
      return CmpMode::EQ;
    case ISD::SETGT:
      return CmpMode::GT;
    case ISD::SETGE:
      return CmpMode::GE;
    case ISD::SETLT:
      return CmpMode::LT;
    case ISD::SETLE:
      return CmpMode::LE;
    case ISD::SETNE:
      return CmpMode::NE;
    }
  }(CondCode.get());

  if (FTZ)
    PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;

  return PTXCmpMode;
}

bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
  unsigned PTXCmpMode =
      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
  SDLoc DL(N);
  SDNode *SetP = CurDAG->getMachineNode(
      NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
      N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
  ReplaceNode(N, SetP);
  return true;
}

bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(SDNode *N) {
  unsigned PTXCmpMode =
      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
  SDLoc DL(N);
  SDNode *SetP = CurDAG->getMachineNode(
      NVPTX::SETP_bf16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
      N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
  ReplaceNode(N, SetP);
  return true;
}

// Find all instances of extract_vector_elt that use this v2f16 vector
// and coalesce them into a scattering move instruction.
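// For example (illustrative DAG), given t1: v2f16 and the two users
//   t2: f16 = extract_vector_elt t1, 0
//   t3: f16 = extract_vector_elt t1, 1
// t2 and t3 are both rewired to the two results of a single I32toV2I16
// machine node, so the vector is unpacked exactly once.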
bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
  SDValue Vector = N->getOperand(0);

  // We only care about 16x2 as it's the only real vector type we
  // need to deal with.
  MVT VT = Vector.getSimpleValueType();
  if (!Isv2x16VT(VT))
    return false;
  // Find and record all uses of this vector that extract element 0 or 1.
  SmallVector<SDNode *, 4> E0, E1;
  for (auto *U : Vector.getNode()->uses()) {
    if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      continue;
    if (U->getOperand(0) != Vector)
      continue;
    if (const ConstantSDNode *IdxConst =
            dyn_cast<ConstantSDNode>(U->getOperand(1))) {
      if (IdxConst->getZExtValue() == 0)
        E0.push_back(U);
      else if (IdxConst->getZExtValue() == 1)
        E1.push_back(U);
      else
        llvm_unreachable("Invalid vector index.");
    }
  }

  // There's no point scattering f16x2 if we only ever access one
  // element of it.
  if (E0.empty() || E1.empty())
    return false;

  // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
  // into f16,f16 SplitF16x2(V)
  MVT EltVT = VT.getVectorElementType();
  SDNode *ScatterOp =
      CurDAG->getMachineNode(NVPTX::I32toV2I16, SDLoc(N), EltVT, EltVT, Vector);
  for (auto *Node : E0)
    ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
  for (auto *Node : E1)
    ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));

  return true;
}

static unsigned int getCodeAddrSpace(MemSDNode *N) {
  const Value *Src = N->getMemOperand()->getValue();

  if (!Src)
    return NVPTX::PTXLdStInstCode::GENERIC;

  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
    switch (PT->getAddressSpace()) {
    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
    case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
    default: break;
    }
  }
  return NVPTX::PTXLdStInstCode::GENERIC;
}

static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
                          unsigned CodeAddrSpace, MachineFunction *F) {
  // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
  // space.
  //
  // We have two ways of identifying invariant loads: Loads may be explicitly
  // marked as invariant, or we may infer them to be invariant.
  //
  // We currently infer invariance for loads from
  //  - constant global variables, and
  //  - kernel function pointer params that are noalias (i.e. __restrict) and
  //    never written to.
  //
  // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
  // not during the SelectionDAG phase).
  //
  // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
  // explicitly invariant loads because these are how clang tells us to use ldg
  // when the user uses a builtin.
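  // For example (illustrative CUDA), for the kernel
  //   __global__ void k(const float *__restrict__ p) { ... = p[i]; }
  // the parameter p is a noalias, never-written kernel argument, so loads
  // through it can be inferred invariant and selected as ld.global.nc.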
  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
    return false;

  if (N->isInvariant())
    return true;

  bool IsKernelFn = isKernelFunction(F->getFunction());

  // We use getUnderlyingObjects() here instead of getUnderlyingObject() mainly
  // because the former looks through phi nodes while the latter does not. We
  // need to look through phi nodes to handle pointer induction variables.
  SmallVector<const Value *, 8> Objs;
  getUnderlyingObjects(N->getMemOperand()->getValue(), Objs);

  return all_of(Objs, [&](const Value *V) {
    if (auto *A = dyn_cast<const Argument>(V))
      return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
    if (auto *GV = dyn_cast<const GlobalVariable>(V))
      return GV->isConstant();
    return false;
  });
}

bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
  unsigned IID = N->getConstantOperandVal(0);
  switch (IID) {
  default:
    return false;
  case Intrinsic::nvvm_texsurf_handle_internal:
    SelectTexSurfHandle(N);
    return true;
  }
}

void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
  // Op 0 is the intrinsic ID
  SDValue Wrapper = N->getOperand(1);
  SDValue GlobalVal = Wrapper.getOperand(0);
  ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
                                        MVT::i64, GlobalVal));
}

void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  SDValue Src = N->getOperand(0);
  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
  unsigned DstAddrSpace = CastN->getDestAddressSpace();
  assert(SrcAddrSpace != DstAddrSpace &&
         "addrspacecast must be between different address spaces");

  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
    // Specific to generic
    unsigned Opc;
    switch (SrcAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
                                               : NVPTX::cvta_shared_yes_64)
                         : NVPTX::cvta_shared_yes;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
                                               : NVPTX::cvta_const_yes_64)
                         : NVPTX::cvta_const_yes;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
                                               : NVPTX::cvta_local_yes_64)
                         : NVPTX::cvta_local_yes;
      break;
    }
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                                          Src));
    return;
  } else {
    // Generic to specific
    if (SrcAddrSpace != 0)
      report_fatal_error("Cannot cast between two non-generic address spaces");
    unsigned Opc;
    switch (DstAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
                         : NVPTX::cvta_to_global_yes;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
                                               : NVPTX::cvta_to_shared_yes_64)
                         : NVPTX::cvta_to_shared_yes;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
                                               : NVPTX::cvta_to_const_yes_64)
                         : NVPTX::cvta_to_const_yes;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
                                               : NVPTX::cvta_to_local_yes_64)
                         : NVPTX::cvta_to_local_yes;
      break;
    case ADDRESS_SPACE_PARAM:
      Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
                         : NVPTX::nvvm_ptr_gen_to_param;
      break;
    }
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                                          Src));
    return;
  }
}

// Helper function template to reduce amount of boilerplate code for
// opcode selection.
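// For example, a caller selecting a scalar load passes the six scalar LD_*
// opcodes and gets back the one matching the value type; slots passed as
// std::nullopt (e.g. the missing f64 form of a v4 load) make the helper
// return std::nullopt, and the caller then bails out of custom selection.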
static std::optional<unsigned>
pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8,
                unsigned Opcode_i16, unsigned Opcode_i32,
                std::optional<unsigned> Opcode_i64, unsigned Opcode_f32,
                std::optional<unsigned> Opcode_f64) {
  switch (VT) {
  case MVT::i1:
  case MVT::i8:
    return Opcode_i8;
  case MVT::i16:
    return Opcode_i16;
  case MVT::i32:
    return Opcode_i32;
  case MVT::i64:
    return Opcode_i64;
  case MVT::f16:
  case MVT::bf16:
    return Opcode_i16;
  case MVT::v2f16:
  case MVT::v2bf16:
  case MVT::v2i16:
  case MVT::v4i8:
    return Opcode_i32;
  case MVT::f32:
    return Opcode_f32;
  case MVT::f64:
    return Opcode_f64;
  default:
    return std::nullopt;
  }
}

static int getLdStRegType(EVT VT) {
  if (VT.isFloatingPoint())
    switch (VT.getSimpleVT().SimpleTy) {
    case MVT::f16:
    case MVT::bf16:
    case MVT::v2f16:
    case MVT::v2bf16:
      return NVPTX::PTXLdStInstCode::Untyped;
    default:
      return NVPTX::PTXLdStInstCode::Float;
    }
  else
    return NVPTX::PTXLdStInstCode::Unsigned;
}

bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
  SDLoc dl(N);
  MemSDNode *LD = cast<MemSDNode>(N);
  assert(LD->readMem() && "Expected load");
  LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  SDNode *NVPTXLD = nullptr;

  // do not support pre/post inc/dec
  if (PlainLoad && PlainLoad->isIndexed())
    return false;

  if (!LoadedVT.isSimple())
    return false;

  AtomicOrdering Ordering = LD->getSuccessOrdering();
  // In order to lower atomic loads with stronger guarantees we would need to
  // use load.acquire or insert fences. However these features were only added
  // with PTX ISA 6.0 / sm_70.
  // TODO: Check if we can actually use the new instructions and implement them.
  if (isStrongerThanMonotonic(Ordering))
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
  if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
    return tryLDGLDU(N);
  }

  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  // - .volatile has the same memory synchronization semantics as .relaxed.sys
  bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic;
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Type Setting: fromType + fromTypeWidth
  //
  // Sign     : ISD::SEXTLOAD
  // Unsigned : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //            type is integer
  // Float    : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT SimpleVT = LoadedVT.getSimpleVT();
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned fromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
  unsigned int fromType;

  // Vector Setting
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    assert((Isv2x16VT(LoadedVT) || LoadedVT == MVT::v4i8) &&
           "Unexpected vector type");
    // v2f16/v2bf16/v2i16 is loaded using ld.b32
    fromTypeWidth = 32;
  }

  if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD))
    fromType = NVPTX::PTXLdStInstCode::Signed;
  else
    fromType = getLdStRegType(ScalarVT);

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue Addr;
  SDValue Offset, Base;
  std::optional<unsigned> Opcode;
  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;

  if (SelectDirectAddr(N1, Addr)) {
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar,
                             NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
                             NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Addr, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
                               : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
                             NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
                             NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
                               : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
                          NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
                          NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari,
                               NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
                               NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  } else {
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
                          NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64,
                          NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg,
                               NVPTX::LD_i32_areg, NVPTX::LD_i64_areg,
                               NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), N1, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  }

  if (!NVPTXLD)
    return false;

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXLD), {MemRef});

  ReplaceNode(N, NVPTXLD);
  return true;
}

bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  std::optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *LD;
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT LoadedVT = MemSD->getMemoryVT();

  if (!LoadedVT.isSimple())
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
    return tryLDGLDU(N);
  }

  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Vector Setting
  MVT SimpleVT = LoadedVT.getSimpleVT();

  // Type Setting: fromType + fromTypeWidth
  //
  // Sign     : ISD::SEXTLOAD
  // Unsigned : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //            type is integer
  // Float    : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
  unsigned int FromType;
  // The last operand holds the original LoadSDNode::getExtensionType() value
  unsigned ExtensionType = cast<ConstantSDNode>(
      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
  if (ExtensionType == ISD::SEXTLOAD)
    FromType = NVPTX::PTXLdStInstCode::Signed;
  else
    FromType = getLdStRegType(ScalarVT);

  unsigned VecType;

  switch (N->getOpcode()) {
  case NVPTXISD::LoadV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
    break;
  case NVPTXISD::LoadV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
    break;
  default:
    return false;
  }

  EVT EltVT = N->getValueType(0);

  // v8x16 is a special case. PTX doesn't have ld.v8.x16
  // instruction. Instead, we split the vector into v2x16 chunks and
  // load them with ld.v4.b32.
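  // For example (illustrative), a v8f16 NVPTXISD::LoadV4 whose element type
  // is v2f16 is selected as a single ld.v4.b32, each 32-bit result lane
  // holding one v2f16 pair.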
  if (Isv2x16VT(EltVT)) {
    assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
    EltVT = MVT::i32;
    FromType = NVPTX::PTXLdStInstCode::Untyped;
    FromTypeWidth = 32;
  }

  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::LoadV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
                               NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
                               NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
      break;
    case NVPTXISD::LoadV4:
      Opcode =
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
                          NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
                          std::nullopt, NVPTX::LDV_f32_v4_avar, std::nullopt);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Addr, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  } else if (PointerSize == 64
                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::LoadV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
                               NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
                               NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
      break;
    case NVPTXISD::LoadV4:
      Opcode =
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
                          NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
                          std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  } else if (PointerSize == 64
                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                            NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64,
                            NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64,
                            NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
            NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
            NVPTX::LDV_f32_v4_ari_64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
                                 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
                                 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
        break;
      case NVPTXISD::LoadV4:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
                            NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
                            std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };

    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  } else {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
            NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
            NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
            NVPTX::LDV_f64_v2_areg_64);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
            NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
            NVPTX::LDV_f32_v4_areg_64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
                            NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
                            NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f32_v2_areg,
                            NVPTX::LDV_f64_v2_areg);
        break;
      case NVPTXISD::LoadV4:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
                            NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
                            std::nullopt, NVPTX::LDV_f32_v4_areg, std::nullopt);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Op1, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  }

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});

  ReplaceNode(N, LD);
  return true;
}

bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1;
  MemSDNode *Mem;
  bool IsLDG = true;

  // If this is an LDG intrinsic, the address is the third operand. If it's an
  // LDG/LDU SD node (from custom vector handling), then it's the second
  // operand.
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    Op1 = N->getOperand(2);
    Mem = cast<MemIntrinsicSDNode>(N);
    unsigned IID = N->getConstantOperandVal(1);
    switch (IID) {
    default:
      return false;
    case Intrinsic::nvvm_ldg_global_f:
    case Intrinsic::nvvm_ldg_global_i:
    case Intrinsic::nvvm_ldg_global_p:
      IsLDG = true;
      break;
    case Intrinsic::nvvm_ldu_global_f:
    case Intrinsic::nvvm_ldu_global_i:
    case Intrinsic::nvvm_ldu_global_p:
      IsLDG = false;
      break;
    }
  } else {
    Op1 = N->getOperand(1);
    Mem = cast<MemSDNode>(N);
  }

  std::optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *LD;
  SDValue Base, Offset, Addr;
  EVT OrigType = N->getValueType(0);

  EVT EltVT = Mem->getMemoryVT();
  unsigned NumElts = 1;
  if (EltVT.isVector()) {
    NumElts = EltVT.getVectorNumElements();
    EltVT = EltVT.getVectorElementType();
    // vectors of 16-bit types are loaded/stored as multiples of v2x16 elements.
    if ((EltVT == MVT::f16 && OrigType == MVT::v2f16) ||
        (EltVT == MVT::bf16 && OrigType == MVT::v2bf16) ||
        (EltVT == MVT::i16 && OrigType == MVT::v2i16)) {
      assert(NumElts % 2 == 0 && "Vector must have even number of elements");
      EltVT = OrigType;
      NumElts /= 2;
    } else if (OrigType == MVT::v4i8) {
      EltVT = OrigType;
      NumElts = 1;
    }
  }

  // Build the "promoted" result VTList for the load. If we are really loading
  // i8s, then the return type will be promoted to i16 since we do not expose
  // 8-bit registers in NVPTX.
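  // For example (illustrative): a scalar i8 LDG gets the VT list
  // {i16, Other}, while an LDGV2 of f32 elements gets {f32, f32, Other}.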
  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
  SmallVector<EVT, 5> InstVTs;
  for (unsigned i = 0; i != NumElts; ++i) {
    InstVTs.push_back(NodeVT);
  }
  InstVTs.push_back(MVT::Other);
  SDVTList InstVTList = CurDAG->getVTList(InstVTs);

  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case ISD::LOAD:
    case ISD::INTRINSIC_W_CHAIN:
      if (IsLDG)
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
      else
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
      break;
    case NVPTXISD::LoadV2:
    case NVPTXISD::LDGV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
      break;
    case NVPTXISD::LDUV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
      break;
    case NVPTXISD::LoadV4:
    case NVPTXISD::LDGV4:
      Opcode = pickOpcodeForVT(
          EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
      break;
    case NVPTXISD::LDUV4:
      Opcode = pickOpcodeForVT(
          EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { Addr, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                          : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (TM.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
            NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
            NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
            NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
            NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
            NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
            NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
            NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
            NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
            NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
            NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
            NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
            NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = {Base, Offset, Chain};
    LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
  } else {
    if (TM.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
            NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
            NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
            NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
            NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
            NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
            NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
            NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
            NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
            NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
            NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
            NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
            NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { Op1, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
  }

  // For automatic generation of LDG (through SelectLoad[Vector], not the
  // intrinsics), we may have an extending load like:
  //
  //   i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
  //
  // In this case, the matching logic above will select a load for the original
  // memory type (in this case, i8) and our types will not match (the node needs
  // to return an i32 in this case). Our LDG/LDU nodes do not support the
  // concept of sign-/zero-extension, so emulate it here by adding an explicit
  // CVT instruction. Ptxas should clean up any redundancies here.
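  // For example (illustrative): for an i8-to-i32 zero-extending load, the
  // selected LDG produces the narrow value (already widened to a 16-bit
  // register by the VT promotion above), and a cvt to i32 is inserted so the
  // original node's users see the type they expect.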

  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);

  if (OrigType != EltVT &&
      (LdNode || (OrigType.isFloatingPoint() && EltVT.isFloatingPoint()))) {
    // We have an extending-load. The instruction we selected operates on the
    // smaller type, but the SDNode we are replacing has the larger type. We
    // need to emit a CVT to make the types match.
    unsigned CvtOpc =
        GetConvertOpcode(OrigType.getSimpleVT(), EltVT.getSimpleVT(), LdNode);

    // For each output value, apply the manual sign/zero-extension and make sure
    // all users of the load go through that CVT.
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue Res(LD, i);
      SDValue OrigVal(N, i);

      SDNode *CvtNode =
          CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
                                 CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
                                                           DL, MVT::i32));
      ReplaceUses(OrigVal, SDValue(CvtNode, 0));
    }
  }

  ReplaceNode(N, LD);
  return true;
}

bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
  SDLoc dl(N);
  MemSDNode *ST = cast<MemSDNode>(N);
  assert(ST->writeMem() && "Expected store");
  StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N);
  AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N);
  assert((PlainStore || AtomicStore) && "Expected store");
  EVT StoreVT = ST->getMemoryVT();
  SDNode *NVPTXST = nullptr;

  // do not support pre/post inc/dec
  if (PlainStore && PlainStore->isIndexed())
    return false;

  if (!StoreVT.isSimple())
    return false;

  AtomicOrdering Ordering = ST->getSuccessOrdering();
  // In order to lower atomic stores with stronger guarantees we would need to
  // use store.release or insert fences. However these features were only added
  // with PTX ISA 6.0 / sm_70.
  // TODO: Check if we can actually use the new instructions and implement them.
  if (isStrongerThanMonotonic(Ordering))
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  // - .volatile has the same memory synchronization semantics as .relaxed.sys
  bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic;
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  //
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
  if (SimpleVT.isVector()) {
    assert((Isv2x16VT(StoreVT) || StoreVT == MVT::v4i8) &&
           "Unexpected vector type");
    // v2x16 is stored using st.b32
    toTypeWidth = 32;
  }

  unsigned int toType = getLdStRegType(ScalarVT);

  // Create the machine instruction DAG
  SDValue Chain = ST->getChain();
  SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Addr;
  SDValue Offset, Base;
  std::optional<unsigned> Opcode;
  MVT::SimpleValueType SourceVT =
      Value.getNode()->getSimpleValueType(0).SimpleTy;

  if (SelectDirectAddr(BasePtr, Addr)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
                             NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
                             NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Addr,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  } else if (PointerSize == 64
                 ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)
                 : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
                             NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
                             NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Base,
                     Offset,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  } else if (PointerSize == 64
                 ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
                 : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
                          NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
                          NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
                               NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
                               NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
    if (!Opcode)
      return false;

    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Base,
                     Offset,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  } else {
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
                          NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
                          NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
                               NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
                               NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     BasePtr,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  }

  if (!NVPTXST)
    return false;

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXST), {MemRef});
  ReplaceNode(N, NVPTXST);
  return true;
}

bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  std::optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *ST;
  EVT EltVT = Op1.getValueType();
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT StoreVT = MemSD->getMemoryVT();

  // Address Space Setting
  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
    report_fatal_error("Cannot store to pointer that points to constant "
                       "memory space");
  }
  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  assert(StoreVT.isSimple() && "Store value is not simple");
  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
  unsigned ToType = getLdStRegType(ScalarVT);

  SmallVector<SDValue, 12> StOps;
  SDValue N2;
  unsigned VecType;

  switch (N->getOpcode()) {
  case NVPTXISD::StoreV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    N2 = N->getOperand(3);
    break;
  case NVPTXISD::StoreV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    StOps.push_back(N->getOperand(3));
    StOps.push_back(N->getOperand(4));
    N2 = N->getOperand(5);
    break;
  default:
    return false;
  }

  // v8x16 is a special case. PTX doesn't have st.v8.x16
  // instruction. Instead, we split the vector into v2x16 chunks and
  // store them with st.v4.b32.
  if (Isv2x16VT(EltVT)) {
    assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected store opcode.");
    EltVT = MVT::i32;
    ToType = NVPTX::PTXLdStInstCode::Untyped;
    ToTypeWidth = 32;
  }

  StOps.push_back(getI32Imm(IsVolatile, DL));
  StOps.push_back(getI32Imm(CodeAddrSpace, DL));
  StOps.push_back(getI32Imm(VecType, DL));
  StOps.push_back(getI32Imm(ToType, DL));
  StOps.push_back(getI32Imm(ToTypeWidth, DL));

  if (SelectDirectAddr(N2, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::StoreV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
                               NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
                               NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
      break;
    case NVPTXISD::StoreV4:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
                               NVPTX::STV_i32_v4_avar, std::nullopt,
                               NVPTX::STV_f32_v4_avar, std::nullopt);
      break;
    }
    StOps.push_back(Addr);
  } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                               : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::StoreV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
                               NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
                               NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
      break;
    case NVPTXISD::StoreV4:
      Opcode =
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
                          NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
                          std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
      break;
    }
    StOps.push_back(Base);
    StOps.push_back(Offset);
  } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
                               : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::StoreV2:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                            NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64,
                            NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64,
                            NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64);
        break;
      case NVPTXISD::StoreV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
            NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
            NVPTX::STV_f32_v4_ari_64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::StoreV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
                                 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
                                 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
        break;
      case NVPTXISD::StoreV4:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
                                 NVPTX::STV_i32_v4_ari, std::nullopt,
                                 NVPTX::STV_f32_v4_ari, std::nullopt);
        break;
      }
    }
    StOps.push_back(Base);
    StOps.push_back(Offset);
  } else {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::StoreV2:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
            NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
            NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
            NVPTX::STV_f64_v2_areg_64);
        break;
      case NVPTXISD::StoreV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
            NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt,
            NVPTX::STV_f32_v4_areg_64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::StoreV2:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
                            NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
                            NVPTX::STV_i64_v2_areg, NVPTX::STV_f32_v2_areg,
                            NVPTX::STV_f64_v2_areg);
        break;
      case NVPTXISD::StoreV4:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
                            NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
                            std::nullopt, NVPTX::STV_f32_v4_areg, std::nullopt);
        break;
      }
    }
    StOps.push_back(N2);
  }

  if (!Opcode)
    return false;

  StOps.push_back(Chain);

  ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, StOps);

  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});

  ReplaceNode(N, ST);
  return true;
}

bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
  SDValue Chain = Node->getOperand(0);
  SDValue Offset = Node->getOperand(2);
  SDValue Glue = Node->getOperand(3);
  SDLoc DL(Node);
  MemSDNode *Mem = cast<MemSDNode>(Node);

  unsigned VecSize;
  switch (Node->getOpcode()) {
  default:
    return false;
  case NVPTXISD::LoadParam:
    VecSize = 1;
    break;
  case NVPTXISD::LoadParamV2:
    VecSize = 2;
    break;
  case NVPTXISD::LoadParamV4:
    VecSize = 4;
    break;
  }

  EVT EltVT = Node->getValueType(0);
  EVT MemVT = Mem->getMemoryVT();

  std::optional<unsigned> Opcode;

  switch (VecSize) {
  default:
    return false;
  case 1:
    Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
                             NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
                             NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
                             NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
    break;
  case 2:
    Opcode =
        pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
                        NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
                        NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32,
                        NVPTX::LoadParamMemV2F64);
    break;
  case 4:
    Opcode =
        pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
                        NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32,
                        std::nullopt, NVPTX::LoadParamMemV4F32, std::nullopt);
    break;
  }
  if (!Opcode)
    return false;

  SDVTList VTs;
  if (VecSize == 1) {
    VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
  } else if (VecSize == 2) {
    VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
  } else {
    EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
    VTs = CurDAG->getVTList(EVTs);
  }

  unsigned OffsetVal = Offset->getAsZExtVal();

  SmallVector<SDValue, 2> Ops;
  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
  Ops.push_back(Chain);
  Ops.push_back(Glue);

  ReplaceNode(Node, CurDAG->getMachineNode(*Opcode, DL, VTs, Ops));
  return true;
}
2095
2096bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
2097 SDLoc DL(N);
2098 SDValue Chain = N->getOperand(0);
2099 SDValue Offset = N->getOperand(1);
2100 unsigned OffsetVal = Offset->getAsZExtVal();
2101 MemSDNode *Mem = cast<MemSDNode>(N);
2102
2103 // How many elements do we have?
2104 unsigned NumElts = 1;
2105 switch (N->getOpcode()) {
2106 default:
2107 return false;
2109 NumElts = 1;
2110 break;
2112 NumElts = 2;
2113 break;
2115 NumElts = 4;
2116 break;
2117 }
2118
2119 // Build vector of operands
2121 for (unsigned i = 0; i < NumElts; ++i)
2122 Ops.push_back(N->getOperand(i + 2));
2123 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2124 Ops.push_back(Chain);
2125
2126 // Determine target opcode
2127 // If we have an i1, use an 8-bit store. The lowering code in
2128 // NVPTXISelLowering will have already emitted an upcast.
2129 std::optional<unsigned> Opcode = 0;
2130 switch (NumElts) {
2131 default:
2132 return false;
2133 case 1:
2134 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2135 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2136 NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2137 NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2138 if (Opcode == NVPTX::StoreRetvalI8) {
2139 // Fine tune the opcode depending on the size of the operand.
2140 // This helps to avoid creating redundant COPY instructions in
2141 // InstrEmitter::AddRegisterOperand().
2142 switch (Ops[0].getSimpleValueType().SimpleTy) {
2143 default:
2144 break;
2145 case MVT::i32:
2146 Opcode = NVPTX::StoreRetvalI8TruncI32;
2147 break;
2148 case MVT::i64:
2149 Opcode = NVPTX::StoreRetvalI8TruncI64;
2150 break;
2151 }
2152 }
2153 break;
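// Worked example for the fine-tuning above: when the i8 return value is
// currently live in a 32-bit register (common after integer promotion),
// plain StoreRetvalI8 would require a COPY into a 16-bit register class
// first. StoreRetvalI8TruncI32 consumes the i32 register directly and
// truncates as part of the store, so that COPY never gets created.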
2154 case 2:
2155 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2156 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2157 NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2158 NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2159 break;
2160 case 4:
2161 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2162 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2163 NVPTX::StoreRetvalV4I32, std::nullopt,
2164 NVPTX::StoreRetvalV4F32, std::nullopt);
2165 break;
2166 }
2167 if (!Opcode)
2168 return false;
2169
2170 SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
2171 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2172 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
2173
2174 ReplaceNode(N, Ret);
2175 return true;
2176}
2177
2178bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2179 SDLoc DL(N);
2180 SDValue Chain = N->getOperand(0);
2181 SDValue Param = N->getOperand(1);
2182 unsigned ParamVal = Param->getAsZExtVal();
2183 SDValue Offset = N->getOperand(2);
2184 unsigned OffsetVal = Offset->getAsZExtVal();
2185 MemSDNode *Mem = cast<MemSDNode>(N);
2186 SDValue Glue = N->getOperand(N->getNumOperands() - 1);
2187
2188 // How many elements do we have?
2189 unsigned NumElts = 1;
2190 switch (N->getOpcode()) {
2191 default:
2192 return false;
2193 case NVPTXISD::StoreParamU32:
2194 case NVPTXISD::StoreParamS32:
2195 case NVPTXISD::StoreParam:
2196 NumElts = 1;
2197 break;
2198 case NVPTXISD::StoreParamV2:
2199 NumElts = 2;
2200 break;
2201 case NVPTXISD::StoreParamV4:
2202 NumElts = 4;
2203 break;
2204 }
2205
2206 // Build vector of operands
2207 SmallVector<SDValue, 8> Ops;
2208 for (unsigned i = 0; i < NumElts; ++i)
2209 Ops.push_back(N->getOperand(i + 3));
2210 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2211 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2212 Ops.push_back(Chain);
2213 Ops.push_back(Glue);
2214
2215 // Determine target opcode
2216 // If we have an i1, use an 8-bit store. The lowering code in
2217 // NVPTXISelLowering will have already emitted an upcast.
2218 std::optional<unsigned> Opcode = 0;
2219 switch (N->getOpcode()) {
2220 default:
2221 switch (NumElts) {
2222 default:
2223 return false;
2224 case 1:
2225 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2226 NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2227 NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2228 NVPTX::StoreParamF32, NVPTX::StoreParamF64);
2229 if (Opcode == NVPTX::StoreParamI8) {
2230 // Fine tune the opcode depending on the size of the operand.
2231 // This helps to avoid creating redundant COPY instructions in
2232 // InstrEmitter::AddRegisterOperand().
2233 switch (Ops[0].getSimpleValueType().SimpleTy) {
2234 default:
2235 break;
2236 case MVT::i32:
2237 Opcode = NVPTX::StoreParamI8TruncI32;
2238 break;
2239 case MVT::i64:
2240 Opcode = NVPTX::StoreParamI8TruncI64;
2241 break;
2242 }
2243 }
2244 break;
2245 case 2:
2246 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2247 NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2248 NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2249 NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
2250 break;
2251 case 4:
2252 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2253 NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2254 NVPTX::StoreParamV4I32, std::nullopt,
2255 NVPTX::StoreParamV4F32, std::nullopt);
2256 break;
2257 }
2258 if (!Opcode)
2259 return false;
2260 break;
2261 // Special case: if we have a sign-extend/zero-extend node, insert the
2262 // conversion instruction first, and use that as the value operand to
2263 // the selected StoreParam node.
2264 case NVPTXISD::StoreParamU32: {
2265 Opcode = NVPTX::StoreParamI32;
2266 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
2267 MVT::i32);
2268 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2269 MVT::i32, Ops[0], CvtNone);
2270 Ops[0] = SDValue(Cvt, 0);
2271 break;
2272 }
2273 case NVPTXISD::StoreParamS32: {
2274 Opcode = NVPTX::StoreParamI32;
2275 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
2276 MVT::i32);
2277 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2278 MVT::i32, Ops[0], CvtNone);
2279 Ops[0] = SDValue(Cvt, 0);
2280 break;
2281 }
2282 }
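// Worked example for the sign/zero-extend special case above (a sketch;
// register names are illustrative): passing an i16 value as a 32-bit
// parameter through StoreParamU32 widens first and then stores, roughly
//
//   cvt.u32.u16  %r1, %rs1;
//   st.param.b32 [param0+0], %r1;
//
// where CvtNone selects the plain cvt form (no saturation or rounding
// modifier).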
2283
2284 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2285 SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, RetVTs, Ops);
2286 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2287 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
2288
2289 ReplaceNode(N, Ret);
2290 return true;
2291}
2292
2293bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
2294 unsigned Opc = 0;
2295
2296 switch (N->getOpcode()) {
2297 default: return false;
2299 Opc = NVPTX::TEX_1D_F32_S32_RR;
2300 break;
2302 Opc = NVPTX::TEX_1D_F32_F32_RR;
2303 break;
2305 Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
2306 break;
2308 Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
2309 break;
2311 Opc = NVPTX::TEX_1D_S32_S32_RR;
2312 break;
2314 Opc = NVPTX::TEX_1D_S32_F32_RR;
2315 break;
2317 Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
2318 break;
2320 Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
2321 break;
2323 Opc = NVPTX::TEX_1D_U32_S32_RR;
2324 break;
2326 Opc = NVPTX::TEX_1D_U32_F32_RR;
2327 break;
2329 Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
2330 break;
2332 Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
2333 break;
2335 Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
2336 break;
2338 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
2339 break;
2341 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
2342 break;
2344 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
2345 break;
2347 Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
2348 break;
2350 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
2351 break;
2353 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
2354 break;
2356 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
2357 break;
2359 Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
2360 break;
2362 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
2363 break;
2365 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
2366 break;
2368 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
2369 break;
2371 Opc = NVPTX::TEX_2D_F32_S32_RR;
2372 break;
2374 Opc = NVPTX::TEX_2D_F32_F32_RR;
2375 break;
2377 Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
2378 break;
2380 Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
2381 break;
2383 Opc = NVPTX::TEX_2D_S32_S32_RR;
2384 break;
2386 Opc = NVPTX::TEX_2D_S32_F32_RR;
2387 break;
2389 Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
2390 break;
2392 Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
2393 break;
2395 Opc = NVPTX::TEX_2D_U32_S32_RR;
2396 break;
2398 Opc = NVPTX::TEX_2D_U32_F32_RR;
2399 break;
2401 Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
2402 break;
2404 Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
2405 break;
2407 Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
2408 break;
2410 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
2411 break;
2413 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
2414 break;
2416 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
2417 break;
2419 Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
2420 break;
2422 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
2423 break;
2425 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
2426 break;
2428 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
2429 break;
2431 Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
2432 break;
2434 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
2435 break;
2437 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
2438 break;
2440 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
2441 break;
2443 Opc = NVPTX::TEX_3D_F32_S32_RR;
2444 break;
2446 Opc = NVPTX::TEX_3D_F32_F32_RR;
2447 break;
2449 Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
2450 break;
2452 Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
2453 break;
2455 Opc = NVPTX::TEX_3D_S32_S32_RR;
2456 break;
2458 Opc = NVPTX::TEX_3D_S32_F32_RR;
2459 break;
2461 Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
2462 break;
2464 Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
2465 break;
2467 Opc = NVPTX::TEX_3D_U32_S32_RR;
2468 break;
2470 Opc = NVPTX::TEX_3D_U32_F32_RR;
2471 break;
2473 Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
2474 break;
2476 Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
2477 break;
2479 Opc = NVPTX::TEX_CUBE_F32_F32_RR;
2480 break;
2482 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
2483 break;
2485 Opc = NVPTX::TEX_CUBE_S32_F32_RR;
2486 break;
2488 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
2489 break;
2491 Opc = NVPTX::TEX_CUBE_U32_F32_RR;
2492 break;
2494 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
2495 break;
2497 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
2498 break;
2500 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
2501 break;
2503 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
2504 break;
2506 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
2507 break;
2509 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
2510 break;
2512 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
2513 break;
2515 Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
2516 break;
2518 Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
2519 break;
2521 Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
2522 break;
2524 Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
2525 break;
2527 Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
2528 break;
2530 Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
2531 break;
2533 Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
2534 break;
2536 Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
2537 break;
2539 Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
2540 break;
2542 Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
2543 break;
2545 Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
2546 break;
2548 Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
2549 break;
2551 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
2552 break;
2554 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
2555 break;
2557 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
2558 break;
2560 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
2561 break;
2563 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
2564 break;
2566 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
2567 break;
2569 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
2570 break;
2572 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
2573 break;
2575 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
2576 break;
2578 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
2579 break;
2581 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
2582 break;
2584 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
2585 break;
2587 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
2588 break;
2590 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
2591 break;
2593 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
2594 break;
2596 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
2597 break;
2599 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
2600 break;
2602 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
2603 break;
2605 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
2606 break;
2608 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
2609 break;
2611 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
2612 break;
2614 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
2615 break;
2617 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
2618 break;
2620 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
2621 break;
2623 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
2624 break;
2626 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
2627 break;
2629 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
2630 break;
2632 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
2633 break;
2635 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
2636 break;
2638 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
2639 break;
2641 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
2642 break;
2644 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
2645 break;
2647 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
2648 break;
2650 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
2651 break;
2653 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
2654 break;
2656 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
2657 break;
2659 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
2660 break;
2662 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
2663 break;
2665 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
2666 break;
2668 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
2669 break;
2671 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
2672 break;
2674 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
2675 break;
2677 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
2678 break;
2680 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
2681 break;
2683 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
2684 break;
2686 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
2687 break;
2689 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
2690 break;
2692 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
2693 break;
2695 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
2696 break;
2698 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
2699 break;
2701 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
2702 break;
2704 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
2705 break;
2707 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
2708 break;
2710 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
2711 break;
2713 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
2714 break;
2716 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
2717 break;
2719 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
2720 break;
2722 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
2723 break;
2725 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
2726 break;
2728 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
2729 break;
2731 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
2732 break;
2734 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
2735 break;
2737 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
2738 break;
2740 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
2741 break;
2743 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
2744 break;
2746 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
2747 break;
2749 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
2750 break;
2752 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
2753 break;
2755 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
2756 break;
2758 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
2759 break;
2761 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
2762 break;
2764 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
2765 break;
2767 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
2768 break;
2770 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
2771 break;
2773 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
2774 break;
2776 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
2777 break;
2779 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
2780 break;
2782 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
2783 break;
2785 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
2786 break;
2788 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
2789 break;
2791 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
2792 break;
2794 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
2795 break;
2797 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
2798 break;
2800 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
2801 break;
2803 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_R;
2804 break;
2806 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_R;
2807 break;
2809 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_R;
2810 break;
2812 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_R;
2813 break;
2815 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_R;
2816 break;
2818 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_R;
2819 break;
2820 }
2821
2822 // Copy over operands
2823 SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
2824 Ops.push_back(N->getOperand(0)); // Move chain to the back.
2825
2826 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
2827 return true;
2828}
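// Operand-layout note for the Ops construction above: the texture ISD node
// carries (chain, args...) while the selected machine instruction expects
// (args..., chain), so tryTextureIntrinsic rotates operand 0 to the back
// rather than copying the operand list verbatim.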
2829
2830bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
2831 unsigned Opc = 0;
2832 switch (N->getOpcode()) {
2833 default: return false;
2835 Opc = NVPTX::SULD_1D_I8_CLAMP_R;
2836 break;
2838 Opc = NVPTX::SULD_1D_I16_CLAMP_R;
2839 break;
2841 Opc = NVPTX::SULD_1D_I32_CLAMP_R;
2842 break;
2844 Opc = NVPTX::SULD_1D_I64_CLAMP_R;
2845 break;
2847 Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
2848 break;
2850 Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
2851 break;
2853 Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
2854 break;
2856 Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
2857 break;
2859 Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
2860 break;
2862 Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
2863 break;
2865 Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
2866 break;
2868 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
2869 break;
2871 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
2872 break;
2874 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
2875 break;
2877 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
2878 break;
2880 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
2881 break;
2883 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
2884 break;
2886 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
2887 break;
2889 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
2890 break;
2892 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
2893 break;
2895 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
2896 break;
2898 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
2899 break;
2901 Opc = NVPTX::SULD_2D_I8_CLAMP_R;
2902 break;
2904 Opc = NVPTX::SULD_2D_I16_CLAMP_R;
2905 break;
2907 Opc = NVPTX::SULD_2D_I32_CLAMP_R;
2908 break;
2910 Opc = NVPTX::SULD_2D_I64_CLAMP_R;
2911 break;
2913 Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
2914 break;
2916 Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
2917 break;
2919 Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
2920 break;
2922 Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
2923 break;
2925 Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
2926 break;
2928 Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
2929 break;
2931 Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
2932 break;
2934 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
2935 break;
2937 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
2938 break;
2940 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
2941 break;
2943 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
2944 break;
2946 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
2947 break;
2949 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
2950 break;
2952 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
2953 break;
2955 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
2956 break;
2958 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
2959 break;
2961 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
2962 break;
2964 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
2965 break;
2967 Opc = NVPTX::SULD_3D_I8_CLAMP_R;
2968 break;
2970 Opc = NVPTX::SULD_3D_I16_CLAMP_R;
2971 break;
2973 Opc = NVPTX::SULD_3D_I32_CLAMP_R;
2974 break;
2976 Opc = NVPTX::SULD_3D_I64_CLAMP_R;
2977 break;
2979 Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
2980 break;
2982 Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
2983 break;
2985 Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
2986 break;
2988 Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
2989 break;
2991 Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
2992 break;
2994 Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
2995 break;
2997 Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
2998 break;
3000 Opc = NVPTX::SULD_1D_I8_TRAP_R;
3001 break;
3003 Opc = NVPTX::SULD_1D_I16_TRAP_R;
3004 break;
3006 Opc = NVPTX::SULD_1D_I32_TRAP_R;
3007 break;
3009 Opc = NVPTX::SULD_1D_I64_TRAP_R;
3010 break;
3012 Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
3013 break;
3015 Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
3016 break;
3018 Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
3019 break;
3021 Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
3022 break;
3024 Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
3025 break;
3027 Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
3028 break;
3030 Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
3031 break;
3033 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
3034 break;
3036 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
3037 break;
3039 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
3040 break;
3042 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
3043 break;
3045 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
3046 break;
3048 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
3049 break;
3051 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
3052 break;
3054 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
3055 break;
3057 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
3058 break;
3060 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
3061 break;
3063 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
3064 break;
3066 Opc = NVPTX::SULD_2D_I8_TRAP_R;
3067 break;
3069 Opc = NVPTX::SULD_2D_I16_TRAP_R;
3070 break;
3072 Opc = NVPTX::SULD_2D_I32_TRAP_R;
3073 break;
3075 Opc = NVPTX::SULD_2D_I64_TRAP_R;
3076 break;
3078 Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
3079 break;
3081 Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
3082 break;
3084 Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
3085 break;
3087 Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
3088 break;
3090 Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
3091 break;
3093 Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
3094 break;
3096 Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
3097 break;
3099 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
3100 break;
3102 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
3103 break;
3105 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
3106 break;
3108 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
3109 break;
3111 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
3112 break;
3114 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
3115 break;
3117 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
3118 break;
3120 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
3121 break;
3123 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
3124 break;
3126 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
3127 break;
3129 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
3130 break;
3132 Opc = NVPTX::SULD_3D_I8_TRAP_R;
3133 break;
3135 Opc = NVPTX::SULD_3D_I16_TRAP_R;
3136 break;
3138 Opc = NVPTX::SULD_3D_I32_TRAP_R;
3139 break;
3141 Opc = NVPTX::SULD_3D_I64_TRAP_R;
3142 break;
3144 Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
3145 break;
3147 Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
3148 break;
3150 Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
3151 break;
3153 Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
3154 break;
3156 Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
3157 break;
3159 Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
3160 break;
3162 Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
3163 break;
3165 Opc = NVPTX::SULD_1D_I8_ZERO_R;
3166 break;
3168 Opc = NVPTX::SULD_1D_I16_ZERO_R;
3169 break;
3171 Opc = NVPTX::SULD_1D_I32_ZERO_R;
3172 break;
3174 Opc = NVPTX::SULD_1D_I64_ZERO_R;
3175 break;
3177 Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
3178 break;
3180 Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
3181 break;
3183 Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
3184 break;
3186 Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
3187 break;
3189 Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
3190 break;
3192 Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
3193 break;
3195 Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
3196 break;
3198 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
3199 break;
3201 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
3202 break;
3204 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
3205 break;
3207 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
3208 break;
3210 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
3211 break;
3213 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
3214 break;
3216 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
3217 break;
3219 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
3220 break;
3222 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
3223 break;
3225 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
3226 break;
3228 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
3229 break;
3231 Opc = NVPTX::SULD_2D_I8_ZERO_R;
3232 break;
3234 Opc = NVPTX::SULD_2D_I16_ZERO_R;
3235 break;
3237 Opc = NVPTX::SULD_2D_I32_ZERO_R;
3238 break;
3240 Opc = NVPTX::SULD_2D_I64_ZERO_R;
3241 break;
3243 Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
3244 break;
3246 Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
3247 break;
3249 Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
3250 break;
3252 Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
3253 break;
3255 Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
3256 break;
3258 Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
3259 break;
3261 Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
3262 break;
3264 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
3265 break;
3267 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
3268 break;
3270 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
3271 break;
3273 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
3274 break;
3276 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
3277 break;
3279 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
3280 break;
3282 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
3283 break;
3285 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
3286 break;
3288 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
3289 break;
3291 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
3292 break;
3294 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
3295 break;
3297 Opc = NVPTX::SULD_3D_I8_ZERO_R;
3298 break;
3300 Opc = NVPTX::SULD_3D_I16_ZERO_R;
3301 break;
3303 Opc = NVPTX::SULD_3D_I32_ZERO_R;
3304 break;
3306 Opc = NVPTX::SULD_3D_I64_ZERO_R;
3307 break;
3309 Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
3310 break;
3312 Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
3313 break;
3315 Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
3316 break;
3318 Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
3319 break;
3321 Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
3322 break;
3324 Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
3325 break;
3327 Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
3328 break;
3329 }
3330
3331 // Copy over operands
3332 SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
3333 Ops.push_back(N->getOperand(0)); // Move chain to the back.
3334
3335 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3336 return true;
3337}
3338
3339
3340/// SelectBFE - Look for instruction sequences that can be made more efficient
3341/// by using the 'bfe' (bit-field extract) PTX instruction
3342bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
3343 SDLoc DL(N);
3344 SDValue LHS = N->getOperand(0);
3345 SDValue RHS = N->getOperand(1);
3346 SDValue Len;
3347 SDValue Start;
3348 SDValue Val;
3349 bool IsSigned = false;
3350
3351 if (N->getOpcode() == ISD::AND) {
3352 // Canonicalize the operands
3353 // We want 'and %val, %mask'
3354 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3355 std::swap(LHS, RHS);
3356 }
3357
3358 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3359 if (!Mask) {
3360 // We need a constant mask on the RHS of the AND
3361 return false;
3362 }
3363
3364 // Extract the mask bits
3365 uint64_t MaskVal = Mask->getZExtValue();
3366 if (!isMask_64(MaskVal)) {
3367 // We *could* handle shifted masks here, but doing so would require an
3368 // 'and' operation to fix up the low-order bits so we would trade
3369 // shr+and for bfe+and, which has the same throughput
3370 return false;
3371 }
3372
3373 // How many bits are in our mask?
3374 int64_t NumBits = countr_one(MaskVal);
3375 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3376
3377 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3378 // We have a 'srl/and' pair, extract the effective start bit and length
3379 Val = LHS.getNode()->getOperand(0);
3380 Start = LHS.getNode()->getOperand(1);
3381 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3382 if (StartConst) {
3383 uint64_t StartVal = StartConst->getZExtValue();
3384 // How many "good" bits do we have left? "good" is defined here as bits
3385 // that exist in the original value, not shifted in.
3386 int64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3387 if (NumBits > GoodBits) {
3388 // Do not handle the case where bits have been shifted in. In theory
3389 // we could handle this, but the cost is likely higher than just
3390 // emitting the srl/and pair.
3391 return false;
3392 }
3393 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
3394 } else {
3395 // Do not handle the case where the shift amount (can be zero if no srl
3396 // was found) is not constant. We could handle this case, but it would
3397 // require run-time logic that would be more expensive than just
3398 // emitting the srl/and pair.
3399 return false;
3400 }
3401 } else {
3402 // Do not handle the case where the LHS of the and is not a shift. While
3403 // it would be trivial to handle this case, it would just transform
3404 // 'and' -> 'bfe', but 'and' has higher throughput.
3405 return false;
3406 }
3407 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3408 if (LHS->getOpcode() == ISD::AND) {
3409 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3410 if (!ShiftCnst) {
3411 // Shift amount must be constant
3412 return false;
3413 }
3414
3415 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3416
3417 SDValue AndLHS = LHS->getOperand(0);
3418 SDValue AndRHS = LHS->getOperand(1);
3419
3420 // Canonicalize the AND to have the mask on the RHS
3421 if (isa<ConstantSDNode>(AndLHS)) {
3422 std::swap(AndLHS, AndRHS);
3423 }
3424
3425 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3426 if (!MaskCnst) {
3427 // Mask must be constant
3428 return false;
3429 }
3430
3431 uint64_t MaskVal = MaskCnst->getZExtValue();
3432 uint64_t NumZeros;
3433 uint64_t NumBits;
3434 if (isMask_64(MaskVal)) {
3435 NumZeros = 0;
3436 // The number of bits in the result bitfield will be the number of
3437 // trailing ones (the AND) minus the number of bits we shift off
3438 NumBits = llvm::countr_one(MaskVal) - ShiftAmt;
3439 } else if (isShiftedMask_64(MaskVal)) {
3440 NumZeros = llvm::countr_zero(MaskVal);
3441 unsigned NumOnes = llvm::countr_one(MaskVal >> NumZeros);
3442 // The number of bits in the result bitfield will be the number of
3443 // trailing zeros plus the number of set bits in the mask minus the
3444 // number of bits we shift off
3445 NumBits = NumZeros + NumOnes - ShiftAmt;
3446 } else {
3447 // This is not a mask we can handle
3448 return false;
3449 }
3450
3451 if (ShiftAmt < NumZeros) {
3452 // Handling this case would require extra logic that would make this
3453 // transformation non-profitable
3454 return false;
3455 }
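// Worked example (values are illustrative): for (srl (and %x, 0xFF00), 8)
// the mask 0xFF00 is a shifted mask with NumZeros = 8 and NumOnes = 8, and
// ShiftAmt = 8, so NumBits = 8 + 8 - 8 = 8. With Start = ShiftAmt below,
// the pair becomes "bfe.u32 %dst, %x, 8, 8", i.e. extract 8 bits starting
// at bit 8, which matches the and+srl semantics exactly.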
3456
3457 Val = AndLHS;
3458 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
3459 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3460 } else if (LHS->getOpcode() == ISD::SHL) {
3461 // Here, we have a pattern like:
3462 //
3463 // (sra (shl val, NN), MM)
3464 // or
3465 // (srl (shl val, NN), MM)
3466 //
3467 // If MM >= NN, we can efficiently optimize this with bfe
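// Worked instance on i32 (illustrative): (sra (shl %v, 24), 24) is a
// sign-extension of the low 8 bits of %v. Here NN = MM = 24, so
// Start = MM - NN = 0 and Len = 32 - MM = 8, and because the outer shift
// is arithmetic the signed form "bfe.s32 %dst, %v, 0, 8" is used.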
3468 Val = LHS->getOperand(0);
3469
3470 SDValue ShlRHS = LHS->getOperand(1);
3471 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3472 if (!ShlCnst) {
3473 // Shift amount must be constant
3474 return false;
3475 }
3476 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3477
3478 SDValue ShrRHS = RHS;
3479 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3480 if (!ShrCnst) {
3481 // Shift amount must be constant
3482 return false;
3483 }
3484 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3485
3486 // To avoid extra codegen and be profitable, we need Outer >= Inner
3487 if (OuterShiftAmt < InnerShiftAmt) {
3488 return false;
3489 }
3490
3491 // If the outer shift is more than the type size, we have no bitfield to
3492 // extract (and since we also check that the inner shift is <= the outer
3493 // shift, this also implies that the inner shift is < the type size)
3494 if (OuterShiftAmt >= Val.getValueSizeInBits()) {
3495 return false;
3496 }
3497
3498 Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
3499 MVT::i32);
3500 Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
3501 DL, MVT::i32);
3502
3503 if (N->getOpcode() == ISD::SRA) {
3504 // If we have an arithmetic right shift, we need to use the signed bfe
3505 // variant
3506 IsSigned = true;
3507 }
3508 } else {
3509 // No can do...
3510 return false;
3511 }
3512 } else {
3513 // No can do...
3514 return false;
3515 }
3516
3517
3518 unsigned Opc;
3519 // For the BFE operations we form here from "and" and "srl", always use the
3520 // unsigned variants.
3521 if (Val.getValueType() == MVT::i32) {
3522 if (IsSigned) {
3523 Opc = NVPTX::BFE_S32rii;
3524 } else {
3525 Opc = NVPTX::BFE_U32rii;
3526 }
3527 } else if (Val.getValueType() == MVT::i64) {
3528 if (IsSigned) {
3529 Opc = NVPTX::BFE_S64rii;
3530 } else {
3531 Opc = NVPTX::BFE_U64rii;
3532 }
3533 } else {
3534 // We cannot handle this type
3535 return false;
3536 }
3537
3538 SDValue Ops[] = {
3539 Val, Start, Len
3540 };
3541
3542 ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
3543 return true;
3544}
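// A minimal reference model (not part of the original file; the helper name
// is made up for illustration) of the in-range bfe.u32 semantics that tryBFE
// relies on: extract Len bits of A starting at bit Start and zero-fill the
// rest, so that (and (srl %x, Start), (1 << Len) - 1) and the emitted
// bfe.u32 agree whenever Start + Len <= 32.
static uint32_t BFEReferenceU32(uint32_t A, uint32_t Start, uint32_t Len) {
  assert(Start + Len <= 32 && "model only covers the in-range case");
  if (Len == 0)
    return 0;
  uint32_t Mask = Len == 32 ? ~0u : ((1u << Len) - 1);
  return (A >> Start) & Mask;
}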
3545
3546// SelectDirectAddr - Match a direct address for DAG.
3547// A direct address could be a globaladdress or externalsymbol.
3548bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3549 // Return true if TGA or ES.
3550 if (N.getOpcode() == ISD::TargetGlobalAddress ||
3551 N.getOpcode() == ISD::TargetExternalSymbol) {
3552 Address = N;
3553 return true;
3554 }
3555 if (N.getOpcode() == NVPTXISD::Wrapper) {
3556 Address = N.getOperand(0);
3557 return true;
3558 }
3559 // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
3560 if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
3561 if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
3562 CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
3563 CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
3564 return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
3565 }
3566 return false;
3567}
3568
3569// symbol+offset
3570bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3571 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3572 if (Addr.getOpcode() == ISD::ADD) {
3573 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3574 SDValue base = Addr.getOperand(0);
3575 if (SelectDirectAddr(base, Base)) {
3576 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3577 mvt);
3578 return true;
3579 }
3580 }
3581 }
3582 return false;
3583}
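// Example of the shape this matches (illustrative): for a DAG like
//
//   (add (NVPTXISD::Wrapper tglobaladdr:@sym), 12)
//
// SelectDirectAddr peels the Wrapper, so Base becomes the tglobaladdr node
// and Offset becomes a target constant 12 of the requested width.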
3584
3585// symbol+offset
3586bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3587 SDValue &Base, SDValue &Offset) {
3588 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3589}
3590
3591// symbol+offset
3592bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3593 SDValue &Base, SDValue &Offset) {
3594 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3595}
3596
3597// register+offset
3598bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3599 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3600 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3601 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3602 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
3603 return true;
3604 }
3605 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3606 Addr.getOpcode() == ISD::TargetGlobalAddress)
3607 return false; // direct calls.
3608
3609 if (Addr.getOpcode() == ISD::ADD) {
3610 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3611 return false;
3612 }
3613 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3614 if (FrameIndexSDNode *FIN =
3615 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
3616 // Constant offset from frame ref.
3617 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3618 else
3619 Base = Addr.getOperand(0);
3620 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3621 mvt);
3622 return true;
3623 }
3624 }
3625 return false;
3626}
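// Example of the shape this matches (illustrative): a stack access such as
//
//   (add FrameIndex:i64<0>, 16)
//
// yields Base = TargetFrameIndex<0> and Offset = 16, while a bare frame
// index is matched with an offset of 0 by the first check above.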
3627
3628// register+offset
3629bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3630 SDValue &Base, SDValue &Offset) {
3631 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3632}
3633
3634// register+offset
3635bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3636 SDValue &Base, SDValue &Offset) {
3637 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3638}
3639
3640bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3641 unsigned int spN) const {
3642 const Value *Src = nullptr;
3643 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
3644 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3645 return true;
3646 Src = mN->getMemOperand()->getValue();
3647 }
3648 if (!Src)
3649 return false;
3650 if (auto *PT = dyn_cast<PointerType>(Src->getType()))
3651 return (PT->getAddressSpace() == spN);
3652 return false;
3653}
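// Usage note: this check is purely IR-based; it inspects the pointer type
// recorded in the node's MachineMemOperand. A load through a pointer in
// addrspace(1) therefore satisfies ChkMemSDNodeAddressSpace(N,
// ADDRESS_SPACE_GLOBAL), while pseudo source values (e.g. fixed stack
// slots) are only accepted for address space 0.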
3654
3655/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3656/// inline asm expressions.
3657bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
3658 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
3659 std::vector<SDValue> &OutOps) {
3660 SDValue Op0, Op1;
3661 switch (ConstraintID) {
3662 default:
3663 return true;
3664 case InlineAsm::ConstraintCode::m: // memory
3665 if (SelectDirectAddr(Op, Op0)) {
3666 OutOps.push_back(Op0);
3667 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
3668 return false;
3669 }
3670 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3671 OutOps.push_back(Op0);
3672 OutOps.push_back(Op1);
3673 return false;
3674 }
3675 break;
3676 }
3677 return true;
3678}
3679
3680/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
3681/// conversion from \p SrcTy to \p DestTy.
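/// For example, widening a 16-bit loaded value to i32 yields
/// NVPTX::CVT_s32_s16 when \p LdNode is a sign-extending load and
/// NVPTX::CVT_u32_u16 otherwise, per the table below.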
3682unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
3683 LoadSDNode *LdNode) {
3684 bool IsSigned = LdNode && LdNode->getExtensionType() == ISD::SEXTLOAD;
3685 switch (SrcTy.SimpleTy) {
3686 default:
3687 llvm_unreachable("Unhandled source type");
3688 case MVT::i8:
3689 switch (DestTy.SimpleTy) {
3690 default:
3691 llvm_unreachable("Unhandled dest type");
3692 case MVT::i16:
3693 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3694 case MVT::i32:
3695 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3696 case MVT::i64:
3697 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3698 }
3699 case MVT::i16:
3700 switch (DestTy.SimpleTy) {
3701 default:
3702 llvm_unreachable("Unhandled dest type");
3703 case MVT::i8:
3704 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3705 case MVT::i32:
3706 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3707 case MVT::i64:
3708 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3709 }
3710 case MVT::i32:
3711 switch (DestTy.SimpleTy) {
3712 default:
3713 llvm_unreachable("Unhandled dest type");
3714 case MVT::i8:
3715 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3716 case MVT::i16:
3717 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3718 case MVT::i64:
3719 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3720 }
3721 case MVT::i64:
3722 switch (DestTy.SimpleTy) {
3723 default:
3724 llvm_unreachable("Unhandled dest type");
3725 case MVT::i8:
3726 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3727 case MVT::i16:
3728 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3729 case MVT::i32:
3730 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
3731 }
3732 case MVT::f16:
3733 switch (DestTy.SimpleTy) {
3734 default:
3735 llvm_unreachable("Unhandled dest type");
3736 case MVT::f32:
3737 return NVPTX::CVT_f32_f16;
3738 case MVT::f64:
3739 return NVPTX::CVT_f64_f16;
3740 }
3741 }
3742}