LLVM 20.0.0git
ARMSelectionDAGInfo.cpp
Go to the documentation of this file.
1//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the ARMSelectionDAGInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARMTargetMachine.h"
18using namespace llvm;
19
20#define DEBUG_TYPE "arm-selectiondag-info"
21
23 "arm-memtransfer-tploop", cl::Hidden,
24 cl::desc("Control conversion of memcpy to "
25 "Tail predicated loops (WLSTP)"),
28 "Don't convert memcpy to TP loop."),
29 clEnumValN(TPLoop::ForceEnabled, "force-enabled",
30 "Always convert memcpy to TP loop."),
32 "Allow (may be subject to certain conditions) "
33 "conversion of memcpy to TP loop.")));
34
35// Emit, if possible, a specialized version of the given Libcall. Typically this
36// means selecting the appropriately aligned version, but we also convert memset
37// of 0 into memclr.
39 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
40 SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
41 const ARMSubtarget &Subtarget =
43 const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
44
45 // Only use a specialized AEABI function if the default version of this
46 // Libcall is an AEABI function.
47 if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
48 return SDValue();
49
50 // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
51 // able to translate memset to memclr and use the value to index the function
52 // name array.
53 enum {
54 AEABI_MEMCPY = 0,
55 AEABI_MEMMOVE,
56 AEABI_MEMSET,
57 AEABI_MEMCLR
58 } AEABILibcall;
59 switch (LC) {
60 case RTLIB::MEMCPY:
61 AEABILibcall = AEABI_MEMCPY;
62 break;
63 case RTLIB::MEMMOVE:
64 AEABILibcall = AEABI_MEMMOVE;
65 break;
66 case RTLIB::MEMSET:
67 AEABILibcall = AEABI_MEMSET;
68 if (isNullConstant(Src))
69 AEABILibcall = AEABI_MEMCLR;
70 break;
71 default:
72 return SDValue();
73 }
74
75 // Choose the most-aligned libcall variant that we can
76 enum {
77 ALIGN1 = 0,
78 ALIGN4,
79 ALIGN8
80 } AlignVariant;
81 if ((Align & 7) == 0)
82 AlignVariant = ALIGN8;
83 else if ((Align & 3) == 0)
84 AlignVariant = ALIGN4;
85 else
86 AlignVariant = ALIGN1;
87
90 Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
91 Entry.Node = Dst;
92 Args.push_back(Entry);
93 if (AEABILibcall == AEABI_MEMCLR) {
94 Entry.Node = Size;
95 Args.push_back(Entry);
96 } else if (AEABILibcall == AEABI_MEMSET) {
97 // Adjust parameters for memset: EABI uses the format (ptr, size, value),
98 // whereas the GNU library uses (ptr, value, size).
99 // See RTABI section 4.3.4
100 Entry.Node = Size;
101 Args.push_back(Entry);
102
103 // Extend or truncate the argument to be an i32 value for the call.
104 if (Src.getValueType().bitsGT(MVT::i32))
105 Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
106 else if (Src.getValueType().bitsLT(MVT::i32))
107 Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
108
109 Entry.Node = Src;
110 Entry.Ty = Type::getInt32Ty(*DAG.getContext());
111 Entry.IsSExt = false;
112 Args.push_back(Entry);
113 } else {
114 Entry.Node = Src;
115 Args.push_back(Entry);
116
117 Entry.Node = Size;
118 Args.push_back(Entry);
119 }
120
121 char const *FunctionNames[4][3] = {
122 { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" },
123 { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
124 { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" },
125 { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }
126 };
128 CLI.setDebugLoc(dl)
129 .setChain(Chain)
131 TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
132 DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
133 TLI->getPointerTy(DAG.getDataLayout())),
134 std::move(Args))
136 std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
137
138 return CallResult.second;
139}
140
141static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget,
142 const SelectionDAG &DAG,
143 ConstantSDNode *ConstantSize,
144 Align Alignment, bool IsMemcpy) {
145 auto &F = DAG.getMachineFunction().getFunction();
147 return false;
149 return true;
150 // Do not generate inline TP loop if optimizations are disabled,
151 // or if optimization for size (-Os or -Oz) is on.
152 if (F.hasOptNone() || F.hasOptSize())
153 return false;
154 // If cli option is unset, for memset always generate inline TP.
155 // For memcpy, check some conditions
156 if (!IsMemcpy)
157 return true;
158 if (!ConstantSize && Alignment >= Align(4))
159 return true;
160 if (ConstantSize &&
161 ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold() &&
162 ConstantSize->getZExtValue() <
164 return true;
165 return false;
166}
167
169 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
170 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
171 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
172 const ARMSubtarget &Subtarget =
174 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
175
176 if (Subtarget.hasMVEIntegerOps() &&
177 shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment, true))
178 return DAG.getNode(ARMISD::MEMCPYLOOP, dl, MVT::Other, Chain, Dst, Src,
179 DAG.getZExtOrTrunc(Size, dl, MVT::i32));
180
181 // Do repeated 4-byte loads and stores. To be improved.
182 // This requires 4-byte alignment.
183 if (Alignment < Align(4))
184 return SDValue();
185 // This requires the copy size to be a constant, preferably
186 // within a subtarget-specific limit.
187 if (!ConstantSize)
188 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
189 Alignment.value(), RTLIB::MEMCPY);
190 uint64_t SizeVal = ConstantSize->getZExtValue();
191 if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
192 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
193 Alignment.value(), RTLIB::MEMCPY);
194
195 unsigned BytesLeft = SizeVal & 3;
196 unsigned NumMemOps = SizeVal >> 2;
197 unsigned EmittedNumMemOps = 0;
198 EVT VT = MVT::i32;
199 unsigned VTSize = 4;
200 unsigned i = 0;
201 // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
202 const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
203 SDValue TFOps[6];
204 SDValue Loads[6];
205 uint64_t SrcOff = 0, DstOff = 0;
206
207 // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
208 // VLDM/VSTM and make this code emit it when appropriate. This would reduce
209 // pressure on the general purpose registers. However this seems harder to map
210 // onto the register allocator's view of the world.
211
212 // The number of MEMCPY pseudo-instructions to emit. We use up to
213 // MaxLoadsInLDM registers per MEMCPY, which will get lowered into ldm/stm
214 // later on. This is a lower bound on the number of MEMCPY operations we must
215 // emit.
216 unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
217
218 // Code size optimisation: do not inline memcpy if expansion results in
219 // more instructions than the library call.
220 if (NumMEMCPYs > 1 && Subtarget.hasMinSize()) {
221 return SDValue();
222 }
223
224 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
225
226 for (unsigned I = 0; I != NumMEMCPYs; ++I) {
227 // Evenly distribute registers among MEMCPY operations to reduce register
228 // pressure.
229 unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
230 unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
231
232 Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
233 DAG.getConstant(NumRegs, dl, MVT::i32));
234 Src = Dst.getValue(1);
235 Chain = Dst.getValue(2);
236
237 DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
238 SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
239
240 EmittedNumMemOps = NextEmittedNumMemOps;
241 }
242
243 if (BytesLeft == 0)
244 return Chain;
245
246 // Issue loads / stores for the trailing (1 - 3) bytes.
247 auto getRemainingValueType = [](unsigned BytesLeft) {
248 return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
249 };
250 auto getRemainingSize = [](unsigned BytesLeft) {
251 return (BytesLeft >= 2) ? 2 : 1;
252 };
253
254 unsigned BytesLeftSave = BytesLeft;
255 i = 0;
256 while (BytesLeft) {
257 VT = getRemainingValueType(BytesLeft);
258 VTSize = getRemainingSize(BytesLeft);
259 Loads[i] = DAG.getLoad(VT, dl, Chain,
260 DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
261 DAG.getConstant(SrcOff, dl, MVT::i32)),
262 SrcPtrInfo.getWithOffset(SrcOff));
263 TFOps[i] = Loads[i].getValue(1);
264 ++i;
265 SrcOff += VTSize;
266 BytesLeft -= VTSize;
267 }
268 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
269
270 i = 0;
271 BytesLeft = BytesLeftSave;
272 while (BytesLeft) {
273 VT = getRemainingValueType(BytesLeft);
274 VTSize = getRemainingSize(BytesLeft);
275 TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
276 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
277 DAG.getConstant(DstOff, dl, MVT::i32)),
278 DstPtrInfo.getWithOffset(DstOff));
279 ++i;
280 DstOff += VTSize;
281 BytesLeft -= VTSize;
282 }
283 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
284}
285
287 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
288 SDValue Size, Align Alignment, bool isVolatile,
289 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
290 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
291 Alignment.value(), RTLIB::MEMMOVE);
292}
293
295 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
296 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
297 MachinePointerInfo DstPtrInfo) const {
298
299 const ARMSubtarget &Subtarget =
301
302 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
303
304 // Generate TP loop for llvm.memset
305 if (Subtarget.hasMVEIntegerOps() &&
306 shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment,
307 false)) {
308 Src = DAG.getSplatBuildVector(MVT::v16i8, dl,
309 DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src));
310 return DAG.getNode(ARMISD::MEMSETLOOP, dl, MVT::Other, Chain, Dst, Src,
311 DAG.getZExtOrTrunc(Size, dl, MVT::i32));
312 }
313
314 if (!AlwaysInline)
315 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
316 Alignment.value(), RTLIB::MEMSET);
317
318 return SDValue();
319}
static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget, const SelectionDAG &DAG, ConstantSDNode *ConstantSize, Align Alignment, bool IsMemcpy)
cl::opt< TPLoop::MemTransfer > EnableMemtransferTPLoop("arm-memtransfer-tploop", cl::Hidden, cl::desc("Control conversion of memcpy to " "Tail predicated loops (WLSTP)"), cl::init(TPLoop::ForceDisabled), cl::values(clEnumValN(TPLoop::ForceDisabled, "force-disabled", "Don't convert memcpy to TP loop."), clEnumValN(TPLoop::ForceEnabled, "force-enabled", "Always convert memcpy to TP loop."), clEnumValN(TPLoop::Allow, "allow", "Allow (may be subject to certain conditions) " "conversion of memcpy to TP loop.")))
This file defines a TargetTransformInfo::Concept conforming object specific to the ARM target machine.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, RTLIB::Libcall LC) const
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo) const override
Emit target-specific code that performs a memset.
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memcpy.
SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memmove.
bool isThumb1Only() const
Definition: ARMSubtarget.h:364
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:200
unsigned getMaxMemcpyTPInlineSizeThreshold() const
getMaxMemcpyTPInlineSizeThreshold - Returns the maximum size that still makes it profitable to inline...
Definition: ARMSubtarget.h:182
unsigned getMaxInlineSizeThreshold() const
getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size that still makes it profitable t...
Definition: ARMSubtarget.h:174
bool hasMinSize() const
Definition: ARMSubtarget.h:363
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
uint64_t getZExtValue() const
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:872
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDValue getValue(unsigned R) const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:489
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getExternalSymbol(const char *Sym, EVT VT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:484
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:861
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
Definition: SelectionDAG.h:502
std::vector< ArgListEntry > ArgListTy
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setChain(SDValue InChain)