LLVM 17.0.0git
ARMSelectionDAGInfo.cpp
Go to the documentation of this file.
1//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the ARMSelectionDAGInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARMTargetMachine.h"
18using namespace llvm;
19
20#define DEBUG_TYPE "arm-selectiondag-info"
21
// Hidden command-line option "arm-memtransfer-tploop": controls whether memcpy
// is converted to a tail-predicated (WLSTP) loop. Three values are registered:
// ForceDisabled, ForceEnabled and Allow.
// NOTE(review): the opening of this declaration and listing lines 26-27 and 31
// are not present in this extract.
23 "arm-memtransfer-tploop", cl::Hidden,
24 cl::desc("Control conversion of memcpy to "
25 "Tail predicated loops (WLSTP)"),
28 "Don't convert memcpy to TP loop."),
29 clEnumValN(TPLoop::ForceEnabled, "force-enabled",
30 "Always convert memcpy to TP loop."),
32 "Allow (may be subject to certain conditions) "
33 "conversion of memcpy to TP loop.")));
34
35// Emit, if possible, a specialized version of the given Libcall. Typically this
36// means selecting the appropriately aligned version, but we also convert memset
37// of 0 into memclr.
//
// Align is the known alignment in bytes; LC selects which memory routine is
// being lowered. Returns an empty SDValue when the default name of LC does not
// start with "__aeabi" or when LC is not MEMCPY / MEMMOVE / MEMSET. Otherwise
// returns CallResult.second of the lowered call (presumably the call's output
// chain -- confirm against LowerCallTo).
// NOTE(review): the line opening this definition (listing line 38) is not
// present in this extract.
39 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
40 SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
41 const ARMSubtarget &Subtarget =
43 const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
44
45 // Only use a specialized AEABI function if the default version of this
46 // Libcall is an AEABI function.
47 if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
48 return SDValue();
49
50 // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
51 // able to translate memset to memclr and use the value to index the function
52 // name array.
53 enum {
54 AEABI_MEMCPY = 0,
55 AEABI_MEMMOVE,
56 AEABI_MEMSET,
57 AEABI_MEMCLR
58 } AEABILibcall;
59 switch (LC) {
60 case RTLIB::MEMCPY:
61 AEABILibcall = AEABI_MEMCPY;
62 break;
63 case RTLIB::MEMMOVE:
64 AEABILibcall = AEABI_MEMMOVE;
65 break;
66 case RTLIB::MEMSET:
67 AEABILibcall = AEABI_MEMSET;
// A memset whose fill value is the constant 0 is redirected to memclr.
68 if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
69 if (ConstantSrc->getZExtValue() == 0)
70 AEABILibcall = AEABI_MEMCLR;
71 break;
72 default:
73 return SDValue();
74 }
75
76 // Choose the most-aligned libcall variant that we can
// A byte alignment that is a multiple of 8 selects the "8" variant, a multiple
// of 4 the "4" variant, and anything else the unsuffixed variant.
77 enum {
78 ALIGN1 = 0,
79 ALIGN4,
80 ALIGN8
81 } AlignVariant;
82 if ((Align & 7) == 0)
83 AlignVariant = ALIGN8;
84 else if ((Align & 3) == 0)
85 AlignVariant = ALIGN4;
86 else
87 AlignVariant = ALIGN1;
88
// Build the argument list. The first argument is always the destination
// pointer, typed as the target's pointer-sized integer.
// NOTE(review): the declarations of Args and Entry (listing lines 89-90) are
// not present in this extract.
91 Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
92 Entry.Node = Dst;
93 Args.push_back(Entry);
94 if (AEABILibcall == AEABI_MEMCLR) {
// memclr receives only (ptr, size); no fill value is passed.
95 Entry.Node = Size;
96 Args.push_back(Entry);
97 } else if (AEABILibcall == AEABI_MEMSET) {
98 // Adjust parameters for memset, EABI uses format (ptr, size, value),
99 // GNU library uses (ptr, value, size)
100 // See RTABI section 4.3.4
101 Entry.Node = Size;
102 Args.push_back(Entry);
103
104 // Extend or truncate the argument to be an i32 value for the call.
105 if (Src.getValueType().bitsGT(MVT::i32))
106 Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
107 else if (Src.getValueType().bitsLT(MVT::i32))
108 Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
109
// The fill value is passed as an i32 that is not marked sign-extended.
110 Entry.Node = Src;
111 Entry.Ty = Type::getInt32Ty(*DAG.getContext());
112 Entry.IsSExt = false;
113 Args.push_back(Entry);
114 } else {
// memcpy / memmove: (dst, src, size), all using the pointer-sized type set
// on Entry above.
115 Entry.Node = Src;
116 Args.push_back(Entry);
117
118 Entry.Node = Size;
119 Args.push_back(Entry);
120 }
121
// Indexed as FunctionNames[AEABILibcall][AlignVariant]; the row and column
// order must stay in sync with the two enums declared above.
122 char const *FunctionNames[4][3] = {
123 { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" },
124 { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
125 { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" },
126 { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }
127 };
// Lower a void-returning call to the selected symbol, using the calling
// convention registered for LC.
// NOTE(review): the declaration of CLI and parts of this call chain (listing
// lines 128, 131 and 136) are not present in this extract.
129 CLI.setDebugLoc(dl)
130 .setChain(Chain)
132 TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
133 DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
134 TLI->getPointerTy(DAG.getDataLayout())),
135 std::move(Args))
137 std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
138
139 return CallResult.second;
140}
141
// Decides whether a memcpy (IsMemcpy == true) or memset (IsMemcpy == false)
// should be lowered to an inline tail-predicated (TP) loop.
// ConstantSize is null when the transfer size is not a compile-time constant.
// Returns true when the inline TP loop should be generated.
142static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget,
143 const SelectionDAG &DAG,
144 ConstantSDNode *ConstantSize,
145 Align Alignment, bool IsMemcpy) {
146 auto &F = DAG.getMachineFunction().getFunction();
// NOTE(review): the conditions guarding the next two returns (listing lines
// 147 and 149) are not present in this extract; presumably they test the
// "arm-memtransfer-tploop" option for its forced settings -- confirm against
// the full file.
148 return false;
150 return true;
151 // Do not generate inline TP loop if optimizations are disabled,
152 // or if optimization for size (-Os or -Oz) is on.
153 if (F.hasOptNone() || F.hasOptSize())
154 return false;
155 // If cli option is unset, for memset always generate inline TP.
156 // For memcpy, check some conditions
157 if (!IsMemcpy)
158 return true;
// memcpy of unknown size: accept only when the alignment is at least 4.
159 if (!ConstantSize && Alignment >= Align(4))
160 return true;
// memcpy of constant size: accept only when the size exceeds the subtarget's
// general inline threshold and is below an upper bound.
// NOTE(review): the upper-bound operand (listing line 164) is not present in
// this extract.
161 if (ConstantSize &&
162 ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold() &&
163 ConstantSize->getZExtValue() <
165 return true;
166 return false;
167}
168
// Emit target-specific code that performs a memcpy.
// Strategy, in order:
//  1. With MVE integer ops and a favourable TP-loop decision, emit one
//     ARMISD::MEMCPYLOOP node.
//  2. With alignment below 4, return an empty SDValue.
//  3. With a non-constant size, or a size above the subtarget's inline
//     threshold (unless AlwaysInline), defer to EmitSpecializedLibcall.
//  4. Otherwise emit ARMISD::MEMCPY pseudo nodes for the whole words, followed
//     by individual loads / stores for the 1-3 trailing bytes.
// Returns the resulting chain, or an empty SDValue when no code was emitted.
// NOTE(review): the line opening this definition (listing line 169) is not
// present in this extract.
170 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
171 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
172 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
173 const ARMSubtarget &Subtarget =
// ConstantSize is null when Size is not a compile-time constant.
175 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
176
// TP-loop path: the size operand is zero-extended or truncated to i32.
177 if (Subtarget.hasMVEIntegerOps() &&
178 shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment, true))
179 return DAG.getNode(ARMISD::MEMCPYLOOP, dl, MVT::Other, Chain, Dst, Src,
180 DAG.getZExtOrTrunc(Size, dl, MVT::i32));
181
182 // Do repeated 4-byte loads and stores. To be improved.
183 // This requires 4-byte alignment.
184 if (Alignment < Align(4))
185 return SDValue();
186 // This requires the copy size to be a constant, preferably
187 // within a subtarget-specific limit.
188 if (!ConstantSize)
189 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
190 Alignment.value(), RTLIB::MEMCPY);
191 uint64_t SizeVal = ConstantSize->getZExtValue();
192 if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
193 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
194 Alignment.value(), RTLIB::MEMCPY);
195
// Split the size into whole 4-byte words (NumMemOps) and 0-3 trailing bytes
// (BytesLeft).
196 unsigned BytesLeft = SizeVal & 3;
197 unsigned NumMemOps = SizeVal >> 2;
198 unsigned EmittedNumMemOps = 0;
199 EVT VT = MVT::i32;
200 unsigned VTSize = 4;
201 unsigned i = 0;
202 // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
203 const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
// TFOps and Loads are only used for the trailing bytes below (at most two
// entries are filled, since BytesLeft is at most 3).
204 SDValue TFOps[6];
205 SDValue Loads[6];
206 uint64_t SrcOff = 0, DstOff = 0;
207
208 // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
209 // VLDM/VSTM and make this code emit it when appropriate. This would reduce
210 // pressure on the general purpose registers. However this seems harder to map
211 // onto the register allocator's view of the world.
212
213 // The number of MEMCPY pseudo-instructions to emit. We use up to
214 // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
215 // later on. This is a lower bound on the number of MEMCPY operations we must
216 // emit.
217 unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
218
219 // Code size optimisation: do not inline memcpy if expansion results in
220 // more instructions than the library call.
221 if (NumMEMCPYs > 1 && Subtarget.hasMinSize()) {
222 return SDValue();
223 }
224
// NOTE(review): the declaration of VTs (listing line 225) is not present in
// this extract.
226
227 for (unsigned I = 0; I != NumMEMCPYs; ++I) {
228 // Evenly distribute registers among MEMCPY operations to reduce register
229 // pressure.
230 unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
231 unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
232
// The MEMCPY node is reused as the new Dst (value 0); its value 1 becomes the
// new Src and its value 2 the new Chain, so each iteration continues from
// where the previous one stopped.
233 Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
234 DAG.getConstant(NumRegs, dl, MVT::i32));
235 Src = Dst.getValue(1);
236 Chain = Dst.getValue(2);
237
// Keep the pointer infos in step with the bytes covered by this node.
238 DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
239 SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
240
241 EmittedNumMemOps = NextEmittedNumMemOps;
242 }
243
244 if (BytesLeft == 0)
245 return Chain;
246
247 // Issue loads / stores for the trailing (1 - 3) bytes.
// A remainder of 2 or 3 bytes is first handled with one i16 access; a final
// single byte uses an i8 access.
248 auto getRemainingValueType = [](unsigned BytesLeft) {
249 return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
250 };
251 auto getRemainingSize = [](unsigned BytesLeft) {
252 return (BytesLeft >= 2) ? 2 : 1;
253 };
254
// First pass: emit every trailing load off the same Chain and collect their
// output chains (value 1) so they can be joined by a TokenFactor.
255 unsigned BytesLeftSave = BytesLeft;
256 i = 0;
257 while (BytesLeft) {
258 VT = getRemainingValueType(BytesLeft);
259 VTSize = getRemainingSize(BytesLeft);
260 Loads[i] = DAG.getLoad(VT, dl, Chain,
261 DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
262 DAG.getConstant(SrcOff, dl, MVT::i32)),
263 SrcPtrInfo.getWithOffset(SrcOff));
264 TFOps[i] = Loads[i].getValue(1);
265 ++i;
266 SrcOff += VTSize;
267 BytesLeft -= VTSize;
268 }
269 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
270
// Second pass: replay the same size sequence for the stores, all chained on
// the TokenFactor of the loads, and join the stores with a final TokenFactor.
271 i = 0;
272 BytesLeft = BytesLeftSave;
273 while (BytesLeft) {
274 VT = getRemainingValueType(BytesLeft);
275 VTSize = getRemainingSize(BytesLeft);
276 TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
277 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
278 DAG.getConstant(DstOff, dl, MVT::i32)),
279 DstPtrInfo.getWithOffset(DstOff));
280 ++i;
281 DstOff += VTSize;
282 BytesLeft -= VTSize;
283 }
284 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
285}
286
// Emit target-specific code that performs a memmove.
// No inline expansion is attempted here: the request is always forwarded to
// EmitSpecializedLibcall with RTLIB::MEMMOVE, passing the alignment as its
// byte value. isVolatile, DstPtrInfo and SrcPtrInfo are not used in this body.
// NOTE(review): the line opening this definition (listing line 287) is not
// present in this extract.
288 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
289 SDValue Size, Align Alignment, bool isVolatile,
290 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
291 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
292 Alignment.value(), RTLIB::MEMMOVE);
293}
294
// Emit target-specific code that performs a memset.
// Src is the fill value. With MVE integer ops and a favourable TP-loop
// decision an ARMISD::MEMSETLOOP node is emitted; otherwise the call is
// forwarded to EmitSpecializedLibcall unless AlwaysInline is set, in which
// case an empty SDValue is returned.
// NOTE(review): the line opening this definition (listing line 295) and the
// initializer of Subtarget (listing line 301) are not present in this extract.
296 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
297 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
298 MachinePointerInfo DstPtrInfo) const {
299
300 const ARMSubtarget &Subtarget =
302
// ConstantSize is null when Size is not a compile-time constant.
303 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
304
305 // Generate TP loop for llvm.memset
306 if (Subtarget.hasMVEIntegerOps() &&
307 shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment,
308 false)) {
// Truncate the fill value to i8 and splat it across a v16i8 vector; the size
// operand is zero-extended or truncated to i32.
309 Src = DAG.getSplatBuildVector(MVT::v16i8, dl,
310 DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src));
311 return DAG.getNode(ARMISD::MEMSETLOOP, dl, MVT::Other, Chain, Dst, Src,
312 DAG.getZExtOrTrunc(Size, dl, MVT::i32));
313 }
314
// A library call is only allowed when the caller did not demand inlining.
315 if (!AlwaysInline)
316 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
317 Alignment.value(), RTLIB::MEMSET);
318
// AlwaysInline without a TP loop: nothing is emitted here (presumably the
// caller falls back to its generic expansion -- confirm against the caller).
319 return SDValue();
320}
static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget, const SelectionDAG &DAG, ConstantSDNode *ConstantSize, Align Alignment, bool IsMemcpy)
cl::opt< TPLoop::MemTransfer > EnableMemtransferTPLoop("arm-memtransfer-tploop", cl::Hidden, cl::desc("Control conversion of memcpy to " "Tail predicated loops (WLSTP)"), cl::init(TPLoop::ForceDisabled), cl::values(clEnumValN(TPLoop::ForceDisabled, "force-disabled", "Don't convert memcpy to TP loop."), clEnumValN(TPLoop::ForceEnabled, "force-enabled", "Always convert memcpy to TP loop."), clEnumValN(TPLoop::Allow, "allow", "Allow (may be subject to certain conditions) " "conversion of memcpy to TP loop.")))
This file defines a TargetTransformInfo::Concept conforming object specific to the ARM target machine.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:678
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, RTLIB::Libcall LC) const
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo) const override
Emit target-specific code that performs a memset.
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memcpy.
SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memmove.
bool isThumb1Only() const
Definition: ARMSubtarget.h:421
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:266
unsigned getMaxMemcpyTPInlineSizeThreshold() const
getMaxMemcpyTPInlineSizeThreshold - Returns the maximum size that still makes it profitable to inline...
Definition: ARMSubtarget.h:248
unsigned getMaxInlineSizeThreshold() const
getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size that still makes it profitable t...
Definition: ARMSubtarget.h:240
bool hasMinSize() const
Definition: ARMSubtarget.h:420
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
uint64_t getZExtValue() const
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:861
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDValue getValue(unsigned R) const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getExternalSymbol(const char *Sym, EVT VT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
std::vector< ArgListEntry > ArgListTy
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:776
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:782
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:703
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setChain(SDValue InChain)